2024-heraklion-scientific-p.../notebooks/02a_Serialization.ipynb

455 lines
17 KiB
Plaintext
Raw Permalink Normal View History

2024-08-27 14:52:41 +02:00
{
"cells": [
{
"cell_type": "markdown",
"source": [
"# Serialization demo"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 1,
"outputs": [],
"source": [
"import json\n",
"import numpy as np\n",
"import pickle"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## pickle is simple but can be dangerous"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 2,
"outputs": [],
"source": [
"class SomethingSimple:\n",
"    \"\"\"Minimal container with two attributes, used as the pickling example.\"\"\"\n",
"\n",
"    def __init__(self, foo, bar):\n",
"        \"\"\"Store ``foo`` and ``bar`` unchanged as instance attributes.\"\"\"\n",
"        self.foo = foo\n",
"        self.bar = bar\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 3,
"outputs": [
{
"data": {
"text/plain": "(__main__.SomethingSimple, 3, 'two')"
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create an instance, pickle it to disk, then load it straight back.\n",
"simple = SomethingSimple(foo=3, bar='two')\n",
"\n",
"with open('simple.pickle', 'wb') as pickle_out:\n",
"    pickle.dump(simple, pickle_out)\n",
"\n",
"with open('simple.pickle', 'rb') as pickle_in:\n",
"    simple_bis = pickle.load(pickle_in)\n",
"\n",
"# Show the class and the attribute values of the loaded copy.\n",
"(type(simple_bis), simple_bis.foo, simple_bis.bar)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 4,
"outputs": [],
"source": [
"class SomethingSimple:\n",
"    \"\"\"Deliberately changed redefinition: the second attribute is now ``bla``, not ``bar``.\"\"\"\n",
"\n",
"    def __init__(self, foo, bla):\n",
"        \"\"\"Store ``foo`` and ``bla`` unchanged as instance attributes.\"\"\"\n",
"        self.foo = foo\n",
"        self.bla = bla"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 5,
"outputs": [
{
"data": {
"text/plain": "{'foo': 3, 'bar': 'two'}"
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the file that was written before the class definition changed.\n",
"with open('simple.pickle', 'rb') as pickle_in:\n",
"    simple_bis = pickle.load(pickle_in)\n",
"\n",
"# The loaded object carries the attributes that were saved (`bar`),\n",
"# not the ones the current class definition would set (`bla`).\n",
"vars(simple_bis)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"It gets even worse when the class is simply renamed: the old pickle can no longer be loaded at all."
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 6,
"outputs": [
{
"ename": "AttributeError",
"evalue": "Can't get attribute 'SomethingSimple' on <module '__main__'>",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)",
"\u001B[0;32m<ipython-input-6-56e3756749ad>\u001B[0m in \u001B[0;36m<module>\u001B[0;34m()\u001B[0m\n\u001B[1;32m 8\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 9\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mopen\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'simple.pickle'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'rb'\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 10\u001B[0;31m \u001B[0msimple_bis\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mpickle\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mload\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mf\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 11\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 12\u001B[0m \u001B[0msimple_bis\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__dict__\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;31mAttributeError\u001B[0m: Can't get attribute 'SomethingSimple' on <module '__main__'>"
]
}
],
"source": [
"# Simulate renaming SomethingSimple to Simple; the load below is expected to raise AttributeError\n",
"del SomethingSimple  # the class name recorded in the pickle no longer exists in this session\n",
"\n",
"class Simple:  # same attributes as the pickled class, only the class name differs\n",
"    def __init__(self, foo, bar):\n",
"        self.foo = foo\n",
"        self.bar = bar\n",
"\n",
"with open('simple.pickle', 'rb') as f:\n",
"    simple_bis = pickle.load(f)\n",
"\n",
"simple_bis.__dict__"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "markdown",
"source": [
"## JSON is still quite simple, and gives you finer control"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": 7,
"outputs": [],
"source": [
"class SomethingNice:\n",
"    \"\"\"Two-attribute container that saves itself to, and loads itself from, JSON.\"\"\"\n",
"\n",
"    def __init__(self, foo, bar):\n",
"        \"\"\"Store ``foo`` and ``bar`` unchanged as instance attributes.\"\"\"\n",
"        self.foo = foo\n",
"        self.bar = bar\n",
"\n",
"    @classmethod\n",
"    def from_json(cls, fname):\n",
"        \"\"\"Build an instance from the JSON file at ``fname``.\n",
"\n",
"        The file must contain a JSON object whose keys match the ``__init__``\n",
"        parameters, because it is expanded into keyword arguments.\n",
"        \"\"\"\n",
"        with open(fname, 'r') as f:\n",
"            dump = json.load(f)\n",
"        return cls(**dump)\n",
"\n",
"    def to_json(self, fname):\n",
"        \"\"\"Write all instance attributes to ``fname`` as one JSON object.\n",
"\n",
"        Raises ``TypeError`` when an attribute holds a value that ``json``\n",
"        cannot encode (for example a numpy array).\n",
"        \"\"\"\n",
"        with open(fname, 'w') as f:\n",
"            json.dump(self.__dict__, f)\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 8,
"outputs": [
{
"data": {
"text/plain": "{'foo': 3, 'bar': 'two'}"
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build an instance that holds only an int and a str.\n",
"so_nice = SomethingNice(foo=3, bar='two')\n",
"vars(so_nice)\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 9,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"foo\": 3, \"bar\": \"two\"}"
]
}
],
"source": [
"# Write the object to disk, then show the raw contents of the file.\n",
"so_nice.to_json('nice.json')\n",
"!cat ./nice.json"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 10,
"outputs": [
{
"data": {
"text/plain": "{'foo': 3, 'bar': 'two'}"
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rebuild the object from the file written by to_json().\n",
"so_nice_again = SomethingNice.from_json('nice.json')\n",
"vars(so_nice_again)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 11,
"outputs": [
{
"data": {
"text/plain": "{'foo': 3, 'bar': array([1.2, 3.4])}"
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same class, but `bar` now holds a numpy array instead of a str.\n",
"not_so_nice = SomethingNice(foo=3, bar=np.array([1.2, 3.4]))\n",
"vars(not_so_nice)"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 12,
"outputs": [
{
"ename": "TypeError",
"evalue": "Object of type 'ndarray' is not JSON serializable",
"output_type": "error",
"traceback": [
"\u001B[0;31m---------------------------------------------------------------------------\u001B[0m",
"\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)",
"\u001B[0;32m<ipython-input-12-e860ef19277d>\u001B[0m in \u001B[0;36m<module>\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0mnot_so_nice\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mto_json\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'not_so_nice.json'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m",
"\u001B[0;32m<ipython-input-7-c9b082d8b74f>\u001B[0m in \u001B[0;36mto_json\u001B[0;34m(self, fname)\u001B[0m\n\u001B[1;32m 13\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mto_json\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mfname\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 14\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mopen\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfname\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'w'\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 15\u001B[0;31m \u001B[0mjson\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdump\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__dict__\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m",
"\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/__init__.py\u001B[0m in \u001B[0;36mdump\u001B[0;34m(obj, fp, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001B[0m\n\u001B[1;32m 177\u001B[0m \u001B[0;31m# could accelerate with writelines in some versions of Python, at\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 178\u001B[0m \u001B[0;31m# a debuggability cost\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 179\u001B[0;31m \u001B[0;32mfor\u001B[0m \u001B[0mchunk\u001B[0m \u001B[0;32min\u001B[0m \u001B[0miterable\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 180\u001B[0m \u001B[0mfp\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mwrite\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mchunk\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 181\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/encoder.py\u001B[0m in \u001B[0;36m_iterencode\u001B[0;34m(o, _current_indent_level)\u001B[0m\n\u001B[1;32m 428\u001B[0m \u001B[0;32myield\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0m_iterencode_list\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_current_indent_level\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 429\u001B[0m \u001B[0;32melif\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdict\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 430\u001B[0;31m \u001B[0;32myield\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0m_iterencode_dict\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_current_indent_level\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 431\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 432\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mmarkers\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/encoder.py\u001B[0m in \u001B[0;36m_iterencode_dict\u001B[0;34m(dct, _current_indent_level)\u001B[0m\n\u001B[1;32m 402\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 403\u001B[0m \u001B[0mchunks\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0m_iterencode\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvalue\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_current_indent_level\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 404\u001B[0;31m \u001B[0;32myield\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0mchunks\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 405\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mnewline_indent\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 406\u001B[0m \u001B[0m_current_indent_level\u001B[0m \u001B[0;34m-=\u001B[0m \u001B[0;36m1\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/encoder.py\u001B[0m in \u001B[0;36m_iterencode\u001B[0;34m(o, _current_indent_level)\u001B[0m\n\u001B[1;32m 435\u001B[0m \u001B[0;32mraise\u001B[0m \u001B[0mValueError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"Circular reference detected\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 436\u001B[0m \u001B[0mmarkers\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mmarkerid\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mo\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 437\u001B[0;31m \u001B[0mo\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0m_default\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 438\u001B[0m \u001B[0;32myield\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0m_iterencode\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_current_indent_level\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 439\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mmarkers\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/encoder.py\u001B[0m in \u001B[0;36mdefault\u001B[0;34m(self, o)\u001B[0m\n\u001B[1;32m 178\u001B[0m \"\"\"\n\u001B[1;32m 179\u001B[0m raise TypeError(\"Object of type '%s' is not JSON serializable\" %\n\u001B[0;32m--> 180\u001B[0;31m o.__class__.__name__)\n\u001B[0m\u001B[1;32m 181\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 182\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mencode\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mo\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n",
"\u001B[0;31mTypeError\u001B[0m: Object of type 'ndarray' is not JSON serializable"
]
}
],
"source": [
"# Expected to raise TypeError: json cannot encode the numpy array stored in `bar`.\n",
"not_so_nice.to_json('not_so_nice.json')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 13,
"outputs": [],
"source": [
"class SomethingWorking:\n",
"    \"\"\"Container with a value ``foo`` and array-like ``data`` that round-trips through JSON.\"\"\"\n",
"\n",
"    def __init__(self, foo, data):\n",
"        \"\"\"Store ``foo`` and ``data`` unchanged as instance attributes.\"\"\"\n",
"        self.foo = foo\n",
"        self.data = data\n",
"\n",
"    @classmethod\n",
"    def from_json(cls, fname):\n",
"        \"\"\"Build an instance from the JSON file at ``fname``.\n",
"\n",
"        The value stored under ``'data'`` is converted to a numpy array.\n",
"        \"\"\"\n",
"        with open(fname, 'r') as f:\n",
"            dump = json.load(f)\n",
"        dump['data'] = np.array(dump['data'])\n",
"        return cls(**dump)\n",
"\n",
"    def to_json(self, fname):\n",
"        \"\"\"Write ``foo`` and ``data`` to ``fname`` as one JSON object.\n",
"\n",
"        ``data`` is converted to (nested) lists first because ``json`` cannot\n",
"        encode numpy arrays. Going through ``np.asarray`` means a plain list\n",
"        passed as ``data`` is accepted as well as an array.\n",
"        \"\"\"\n",
"        dump = {\n",
"            'foo': self.foo,\n",
"            'data': np.asarray(self.data).tolist(),\n",
"        }\n",
"        with open(fname, 'w') as f:\n",
"            json.dump(dump, f)\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 14,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"foo\": 3, \"data\": [[1, 2], [3, 4]]}"
]
}
],
"source": [
"# A numpy array payload, saved through the class that converts it to lists.\n",
"not_so_nice = SomethingWorking(\n",
"    foo=3,\n",
"    data=np.array([[1, 2], [3, 4]]),\n",
")\n",
"not_so_nice.to_json('not_so_nice.json')\n",
"!cat not_so_nice.json\n"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python [default]",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.5"
},
"toc": {
"nav_menu": {
"height": "12px",
"width": "252px"
},
"navigate_menu": true,
"number_sections": true,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 0
}