{ "cells": [ { "cell_type": "markdown", "source": [ "# Serialization demo" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": 1, "outputs": [], "source": [ "import json\n", "import numpy as np\n", "import pickle" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "markdown", "source": [ "## pickle is simple but can be dangerous" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": 2, "outputs": [], "source": [ "class SomethingSimple:\n", " def __init__(self, foo, bar):\n", " self.foo = foo\n", " self.bar = bar\n" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 3, "outputs": [ { "data": { "text/plain": "(__main__.SomethingSimple, 3, 'two')" }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "simple = SomethingSimple(foo=3, bar='two')\n", "\n", "with open('simple.pickle', 'wb') as f:\n", " pickle.dump(simple, f)\n", "\n", "with open('simple.pickle', 'rb') as f:\n", " simple_bis = pickle.load(f)\n", "\n", "type(simple_bis), simple_bis.foo, simple_bis.bar" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 4, "outputs": [], "source": [ "class SomethingSimple:\n", " def __init__(self, foo, bla):\n", " self.foo = foo\n", " self.bla = bla" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 5, "outputs": [ { "data": { "text/plain": "{'foo': 3, 'bar': 'two'}" }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with open('simple.pickle', 'rb') as f:\n", " simple_bis = pickle.load(f)\n", "\n", "simple_bis.__dict__" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "markdown", "source": [ "Even worse when you have a simple class name change" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": 6, "outputs": [ { "ename": "AttributeError", "evalue": "Can't get attribute 'SomethingSimple' on ", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mAttributeError\u001B[0m Traceback (most recent call last)", "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[1;32m 8\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 9\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mopen\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'simple.pickle'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'rb'\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 10\u001B[0;31m \u001B[0msimple_bis\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mpickle\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mload\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mf\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 11\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 12\u001B[0m \u001B[0msimple_bis\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__dict__\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;31mAttributeError\u001B[0m: Can't get attribute 'SomethingSimple' on " ] } ], "source": [ "# Simulate a name change of the SomethingSimple class\n", "del SomethingSimple\n", "\n", "class Simple:\n", " def __init__(self, foo, bar):\n", " self.foo = foo\n", " self.bar = bar\n", "\n", "with open('simple.pickle', 'rb') as f:\n", " simple_bis = pickle.load(f)\n", "\n", "simple_bis.__dict__" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "markdown", "source": [ "## JSON is still quite simple, and allows you a closer control" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": 7, "outputs": [], "source": [ "class SomethingNice:\n", "\n", " def __init__(self, foo, bar):\n", " self.foo = foo\n", " self.bar = bar\n", "\n", " @classmethod\n", " def from_json(cls, fname):\n", " with open(fname, 'r') as f:\n", " dump = json.load(f)\n", " return cls(**dump)\n", "\n", " def to_json(self, fname):\n", " with open(fname, 'w') as f:\n", " json.dump(self.__dict__, f)\n" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 8, "outputs": [ { "data": { "text/plain": "{'foo': 3, 'bar': 'two'}" }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "so_nice = SomethingNice(foo=3, bar='two')\n", "so_nice.__dict__\n" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 9, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"foo\": 3, \"bar\": \"two\"}" ] } ], "source": [ "so_nice.to_json('nice.json')\n", "!cat ./nice.json" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 10, "outputs": [ { "data": { "text/plain": "{'foo': 3, 'bar': 'two'}" }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "so_nice_again = SomethingNice.from_json('nice.json')\n", "so_nice_again.__dict__" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 11, "outputs": [ { "data": { "text/plain": "{'foo': 3, 'bar': array([1.2, 3.4])}" }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "not_so_nice = SomethingNice(foo=3, bar=np.array([1.2, 3.4]))\n", "not_so_nice.__dict__" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 12, "outputs": [ { "ename": "TypeError", "evalue": "Object of type 'ndarray' is not JSON serializable", "output_type": "error", "traceback": [ "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", "\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)", "\u001B[0;32m\u001B[0m in \u001B[0;36m\u001B[0;34m()\u001B[0m\n\u001B[0;32m----> 1\u001B[0;31m \u001B[0mnot_so_nice\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mto_json\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m'not_so_nice.json'\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m", "\u001B[0;32m\u001B[0m in \u001B[0;36mto_json\u001B[0;34m(self, fname)\u001B[0m\n\u001B[1;32m 13\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mto_json\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mfname\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 14\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mopen\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mfname\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'w'\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 15\u001B[0;31m \u001B[0mjson\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mdump\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__dict__\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m", "\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/__init__.py\u001B[0m in \u001B[0;36mdump\u001B[0;34m(obj, fp, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001B[0m\n\u001B[1;32m 177\u001B[0m \u001B[0;31m# could accelerate with writelines in some versions of Python, at\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 178\u001B[0m \u001B[0;31m# a debuggability cost\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 179\u001B[0;31m \u001B[0;32mfor\u001B[0m \u001B[0mchunk\u001B[0m \u001B[0;32min\u001B[0m \u001B[0miterable\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 180\u001B[0m \u001B[0mfp\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mwrite\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mchunk\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 181\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/encoder.py\u001B[0m in \u001B[0;36m_iterencode\u001B[0;34m(o, _current_indent_level)\u001B[0m\n\u001B[1;32m 428\u001B[0m \u001B[0;32myield\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0m_iterencode_list\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_current_indent_level\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 429\u001B[0m \u001B[0;32melif\u001B[0m \u001B[0misinstance\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mdict\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 430\u001B[0;31m \u001B[0;32myield\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0m_iterencode_dict\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_current_indent_level\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 431\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 432\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mmarkers\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/encoder.py\u001B[0m in \u001B[0;36m_iterencode_dict\u001B[0;34m(dct, _current_indent_level)\u001B[0m\n\u001B[1;32m 402\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 403\u001B[0m \u001B[0mchunks\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0m_iterencode\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mvalue\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_current_indent_level\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 404\u001B[0;31m \u001B[0;32myield\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0mchunks\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 405\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mnewline_indent\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 406\u001B[0m \u001B[0m_current_indent_level\u001B[0m \u001B[0;34m-=\u001B[0m \u001B[0;36m1\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/encoder.py\u001B[0m in \u001B[0;36m_iterencode\u001B[0;34m(o, _current_indent_level)\u001B[0m\n\u001B[1;32m 435\u001B[0m \u001B[0;32mraise\u001B[0m \u001B[0mValueError\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"Circular reference detected\"\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 436\u001B[0m \u001B[0mmarkers\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mmarkerid\u001B[0m\u001B[0;34m]\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mo\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 437\u001B[0;31m \u001B[0mo\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0m_default\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 438\u001B[0m \u001B[0;32myield\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0m_iterencode\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mo\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0m_current_indent_level\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 439\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mmarkers\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mnot\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;32m~/miniconda3/envs/bog/lib/python3.6/json/encoder.py\u001B[0m in \u001B[0;36mdefault\u001B[0;34m(self, o)\u001B[0m\n\u001B[1;32m 178\u001B[0m \"\"\"\n\u001B[1;32m 179\u001B[0m raise TypeError(\"Object of type '%s' is not JSON serializable\" %\n\u001B[0;32m--> 180\u001B[0;31m o.__class__.__name__)\n\u001B[0m\u001B[1;32m 181\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 182\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0mencode\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mo\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", "\u001B[0;31mTypeError\u001B[0m: Object of type 'ndarray' is not JSON serializable" ] } ], "source": [ "not_so_nice.to_json('not_so_nice.json')" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 13, "outputs": [], "source": [ "class SomethingWorking:\n", "\n", " def __init__(self, foo, data):\n", " self.foo = foo\n", " self.data = data\n", "\n", " @classmethod\n", " def from_json(cls, fname):\n", " with open(fname, 'r') as f:\n", " dump = json.load(f)\n", " dump['data'] = np.array(dump['data'])\n", " return cls(**dump)\n", "\n", " def to_json(self, fname):\n", " dump = {\n", " 'foo': self.foo,\n", " 'data': self.data.tolist(),\n", " }\n", " with open(fname, 'w') as f:\n", " json.dump(dump, f)\n" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 14, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"foo\": 3, \"data\": [[1, 2], [3, 4]]}" ] } ], "source": [ "not_so_nice = SomethingWorking(foo=3, data=np.array([[1, 2], [3,4 ]]))\n", "not_so_nice.to_json('not_so_nice.json')\n", "!cat not_so_nice.json\n" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.5" }, "toc": { "nav_menu": { "height": "12px", "width": "252px" }, "navigate_menu": true, "number_sections": true, "sideBar": true, "threshold": 4, "toc_cell": false, "toc_section_display": "block", "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 0 }