2024-heraklion-scientific-p.../notebooks/02a_Serialization.ipynb
2024-08-27 15:52:41 +03:00

17 KiB

Serialization demo

In [1]:
import json
import numpy as np
import pickle

pickle is simple but can be dangerous

In [2]:
class SomethingSimple:
    def __init__(self, foo, bar):
        self.foo = foo
        self.bar = bar
In [3]:
simple = SomethingSimple(foo=3, bar='two')

with open('simple.pickle', 'wb') as f:
    pickle.dump(simple, f)

with open('simple.pickle', 'rb') as f:
    simple_bis = pickle.load(f)

type(simple_bis), simple_bis.foo, simple_bis.bar
Out[3]:
(__main__.SomethingSimple, 3, 'two')
In [4]:
class SomethingSimple:
    def __init__(self, foo, bla):
        self.foo = foo
        self.bla = bla
In [5]:
with open('simple.pickle', 'rb') as f:
    simple_bis = pickle.load(f)

simple_bis.__dict__
Out[5]:
{'foo': 3, 'bar': 'two'}

Even worse when you have a simple class name change

In [6]:
# Simulate a name change of the SomethingSimple class
del SomethingSimple

class Simple:
    def __init__(self, foo, bar):
        self.foo = foo
        self.bar = bar

with open('simple.pickle', 'rb') as f:
    simple_bis = pickle.load(f)

simple_bis.__dict__
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-6-56e3756749ad> in <module>()
      8 
      9 with open('simple.pickle', 'rb') as f:
---> 10     simple_bis = pickle.load(f)
     11 
     12 simple_bis.__dict__

AttributeError: Can't get attribute 'SomethingSimple' on <module '__main__'>

JSON is still quite simple, and allows you a closer control

In [7]:
class SomethingNice:

    def __init__(self, foo, bar):
        self.foo = foo
        self.bar = bar

    @classmethod
    def from_json(cls, fname):
        with open(fname, 'r') as f:
            dump = json.load(f)
        return cls(**dump)

    def to_json(self, fname):
        with open(fname, 'w') as f:
            json.dump(self.__dict__, f)
In [8]:
so_nice = SomethingNice(foo=3, bar='two')
so_nice.__dict__
Out[8]:
{'foo': 3, 'bar': 'two'}
In [9]:
so_nice.to_json('nice.json')
!cat ./nice.json
{"foo": 3, "bar": "two"}
In [10]:
so_nice_again = SomethingNice.from_json('nice.json')
so_nice_again.__dict__
Out[10]:
{'foo': 3, 'bar': 'two'}
In [11]:
not_so_nice = SomethingNice(foo=3, bar=np.array([1.2, 3.4]))
not_so_nice.__dict__
Out[11]:
{'foo': 3, 'bar': array([1.2, 3.4])}
In [12]:
not_so_nice.to_json('not_so_nice.json')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-12-e860ef19277d> in <module>()
----> 1 not_so_nice.to_json('not_so_nice.json')

<ipython-input-7-c9b082d8b74f> in to_json(self, fname)
     13     def to_json(self, fname):
     14         with open(fname, 'w') as f:
---> 15             json.dump(self.__dict__, f)

~/miniconda3/envs/bog/lib/python3.6/json/__init__.py in dump(obj, fp, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
    177     # could accelerate with writelines in some versions of Python, at
    178     # a debuggability cost
--> 179     for chunk in iterable:
    180         fp.write(chunk)
    181 

~/miniconda3/envs/bog/lib/python3.6/json/encoder.py in _iterencode(o, _current_indent_level)
    428             yield from _iterencode_list(o, _current_indent_level)
    429         elif isinstance(o, dict):
--> 430             yield from _iterencode_dict(o, _current_indent_level)
    431         else:
    432             if markers is not None:

~/miniconda3/envs/bog/lib/python3.6/json/encoder.py in _iterencode_dict(dct, _current_indent_level)
    402                 else:
    403                     chunks = _iterencode(value, _current_indent_level)
--> 404                 yield from chunks
    405         if newline_indent is not None:
    406             _current_indent_level -= 1

~/miniconda3/envs/bog/lib/python3.6/json/encoder.py in _iterencode(o, _current_indent_level)
    435                     raise ValueError("Circular reference detected")
    436                 markers[markerid] = o
--> 437             o = _default(o)
    438             yield from _iterencode(o, _current_indent_level)
    439             if markers is not None:

~/miniconda3/envs/bog/lib/python3.6/json/encoder.py in default(self, o)
    178         """
    179         raise TypeError("Object of type '%s' is not JSON serializable" %
--> 180                         o.__class__.__name__)
    181 
    182     def encode(self, o):

TypeError: Object of type 'ndarray' is not JSON serializable
In [13]:
class SomethingWorking:

    def __init__(self, foo, data):
        self.foo = foo
        self.data = data

    @classmethod
    def from_json(cls, fname):
        with open(fname, 'r') as f:
            dump = json.load(f)
        dump['data'] = np.array(dump['data'])
        return cls(**dump)

    def to_json(self, fname):
        dump = {
            'foo': self.foo,
            'data': self.data.tolist(),
        }
        with open(fname, 'w') as f:
            json.dump(dump, f)
In [14]:
not_so_nice = SomethingWorking(foo=3, data=np.array([[1, 2], [3,4 ]]))
not_so_nice.to_json('not_so_nice.json')
!cat not_so_nice.json
{"foo": 3, "data": [[1, 2], [3, 4]]}
In [ ]: