In [ ]:
import numpy as np
In [ ]:
# create a collection of time series
# in real life, this data comes from an experiment/simulation
n_series = 128
len_one_series = 2**20 # 2^20 = 1,048,576 items (8 B x 2^20 = 8,388,608 B = 8 MiB per series)
time_series = np.zeros((n_series, len_one_series), dtype='float64')
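By default NumPy allocates arrays in row-major (C) order, so the elements of one row sit next to each other in memory. The cell below (a small check added here for illustration) confirms this via the array's flags and strides.
In [ ]:
# NumPy uses row-major (C) order by default:
# consecutive elements of one row are adjacent in memory
print(time_series.flags['C_CONTIGUOUS'])  # True
print(time_series.strides)                # (8388608, 8): 8 MiB to the next row, 8 B to the next element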
In [ ]:
# how much memory does one time series need?
ts_size = time_series[0].nbytes/2**20 # divide by 2^20 bytes = 1 MiB
total_size = time_series.nbytes/2**20
print(f'Size of one time series: {int(ts_size)} MiB')
print(f'Size of collection: {int(total_size)} MiB')
In [ ]:
# let's load the collection in one big array
def load_data_row(x, time_series):
"""Store one time series per raw"""
for row, ts in enumerate(time_series):
x[row,:] = ts
return x
In [ ]:
# let's load the collection in one big array
def load_data_column(x, time_series):
"""Store one time series per column"""
for column, ts in enumerate(time_series):
x[:,column] = ts
return x
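Before timing, a quick sanity check (added here for illustration, not part of the original measurement) that both loaders store the same data, just transposed relative to each other:
In [ ]:
# sanity check: the two loaders fill their targets with identical data,
# one series per row vs. one series per column
x_row = load_data_row(np.zeros((n_series, len_one_series)), time_series)
x_col = load_data_column(np.zeros((len_one_series, n_series)), time_series)
print(np.array_equal(x_row, x_col.T))  # True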
In [ ]:
x = np.zeros((n_series, len_one_series), dtype='float64')
%timeit load_data_row(x, time_series)
In [ ]:
x = np.zeros((len_one_series, n_series), dtype='float64')
%timeit load_data_column(x, time_series)
In [ ]:
# on my machine: 31 ms vs 1240 ms ≈ 40x slowdown!!!
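The gap comes from memory layout: both targets are C-ordered, so load_data_row writes each 8 MiB series into a contiguous block, while load_data_column scatters every write across the array with a 1 KiB stride (128 columns x 8 B), which defeats the CPU cache. As a sketch (the timing comment is an expectation, not a measurement from this notebook), one way to make the column-wise loader fast again is to allocate the target in Fortran (column-major) order, so that each column becomes contiguous:
In [ ]:
# sketch: if the target really must hold one series per column,
# a Fortran-ordered allocation makes each column contiguous in memory,
# so the column-wise writes become cache-friendly again
x = np.zeros((len_one_series, n_series), dtype='float64', order='F')
%timeit load_data_column(x, time_series)  # expected to be close to the row-wise timing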