2024-heraklion-comp-arch/puzzle.ipynb

4.5 KiB

In [ ]:
import numpy as np
In [ ]:
# Create a collection of time series.
# In real life this data comes from an experiment/simulation; here every
# series is simply a zero-filled column vector.
n_series = 128
len_one_series = 2**15  # ➔ 2^15 = 32768 items (8 B x 2^15 = 262144 B = 256 K)
# One independent (len_one_series, 1) float64 array per series. A list
# comprehension replaces the append loop, so no unused loop index is left
# behind in the notebook namespace.
time_series = [
    np.zeros((len_one_series, 1), dtype='float64') for _ in range(n_series)
]
In [ ]:
# Memory footprint of a single time series, expressed in K (1 K = 1024 B)
ts_size = time_series[0].nbytes / 1024
print(f'Size of one time series: {int(ts_size)} K')
# 1024 K make 1 M, hence the extra division for the whole collection
print(f'Size of collection: {int(n_series*ts_size/1024)} M')
In [ ]:
# let's load the collection in one big array
def load_data_row(x, time_series):
    """Copy the collection into ``x``, one time series per row.

    The i-th element of ``time_series`` is assigned to ``x[i, :]``.
    ``x`` is modified in place and is also returned for convenience.
    """
    for i, series in enumerate(time_series):
        # first axis of x selects the series
        x[i, :] = series
    return x
In [ ]:
# let's load the collection in one big array
def load_data_column(x, time_series):
    """Copy the collection into ``x``, one time series per column.

    The j-th element of ``time_series`` is assigned to ``x[:, j]``.
    ``x`` is modified in place and is also returned for convenience.
    """
    for j, series in enumerate(time_series):
        # second axis of x selects the series
        x[:, j] = series
    return x
In [ ]:
# Destination buffer with the series index on the first axis, so that
# load_data_row writes each series to x[row, :]. The trailing axis of
# length 1 matches the (len_one_series, 1) shape of the individual series.
x = np.zeros((n_series, len_one_series, 1), dtype='float64')
# Time the row-wise fill; the same buffer x is overwritten on every run.
%timeit load_data_row(x, time_series)
In [ ]:
# Destination buffer with the series index on the second axis, so that
# load_data_column writes each series to x[:, column]. Same number of
# elements as the row-wise buffer, only the first two axes are swapped.
x = np.zeros((len_one_series, n_series, 1), dtype='float64')
# Time the column-wise fill; the same buffer x is overwritten on every run.
%timeit load_data_column(x, time_series)
In [ ]:
# on my machine: 2.2 ms vs 33.9 ms ≈ 15x slowdown!!!