2024-heraklion-comp-arch/puzzle.ipynb

4.5 KiB

In [ ]:
import numpy as np
In [ ]:
# Build the collection of time series used by the rest of the notebook.
# The zeros are placeholders: in real life, this data comes from an
# experiment/simulation.
n_series = 128
len_one_series = 1 << 20  # 2^20 = 1,048,576 items; as float64: 8 B x 2^20 = 8,388,608 B = 8 MiB per series
time_series = np.zeros(shape=(n_series, len_one_series), dtype='float64')
In [ ]:
# Memory footprint of the data, in units of 2^20 bytes (1 MiB, printed as "M").
ts_size = time_series[0].nbytes / (1 << 20)   # first row = one time series
total_size = time_series.nbytes / (1 << 20)   # all rows together
print(f'Size of one time series: {int(ts_size)} M')
print(f'Size of collection: {int(total_size)} M')
In [ ]:
# let's load the collection in one big array
def load_data_row(x, time_series):
    """Store one time series per row of ``x``.

    The i-th item yielded by ``time_series`` is assigned to ``x[i, :]``.
    ``x`` is modified in place and the same object is returned.
    """
    row = 0
    for series in time_series:
        x[row, :] = series
        row += 1
    return x
In [ ]:
# let's load the collection in one big array
def load_data_column(x, time_series):
    """Store one time series per column of ``x``.

    The i-th item yielded by ``time_series`` is assigned to ``x[:, i]``.
    ``x`` is modified in place and the same object is returned.
    """
    column = 0
    for series in time_series:
        x[:, column] = series
        column += 1
    return x
In [ ]:
x = np.zeros((n_series, len_one_series), dtype='float64')
%timeit load_data_row(x, time_series)
In [ ]:
x = np.zeros((len_one_series, n_series), dtype='float64')
%timeit load_data_column(x, time_series)
In [ ]:
# on my machine: 31 ms (load_data_row) vs 1240 ms (load_data_column) ≈ 40x slowdown!!!