add python benchmark

This commit is contained in:
Tiziano Zito 2024-08-05 16:33:30 +02:00
parent 57a8ab8eba
commit 3357e17e4f
6 changed files with 4915 additions and 0 deletions

View file

@ -0,0 +1,29 @@
## Benchmark
![](loading-timings-ns128.svg)
![](loading-slowdown-ns128.svg)
## Benchmarks details
- Create in memory a list of `N=128` one dimensional numpy arrays of length `L` starting from `L=2` up to `L=2²³=4_194_304` in steps of powers of two.
- Given that each item of the array is of type `float64`, i.e. 8 bytes, the size of the arrays goes from `16B` to `32M`
- The total memory required is at least `128 × 32M × 2 = 8G`
- Load the whole list in one big numpy array of size `N`x`L` (*good*) and `L`x`N` (*bad*). The corresponding loops are:
```python
# good loop (store each time series on a different row)
for row, time_series in enumerate(collection):
ts[row, :] = time_series
```
```python
# bad loop (store each time series on a different column)
for column, time_series in enumerate(collection):
ts[:, column] = time_series
```
- Time the *bad* and the *good* loop
- Plot the timings for the *good* and the *bad* loop as a function of `L`
- Plot `slowdown = time_bad/time_good` as a function of `L`
## Scripts
- [Benchmark script](bench.py) and [measurements](results_ns128)
- [Plotting script](bench_plot.py)

51
benchmark_python/bench.py Executable file
View file

@ -0,0 +1,51 @@
#!/usr/bin/python3
# Run it with
# export CPU=0; /usr/bin/taskset --cpu-list $CPU ./bench.py $CPU
import os
import sys
import timeit
import numpy as np
NSERIES = (128, )
POWS = 2**np.arange(2, 23, dtype=int)
# Size of one dimensional numpy arrays of dtype 'float64':
# A fix overhead of 96 bytes plus a variable size:
# (n_items x 8 bytes)
def load_data_row(x, time_series):
"""Store one time series per raw"""
for row, ts in enumerate(time_series):
x[row,:] = ts
return x
def load_data_column(x, time_series):
"""Store one time series per raw"""
for column, ts in enumerate(time_series):
x[:, column] = ts
return x
if __name__ == '__main__':
for nseries in NSERIES:
print(30*'=', '\n', nseries)
float_items = POWS
byte_sizes = (float_items*8) #+ 96
bads = []
goods = []
results = open(f'results_ns{nseries}', 'wt')
for i, len_one_series in enumerate(float_items):
time_series = np.zeros((nseries, len_one_series), dtype='float64')
x = np.empty((nseries, len_one_series), dtype='float64')
print('Timing good...')
good = min(timeit.repeat(lambda: load_data_row(x, time_series), number=5))/5
x = np.empty((len_one_series, nseries), dtype='float64')
print('Timing bad...')
bad = min(timeit.repeat(lambda: load_data_column(x, time_series), number=5))/5
print(f'{len_one_series}/{POWS[-1]} {good} {bad}')
bads.append(bad)
goods.append(good)
results.write(f'{byte_sizes[i]} {good} {bad}\n')
results.flush()
results.close()

106
benchmark_python/bench_plot.py Executable file
View file

@ -0,0 +1,106 @@
#!/usr/bin/python3
import os
import sys
import numpy as np
import matplotlib
from matplotlib import pyplot as plt
plt.style.use('ggplot')
matplotlib.rcParams['font.size'] = 12
matplotlib.rcParams['font.family'] = ['Exo 2', 'sans-serif']
from bench import NSERIES
def get_xlabels(x):
xlabels = []
for value in x:
b = int(2**value)
if b < 1024:
xlabels.append(f'{b}B')
elif b < 1048576:
xlabels.append(f'{b//1024}K')
elif b < 1073741824:
xlabels.append(f'{b//1024//1024}M')
else:
xlabels.append(f'{b//1024//1024//1024}G')
return xlabels
def get_ylabels(y):
ylabels = []
for power in y:
power = int(np.log10(power))
if power < -6:
value = 10**(power+9)
ylabels.append(f'{value}ns')
elif power < -3:
value = 10**(power+6)
ylabels.append(f'{value}μs')
elif power < 0:
value = 10**(power+3)
ylabels.append(f'{value}ms')
else:
value = 10**power
ylabels.append(f'{value}s')
return ylabels
prefix = 'results_ns'
maxy = 1e1
miny = 1e-6
for results in (f for f in os.listdir('.') if f.startswith(prefix)):
num_series = results.removeprefix(prefix)
sizes, bads, goods = [], [], []
with open(results, 'r') as fh:
for line in fh:
size, good, bad = line.split()
bads.append(float(bad))
goods.append(float(good))
sizes.append(int(size))
goods = np.array(goods)
bads = np.array(bads)
x = np.log2(sizes)
y1 = goods
y2 = bads
# generate two plots: good+bad timings and slowdown plot
plt.figure(figsize=(8.5, 7.5))
p1, = plt.semilogy(x, y1, 'o')
p2, = plt.semilogy(x, y2, 'o')
plt.xlabel('size of one time series')
plt.ylabel('loading time')
plt.grid(None)
plt.grid(which='both', axis='both')
plt.xticks(x, get_xlabels(x), rotation=60)
plt.ylim(miny, maxy)
yticks = np.logspace(int(np.log10(miny)),
int(np.log10(maxy)),
num=int(np.log10(maxy/miny))+1)
plt.yticks(yticks, get_ylabels(yticks))
plt.tick_params(axis='y', labelright=True, right=True)
lgd = plt.legend((p1, p2), ('good', 'bad'), frameon=True)
lgd.get_frame().set_edgecolor('black')
plt.title(f'Timings\n{num_series} time series')
plt.savefig(f'loading-timings-ns{num_series}.svg')
# slowdown plot
plt.figure(figsize=(8.5, 7.5))
p1, = plt.plot(x, bads/goods, 'og', label=r'$\frac{\mathrm{time\_bad}}{\mathrm{time\_good}}$')
plt.xlabel('size of one time series')
plt.ylabel('slowdown')
plt.grid(None)
plt.grid(which='both', axis='both')
plt.xticks(x, get_xlabels(x), rotation=60)
plt.tick_params(axis='y', which='both', reset=True, labelright=True, right=True)
lmaxy = (bads/goods).max()
yticks = range(0, int(np.ceil(lmaxy))+1)
yticks_labels = []
for i in yticks:
if not i%5:
yticks_labels.append(str(i))
else:
yticks_labels.append('')
plt.yticks(yticks, yticks_labels)
#plt.legend((p1,), ('time_bad/time_good',))
lgd = plt.legend(frameon=True, fontsize=16)
lgd.get_frame().set_edgecolor('black')
plt.title(f'Slowdown\n{num_series} time series')
plt.savefig(f'loading-slowdown-ns{num_series}.svg')

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 70 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 73 KiB

View file

@ -0,0 +1,21 @@
32 3.624640012276359e-05 3.637080008047633e-05
64 3.6378599907038735e-05 3.75960000383202e-05
128 3.5999999818159266e-05 3.659379981399979e-05
256 3.647979974630289e-05 3.8503600080730394e-05
512 3.6466600067797114e-05 4.5260399929247797e-05
1024 3.694279985211324e-05 6.389359987224452e-05
2048 3.771199990296736e-05 0.00010158859986404422
4096 3.9600799937034026e-05 0.00017927039989444892
8192 4.872880017501302e-05 0.00035019980023207606
16384 0.0001166390000435058 0.0009462125999561977
32768 0.00020714380007120782 0.0018965299997944385
65536 0.00041309340012958274 0.003799166400131071
131072 0.0009548601999995298 0.013321203599844011
262144 0.002020656999957282 0.03497214360031649
524288 0.004090915599954314 0.07588242120000359
1048576 0.008223017600175807 0.15468134920010926
2097152 0.01658681320004689 0.3135109368002304
4194304 0.03318921820027754 0.629940018599882
8388608 0.031091862199900788 1.2587968365998676
16777216 0.0620614524003031 2.517574722000063
33554432 0.125347068800329 5.040295794399936