added low level benchmark

This commit is contained in:
Tiziano Zito 2024-08-11 01:23:12 +02:00
parent f8b83015fa
commit 05e5e18e59
12 changed files with 7776 additions and 0 deletions

View file

@ -0,0 +1,57 @@
# Low Level Memory Benchmark
These are the results of a low level memory benchmark (written in C) on my [laptop](../architecture/README.md)
## Summary plots (details below)
![Memory Bandwidth P-core](bandwidth-t14.svg)
![Memory Latency P-core](latency-t14.svg)
## Benchmark details:
- Bandwidth (read), [bw_mem_rd](http://lmbench.sourceforge.net/man/bw_mem_rd.8.html). Allocate the specified amount of memory, zero it, and then time the reading of that memory as a series of integer loads and adds. Each 4-byte integer is loaded and added to an accumulator.
[Results](t14-bwr.csv) (block size in MB, bandwidth in MB/s)
- Bandwidth (write), [bw_mem](http://lmbench.sourceforge.net/man/bw_mem.8.html). Allocate twice the specified amount of memory, zero it, and then time the copying of the first half to the second half.
[Results](t14-bww.csv) (block size in MB, bandwidth in MB/s)
- Latency (sequential access), [lat_mem_rd](http://lmbench.sourceforge.net/man/lat_mem_rd.8.html). Run two nested loops. The outer loop is the stride size of 128 bytes. The inner loop is the block size. For each block size, create a ring of pointers that point backward one stride. Traverse the block by `p = (char **)*p` in a for loop and time the load latency over the block.
[Results](t14-lseq.csv) (block size in MB, latency in ns)
- Latency (random access). Like above, but with a stride size of 16 bytes.
[Results](t14-lrnd.csv) (block size in MB, latency in ns)
## Running the benchmarks on Linux:
- You need the [lmbench](http://lmbench.sourceforge.net/) library and [cpuset](https://github.com/SUSE/cpuset)
- All commands must be run as root after having killed as many processes/services as possible, so that the CPUs are almost idle
- Disable address space randomization:
```bash
echo 0 > /proc/sys/kernel/randomize_va_space
```
- Set scaling governor to performance for CPU0:
```bash
echo performance > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
```
- Reserve CPU 0 for our benchmark, i.e. kick out (almost) all other processes
```bash
cset shield --cpu 0 --kthread=on
```
- If you are on INTEL and CPU0 is part of a SMT-pair (hyperthreading), disable the peer
```bash
echo 0 > /sys/devices/system/cpu/cpu1/online
```
- Disable turbo mode on INTEL:
```bash
echo 1 > /sys/devices/system/cpu/intel_pstate/no_turbo
```
- Run the configuration script for lmbench. Select only the `HARDWARE` set of benchmarks and set the maximum amount of memory to something like 1024MB
```bash
cd /usr/lib/lmbench/scripts
# the following command will create the configuration file /usr/lib/lmbench/bin/x86_64-linux-gnu/CONFIG.<hostname>
cset shield --exec -- ./config-run
# run the benchmark
cset shield --exec -- /usr/bin/lmbench-run
# results are in /var/lib/lmbench/results/x86_64-linux-gnu/<hostname>
```

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 50 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 49 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 52 KiB

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 52 KiB

View file

@ -0,0 +1,39 @@
import os
import sys
# Split an lmbench results file into one CSV file per memory benchmark.
#
# Usage: <this script> results_<name>
#
# For every section listed in SECTION_HEADERS that is present in the input,
# a file called '<name>-<tag>.csv' is written, where <name> is the command
# line argument with a leading 'results_' removed.

# Maps the text a section header line starts with to the tag used in the
# name of the CSV file generated for that section.
SECTION_HEADERS = {
    'Memory read bandwidth': 'bwr',
    'Memory write bandwidth': 'bww',
    'Memory load latency': 'lseq',
    'Random load latency': 'lrnd',
}


def _find_sections(lines):
    """Return {tag: index of the header line} for the sections in *lines*.

    If a header occurs more than once, the last occurrence wins.
    """
    found = {}
    for idx, line in enumerate(lines):
        for header, tag in SECTION_HEADERS.items():
            if line.startswith(header):
                found[tag] = idx
                break
    return found


def _section_rows(lines, header_idx):
    """Yield the CSV rows of the section whose header is at *header_idx*.

    Lines starting with a double quote (e.g. '"stride=128') are skipped.
    The section ends at the first other line that does not consist of
    exactly two space-separated fields, or at the end of the input.
    """
    for line in lines[header_idx + 1:]:
        if line.startswith('"'):
            continue
        fields = line.split(' ')
        if len(fields) != 2:
            # we are at the end of the section
            return
        # the second field still carries the line terminator
        yield ','.join(fields)


def main(argv=None):
    """Convert the results file named by the first argument to CSV files.

    *argv* is the list of command line arguments without the program name;
    it defaults to ``sys.argv[1:]``. Exits with a usage message when no
    argument is given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit(f'usage: {sys.argv[0]} results_<name>')
    path = args[0]
    # prefix is something like results_
    name = path.removeprefix('results_')
    # Read the input once; every section is then extracted from memory.
    with open(path, 'rt') as results:
        lines = results.readlines()
    for typ, idx in _find_sections(lines).items():
        # The context manager closes the file even if the section runs
        # up to the end of the input.
        with open(f'{name}-{typ}.csv', 'wt') as out:
            out.writelines(_section_rows(lines, idx))


if __name__ == '__main__':
    main()

111
benchmark_low_level/plot.py Normal file
View file

@ -0,0 +1,111 @@
import os
import sys
import numpy as np
import matplotlib
import itertools
from matplotlib import pyplot as plt
# Global figure appearance: ggplot style sheet first, then the font size.
plt.style.use('ggplot')
matplotlib.rc('font', size=12)

# Prefix of the CSV files that are read and of the figures that are written.
name = 't14'
# Cache sizes in bytes; they are drawn as the L1, L2, L3 markers in the plots.
caches = (48 * 2**10, 1280 * 2**10, 12 * 2**20)
def get_labels(x):
    """Return a list of size labels for block sizes given as powers of two.

    Every element ``v`` of *x* is taken as a size of ``int(2**v)`` bytes and
    rendered with the unit B, K, M or G, whichever is the first one for
    which the size is below 1024 of that unit (G is used for everything
    from 1024M upwards). The number is truncated to a whole unit count.
    """
    def label(exponent):
        size = int(2**exponent)
        # (unit, log2 of the unit size in bytes)
        for unit, shift in (('B', 0), ('K', 10), ('M', 20)):
            if size < 1024 << shift:
                return f'{size >> shift}{unit}'
        return f'{size >> 30}G'

    return [label(exponent) for exponent in x]
# Manually chosen y ticks per plot type ('l' = latency, 'bw' = bandwidth).
# To disable, set ticks = None.
base = np.linspace(1, 10, 9, endpoint=False)
# 1..9, 10..90, 100 and 200
yticks = [*base, *(base * 10), *(base[:2] * 100)]
# only these tick values get a printed label
ylabels = (1, 10, 100)
ticks = {
    'l': (yticks,
          ['' if tick not in ylabels else str(int(tick)) for tick in yticks]),
    'bw': (range(5, 46, 5), range(5, 46, 5)),
}

# Manually chosen y limits per plot type. To disable, set ylim = None.
ylim = {
    'l': (1, 200),
    'bw': (5, 45),
}
# Everything that differs between the two figures, keyed by plot type:
# (CSV suffixes, title, legend entries, output file, plotting function).
per_type = {
    'bw': (('r', 'w'),
           f'Memory Bandwidth ({name}) [GB/s]',
           ('read', 'write'),
           f'bandwidth-{name}.svg',
           plt.plot),
    'l': (('seq', 'rnd'),
          f'Memory Latency ({name}) [ns]',
          ('sequential access', 'random access'),
          f'latency-{name}.svg',
          plt.semilogy),
}

for type_, (suffix, title, (legend1, legend2), pic, plt_func) in per_type.items():
    ylabel = ''
    data1 = np.loadtxt(f'{name}-{type_}{suffix[0]}.csv', delimiter=',')
    data2 = np.loadtxt(f'{name}-{type_}{suffix[1]}.csv', delimiter=',')

    # Bandwidth uses every row, latency only every second one.
    rows = slice(None) if type_ == 'bw' else slice(None, None, 2)
    # convert to bytes and then to the corresponding power of two
    x1 = np.log2(data1[rows, 0]*1024*1024).round()
    x2 = np.log2(data2[rows, 0]*1024*1024).round()
    y1, y2 = data1[rows, 1], data2[rows, 1]
    if type_ == 'bw':
        # scale the bandwidth down by 1024 to match the unit in the title
        y1, y2 = y1/1024, y2/1024

    ylabels = None
    xlabel = 'block size'
    xlabels = get_labels(x1)

    plt.figure(figsize=(8.5, 7.5))
    p1, = plt_func(x1, y1, 'o')
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
    p2, = plt_func(x2, y2, 'o')

    has_limits = bool(ylim) and type_ in ylim
    if has_limits:
        plt.ylim(*ylim[type_])
    plt.xticks(x1, xlabels, rotation=60)
    if ticks and type_ in ticks:
        plt.yticks(*ticks[type_])
    plt.legend((p1, p2), (legend1, legend2))

    # Vertical extent of the cache markers: the manual limits if there are
    # any, otherwise the range covered by the data.
    if has_limits:
        miny, maxy = ylim[type_]
    else:
        miny = min(y1.min(), y2.min())
        maxy = max(y1.max(), y2.max())

    # caches: one vertical line and one label per cache level
    for level, cache in enumerate(caches, start=1):
        size = np.log2(cache)
        plt.plot((size, size), (miny, maxy),
                 color='darkblue', alpha=0.4)
        plt.text(size-1, 2*miny, f'L{level}\n',
                 color='darkblue', verticalalignment='top')

    plt.title(title)
    plt.savefig(pic)

View file

@ -0,0 +1,485 @@
[lmbench3.0 results for Linux multivac 6.10.3-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.10.3-1 (2024-08-04) x86_64 GNU/Linux]
[LMBENCH_VER: 3.0-a9]
[BENCHMARK_HARDWARE: YES]
[BENCHMARK_OS: NO]
[ALL: 512 1k 2k 4k 8k 16k 32k 64k 128k 256k 512k 1m 2m 4m 8m 16m 32m 64m 128m 256m 512m 1024m]
[DISKS: ]
[DISK_DESC: ]
[ENOUGH: 5000]
[FAST: ]
[FASTMEM: NO]
[FILE: /var/tmp/lmbench/XXX]
[FSDIR: /var/tmp/lmbench]
[HALF: 512 1k 2k 4k 8k 16k 32k 64k 128k 256k 512k 1m 2m 4m 8m 16m 32m 64m 128m 256m 512m]
[INFO: INFO.multivac]
[LINE_SIZE: ]
[LOOP_O: 0.00000000]
[MB: 1024]
[MHZ: 1296 MHz, 0.7716 nanosec clock]
[MOTHERBOARD: ]
[NETWORKS: ]
[PROCESSORS: 11]
[REMOTE: ]
[SLOWFS: YES]
[OS: x86_64-linux-gnu]
[SYNC_MAX: 1]
[LMBENCH_SCHED: DEFAULT]
[TIMING_O: 0]
[LMBENCH VERSION: 3.0-20240810]
[USER: root]
[HOSTNAME: multivac]
[NODENAME: multivac]
[SYSNAME: Linux]
[PROCESSOR: unknown]
[MACHINE: x86_64]
[RELEASE: 6.10.3-amd64]
[VERSION: #1 SMP PREEMPT_DYNAMIC Debian 6.10.3-1 (2024-08-04)]
[Sat Aug 10 04:20:43 PM CEST 2024]
[ 16:20:43 up 1:18, 4 users, load average: 0.37, 0.94, 1.05]
[net: Kernel Interface table]
[net: Iface MTU RX-OK RX-ERR RX-DRP RX-OVR TX-OK TX-ERR TX-DRP TX-OVR Flg]
[net: eth0 1500 0 0 0 0 0 0 0 0 BMU]
[if: eth0: flags=4099<UP,BROADCAST,MULTICAST> mtu 1500]
[if: ether fc:5c:ee:4d:b5:eb txqueuelen 1000 (Ethernet)]
[if: RX packets 0 bytes 0 (0.0 B)]
[if: RX errors 0 dropped 0 overruns 0 frame 0]
[if: TX packets 0 bytes 0 (0.0 B)]
[if: TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0]
[if: device interrupt 16 memory 0xbc300000-bc320000]
[if: ]
[net: eth1 1500 34948 0 2352 0 7773 0 0 0 BMRU]
[if: eth1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500]
[if: inet 192.168.111.103 netmask 255.255.255.0 broadcast 192.168.111.255]
[if: inet6 fe80::44e3:4a35:5130:3045 prefixlen 64 scopeid 0x20<link>]
[if: inet6 2003:ef:2f2e:c900:e437:85c7:3d97:f353 prefixlen 64 scopeid 0x0<global>]
[if: ether b0:4f:13:ef:1a:3e txqueuelen 1000 (Ethernet)]
[if: RX packets 34948 bytes 33936985 (32.3 MiB)]
[if: RX errors 0 dropped 2352 overruns 0 frame 0]
[if: TX packets 7773 bytes 1213416 (1.1 MiB)]
[if: TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0]
[if: ]
[net: lo 65536 95 0 0 0 95 0 0 0 LRU]
[if: lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536]
[if: inet 127.0.0.1 netmask 255.0.0.0]
[if: inet6 ::1 prefixlen 128 scopeid 0x10<host>]
[if: loop txqueuelen 1000 (Local Loopback)]
[if: RX packets 95 bytes 5588 (5.4 KiB)]
[if: RX errors 0 dropped 0 overruns 0 frame 0]
[if: TX packets 95 bytes 5588 (5.4 KiB)]
[if: TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0]
[if: ]
[mount: sysfs on /sys type sysfs (rw,nosuid,nodev,noexec,relatime)]
[mount: proc on /proc type proc (rw,relatime)]
[mount: udev on /dev type devtmpfs (rw,nosuid,relatime,size=16228560k,nr_inodes=4057140,mode=755,inode64)]
[mount: devpts on /dev/pts type devpts (rw,nosuid,noexec,relatime,gid=5,mode=620,ptmxmode=000)]
[mount: tmpfs on /run type tmpfs (rw,nosuid,nodev,noexec,relatime,size=3251140k,mode=755,inode64)]
[mount: /dev/mapper/CRYPT-ROOT on / type ext4 (rw,relatime,errors=remount-ro)]
[mount: securityfs on /sys/kernel/security type securityfs (rw,nosuid,nodev,noexec,relatime)]
[mount: tmpfs on /dev/shm type tmpfs (rw,nosuid,nodev,inode64)]
[mount: cgroup2 on /sys/fs/cgroup type cgroup2 (rw,nosuid,nodev,noexec,relatime,nsdelegate,memory_recursiveprot)]
[mount: pstore on /sys/fs/pstore type pstore (rw,nosuid,nodev,noexec,relatime)]
[mount: efivarfs on /sys/firmware/efi/efivars type efivarfs (rw,nosuid,nodev,noexec,relatime)]
[mount: bpf on /sys/fs/bpf type bpf (rw,nosuid,nodev,noexec,relatime,mode=700)]
[mount: systemd-1 on /proc/sys/fs/binfmt_misc type autofs (rw,relatime,fd=39,pgrp=1,timeout=0,minproto=5,maxproto=5,direct,pipe_ino=67)]
[mount: hugetlbfs on /dev/hugepages type hugetlbfs (rw,nosuid,nodev,relatime,pagesize=2M)]
[mount: none on /sys/kernel/debug type debugfs (rw,nosuid,nodev,noexec,relatime)]
[mount: mqueue on /dev/mqueue type mqueue (rw,nosuid,nodev,noexec,relatime)]
[mount: tmpfs on /run/lock type tmpfs (rw,nosuid,nodev,noexec,relatime,size=5120k,inode64)]
[mount: tracefs on /sys/kernel/tracing type tracefs (rw,nosuid,nodev,noexec,relatime)]
[mount: tmpfs on /run/credentials/systemd-journald.service type tmpfs (ro,nosuid,nodev,noexec,relatime,nosymfollow,size=1024k,nr_inodes=1024,mode=700,inode64,noswap)]
[mount: tmpfs on /run/credentials/systemd-udev-load-credentials.service type tmpfs (ro,nosuid,nodev,noexec,relatime,nosymfollow,size=1024k,nr_inodes=1024,mode=700,inode64,noswap)]
[mount: tmpfs on /run/credentials/systemd-tmpfiles-setup-dev-early.service type tmpfs (ro,nosuid,nodev,noexec,relatime,nosymfollow,size=1024k,nr_inodes=1024,mode=700,inode64,noswap)]
[mount: configfs on /sys/kernel/config type configfs (rw,nosuid,nodev,noexec,relatime)]
[mount: fusectl on /sys/fs/fuse/connections type fusectl (rw,nosuid,nodev,noexec,relatime)]
[mount: tmpfs on /run/credentials/systemd-sysctl.service type tmpfs (ro,nosuid,nodev,noexec,relatime,nosymfollow,size=1024k,nr_inodes=1024,mode=700,inode64,noswap)]
[mount: tmpfs on /run/credentials/systemd-tmpfiles-setup-dev.service type tmpfs (ro,nosuid,nodev,noexec,relatime,nosymfollow,size=1024k,nr_inodes=1024,mode=700,inode64,noswap)]
[mount: tmpfs on /tmp type tmpfs (rw,nosuid,nodev,relatime,size=16777216k,inode64)]
[mount: tmpfs on /run/credentials/systemd-tmpfiles-setup.service type tmpfs (ro,nosuid,nodev,noexec,relatime,nosymfollow,size=1024k,nr_inodes=1024,mode=700,inode64,noswap)]
[mount: binfmt_misc on /proc/sys/fs/binfmt_misc type binfmt_misc (rw,nosuid,nodev,noexec,relatime)]
[mount: sunrpc on /run/rpc_pipefs type rpc_pipefs (rw,relatime)]
[mount: tmpfs on /run/user/1002 type tmpfs (rw,nosuid,nodev,relatime,size=3251136k,nr_inodes=812784,mode=700,uid=1002,gid=100,inode64)]
[mount: tmpfs on /run/credentials/getty@tty1.service type tmpfs (ro,nosuid,nodev,noexec,relatime,nosymfollow,size=1024k,nr_inodes=1024,mode=700,inode64,noswap)]
[mount: tmpfs on /run/user/0 type tmpfs (rw,nosuid,nodev,relatime,size=3251136k,nr_inodes=812784,mode=700,inode64)]
[mount: none on /cpusets type cgroup (rw,relatime,cpuset,noprefix,release_agent=/sbin/cpuset_release_agent)]
integer bit: 0.54 nanoseconds
integer add: 0.77 nanoseconds
integer div: 8.49 nanoseconds
integer mod: 12.58 nanoseconds
int64 bit: 0.52 nanoseconds
uint64 add: 0.77 nanoseconds
int64 div: 11.58 nanoseconds
int64 mod: 14.91 nanoseconds
float add: 1.54 nanoseconds
float mul: 3.09 nanoseconds
float div: 8.49 nanoseconds
double add: 1.54 nanoseconds
double mul: 3.09 nanoseconds
double div: 10.80 nanoseconds
float bogomflops: 1.16 nanoseconds
double bogomflops: 1.54 nanoseconds
integer bit parallelism: 2.77
integer add parallelism: 2.73
integer div parallelism: 1.83
integer mod parallelism: 2.83
int64 bit parallelism: 2.49
int64 add parallelism: 2.60
int64 div parallelism: 1.50
int64 mod parallelism: 1.90
float add parallelism: 4.00
float mul parallelism: 8.00
float div parallelism: 3.67
double add parallelism: 4.00
double mul parallelism: 8.00
double div parallelism: 3.50
unable to register (XACT_PROG, XACT_VERS, udp).
: RPC: Unable to receive
"libc bcopy unaligned
0.000512 41652.95
0.001024 47761.25
0.002048 50233.88
0.004096 55637.27
0.008192 64524.03
0.016384 67719.30
0.032768 18212.36
0.065536 18407.52
0.131072 18473.55
0.262144 18475.00
0.524288 14642.79
1.05 8957.30
2.10 8208.03
4.19 8208.03
8.39 9645.77
16.78 7631.79
33.55 7129.38
67.11 6951.41
134.22 6900.65
268.44 6848.89
536.87 6861.76
"libc bcopy aligned
0.000512 44106.76
0.001024 49354.68
0.002048 51472.69
0.004096 55925.21
0.008192 63828.24
0.016384 66379.51
0.032768 18202.45
0.065536 18336.03
0.131072 18457.77
0.262144 18327.76
0.524288 15715.46
1.05 8922.33
2.10 8367.89
4.19 8343.10
8.39 9679.16
16.78 7632.95
33.55 7179.72
67.11 6990.51
134.22 6911.31
268.44 6892.15
536.87 6891.97
Memory bzero bandwidth
0.000512 73586.23
0.001024 78019.46
0.002048 80349.42
0.004096 74573.30
0.008192 78524.11
0.016384 80567.79
0.032768 81708.84
0.065536 21219.16
0.131072 21299.79
0.262144 21333.96
0.524288 21347.23
1.05 19382.88
2.10 12829.98
4.19 12611.10
8.39 12606.02
16.78 10399.64
33.55 9537.93
67.11 9140.41
134.22 9007.90
268.44 8931.77
536.87 8918.57
1073.74 8908.13
"unrolled bcopy unaligned
0.000512 10357.22
0.001024 10363.21
0.002048 10356.95
0.004096 10357.76
0.008192 10343.49
0.016384 10351.27
0.032768 7899.27
0.065536 7893.76
0.131072 7873.84
0.262144 7832.99
0.524288 7281.78
1.05 6503.77
2.10 6418.22
4.19 6461.47
8.39 5194.99
16.78 4722.65
33.55 4639.72
67.11 4606.91
134.22 4593.51
268.44 4596.34
536.87 4587.27
"unrolled partial bcopy unaligned
0.000512 41402.69
0.001024 41453.86
0.002048 41452.30
0.004096 41425.45
0.008192 41418.12
0.016384 41333.58
0.032768 18957.19
0.065536 18955.39
0.131072 18962.49
0.262144 18969.69
0.524288 14659.04
1.05 8844.77
2.10 8192.00
4.19 8206.57
8.39 6326.25
16.78 5801.25
33.55 5644.14
67.11 5609.70
134.22 5600.81
268.44 5589.38
536.87 5591.24
Memory read bandwidth
0.000512 29201.61
0.001024 29294.55
0.002048 29363.12
0.004096 29433.86
0.008192 29442.59
0.016384 29285.40
0.032768 29336.30
0.065536 27978.05
0.131072 28392.59
0.262144 28408.05
0.524288 28424.68
1.05 28385.92
2.10 28385.92
4.19 28395.43
8.39 28334.85
16.78 26342.45
33.55 23489.28
67.11 22195.75
134.22 21644.53
268.44 21620.12
536.87 21505.80
1073.74 21526.50
Memory partial read bandwidth
0.000512 58916.90
0.001024 59661.44
0.002048 61203.68
0.004096 58783.21
0.008192 61320.45
0.016384 61266.70
0.032768 60940.09
0.065536 30488.23
0.131072 30517.76
0.262144 30516.13
0.524288 29627.83
1.05 24662.86
2.10 17384.93
4.19 17168.66
8.39 16915.36
16.78 13189.64
33.55 11584.48
67.11 11024.95
134.22 10892.53
268.44 10824.45
536.87 10781.84
1073.74 10759.80
Memory write bandwidth
0.000512 41405.52
0.001024 41396.47
0.002048 41429.93
0.004096 41445.34
0.008192 41401.00
0.016384 41398.70
0.032768 41426.50
0.065536 21381.05
0.131072 21388.82
0.262144 21374.31
0.524288 21370.17
1.05 18114.68
2.10 12417.83
4.19 12264.05
8.39 12250.61
16.78 9679.16
33.55 8978.98
67.11 8703.00
134.22 8589.38
268.44 8520.41
536.87 8543.59
1073.74 8544.75
Memory partial write bandwidth
0.000512 41406.05
0.001024 41431.27
0.002048 41414.90
0.004096 41425.45
0.008192 41431.04
0.016384 41453.60
0.032768 41366.48
0.065536 21392.21
0.131072 21364.37
0.262144 21381.05
0.524288 21366.56
1.05 18649.81
2.10 12411.48
4.19 12249.30
8.39 12300.01
16.78 9693.61
33.55 9024.86
67.11 8771.25
134.22 8618.06
268.44 8557.89
536.87 8549.44
1073.74 8543.32
Memory partial read/write bandwidth
0.000512 20712.63
0.001024 20714.87
0.002048 20703.88
0.004096 20718.77
0.008192 20719.28
0.016384 20715.33
0.032768 20722.87
0.065536 20693.70
0.131072 20690.58
0.262144 20638.28
0.524288 20665.37
1.05 18846.95
2.10 12887.53
4.19 12613.33
8.39 12576.62
16.78 10295.93
33.55 9551.50
67.11 9191.74
134.22 9087.19
268.44 9035.49
536.87 9018.95
1073.74 9023.04
Usage: tlb [-c] [-L <line size>] [-M len[K|M]] [-W <warmup>] [-N <repetitions>]
Memory load parallelism
Usage: par_mem [-L <line size>] [-M len[K|M]] [-W <warmup>] [-N <repetitions>]
STREAM copy latency: 1.48 nanoseconds
STREAM copy bandwidth: 10781.39 MB/sec
STREAM scale latency: 1.50 nanoseconds
STREAM scale bandwidth: 10668.00 MB/sec
STREAM add latency: 2.11 nanoseconds
STREAM add bandwidth: 11374.24 MB/sec
STREAM triad latency: 2.13 nanoseconds
STREAM triad bandwidth: 11264.63 MB/sec
STREAM2 fill latency: 0.89 nanoseconds
STREAM2 fill bandwidth: 8955.12 MB/sec
STREAM2 copy latency: 1.48 nanoseconds
STREAM2 copy bandwidth: 10775.60 MB/sec
STREAM2 daxpy latency: 1.81 nanoseconds
STREAM2 daxpy bandwidth: 13256.52 MB/sec
STREAM2 sum latency: 1.60 nanoseconds
STREAM2 sum bandwidth: 5006.72 MB/sec
Memory load latency
"stride=128
0.00049 3.859
0.00098 3.859
0.00195 3.859
0.00293 3.859
0.00391 3.861
0.00586 3.858
0.00781 3.858
0.01172 3.859
0.01562 3.859
0.02344 3.859
0.03125 3.859
0.04688 3.861
0.06250 11.580
0.09375 11.576
0.12500 11.577
0.18750 11.583
0.25000 11.577
0.37500 11.576
0.50000 11.579
0.75000 11.578
1.00000 11.590
1.50000 13.543
2.00000 13.936
3.00000 13.999
4.00000 13.996
6.00000 13.997
8.00000 14.002
12.00000 14.976
16.00000 19.832
24.00000 20.880
32.00000 21.339
48.00000 21.899
64.00000 22.023
96.00000 22.156
128.00000 22.213
192.00000 22.283
256.00000 22.320
384.00000 22.306
512.00000 22.325
768.00000 22.345
1024.00000 22.361
Random load latency
"stride=16
0.00049 3.859
0.00098 3.858
0.00195 3.858
0.00293 3.858
0.00391 3.858
0.00586 3.858
0.00781 3.859
0.01172 3.858
0.01562 3.858
0.02344 3.859
0.03125 3.859
0.04688 3.864
0.06250 11.575
0.09375 14.276
0.12500 15.462
0.18750 16.079
0.25000 16.646
0.37500 16.373
0.50000 16.352
0.75000 18.529
1.00000 18.245
1.50000 42.351
2.00000 55.350
3.00000 61.011
4.00000 62.143
6.00000 63.587
8.00000 65.259
12.00000 84.563
16.00000 107.165
24.00000 131.898
32.00000 141.864
48.00000 150.654
64.00000 156.245
96.00000 162.950
128.00000 167.497
192.00000 170.394
256.00000 171.779
384.00000 172.858
512.00000 172.877
768.00000 173.626
1024.00000 173.702
[Sat Aug 10 04:39:13 PM CEST 2024]

View file

@ -0,0 +1,22 @@
0.000512,29201.61
0.001024,29294.55
0.002048,29363.12
0.004096,29433.86
0.008192,29442.59
0.016384,29285.40
0.032768,29336.30
0.065536,27978.05
0.131072,28392.59
0.262144,28408.05
0.524288,28424.68
1.05,28385.92
2.10,28385.92
4.19,28395.43
8.39,28334.85
16.78,26342.45
33.55,23489.28
67.11,22195.75
134.22,21644.53
268.44,21620.12
536.87,21505.80
1073.74,21526.50
1 0.000512 29201.61
2 0.001024 29294.55
3 0.002048 29363.12
4 0.004096 29433.86
5 0.008192 29442.59
6 0.016384 29285.40
7 0.032768 29336.30
8 0.065536 27978.05
9 0.131072 28392.59
10 0.262144 28408.05
11 0.524288 28424.68
12 1.05 28385.92
13 2.10 28385.92
14 4.19 28395.43
15 8.39 28334.85
16 16.78 26342.45
17 33.55 23489.28
18 67.11 22195.75
19 134.22 21644.53
20 268.44 21620.12
21 536.87 21505.80
22 1073.74 21526.50

View file

@ -0,0 +1,22 @@
0.000512,41405.52
0.001024,41396.47
0.002048,41429.93
0.004096,41445.34
0.008192,41401.00
0.016384,41398.70
0.032768,41426.50
0.065536,21381.05
0.131072,21388.82
0.262144,21374.31
0.524288,21370.17
1.05,18114.68
2.10,12417.83
4.19,12264.05
8.39,12250.61
16.78,9679.16
33.55,8978.98
67.11,8703.00
134.22,8589.38
268.44,8520.41
536.87,8543.59
1073.74,8544.75
1 0.000512 41405.52
2 0.001024 41396.47
3 0.002048 41429.93
4 0.004096 41445.34
5 0.008192 41401.00
6 0.016384 41398.70
7 0.032768 41426.50
8 0.065536 21381.05
9 0.131072 21388.82
10 0.262144 21374.31
11 0.524288 21370.17
12 1.05 18114.68
13 2.10 12417.83
14 4.19 12264.05
15 8.39 12250.61
16 16.78 9679.16
17 33.55 8978.98
18 67.11 8703.00
19 134.22 8589.38
20 268.44 8520.41
21 536.87 8543.59
22 1073.74 8544.75

View file

@ -0,0 +1,41 @@
0.00049,3.859
0.00098,3.858
0.00195,3.858
0.00293,3.858
0.00391,3.858
0.00586,3.858
0.00781,3.859
0.01172,3.858
0.01562,3.858
0.02344,3.859
0.03125,3.859
0.04688,3.864
0.06250,11.575
0.09375,14.276
0.12500,15.462
0.18750,16.079
0.25000,16.646
0.37500,16.373
0.50000,16.352
0.75000,18.529
1.00000,18.245
1.50000,42.351
2.00000,55.350
3.00000,61.011
4.00000,62.143
6.00000,63.587
8.00000,65.259
12.00000,84.563
16.00000,107.165
24.00000,131.898
32.00000,141.864
48.00000,150.654
64.00000,156.245
96.00000,162.950
128.00000,167.497
192.00000,170.394
256.00000,171.779
384.00000,172.858
512.00000,172.877
768.00000,173.626
1024.00000,173.702
1 0.00049 3.859
2 0.00098 3.858
3 0.00195 3.858
4 0.00293 3.858
5 0.00391 3.858
6 0.00586 3.858
7 0.00781 3.859
8 0.01172 3.858
9 0.01562 3.858
10 0.02344 3.859
11 0.03125 3.859
12 0.04688 3.864
13 0.06250 11.575
14 0.09375 14.276
15 0.12500 15.462
16 0.18750 16.079
17 0.25000 16.646
18 0.37500 16.373
19 0.50000 16.352
20 0.75000 18.529
21 1.00000 18.245
22 1.50000 42.351
23 2.00000 55.350
24 3.00000 61.011
25 4.00000 62.143
26 6.00000 63.587
27 8.00000 65.259
28 12.00000 84.563
29 16.00000 107.165
30 24.00000 131.898
31 32.00000 141.864
32 48.00000 150.654
33 64.00000 156.245
34 96.00000 162.950
35 128.00000 167.497
36 192.00000 170.394
37 256.00000 171.779
38 384.00000 172.858
39 512.00000 172.877
40 768.00000 173.626
41 1024.00000 173.702

View file

@ -0,0 +1,41 @@
0.00049,3.859
0.00098,3.859
0.00195,3.859
0.00293,3.859
0.00391,3.861
0.00586,3.858
0.00781,3.858
0.01172,3.859
0.01562,3.859
0.02344,3.859
0.03125,3.859
0.04688,3.861
0.06250,11.580
0.09375,11.576
0.12500,11.577
0.18750,11.583
0.25000,11.577
0.37500,11.576
0.50000,11.579
0.75000,11.578
1.00000,11.590
1.50000,13.543
2.00000,13.936
3.00000,13.999
4.00000,13.996
6.00000,13.997
8.00000,14.002
12.00000,14.976
16.00000,19.832
24.00000,20.880
32.00000,21.339
48.00000,21.899
64.00000,22.023
96.00000,22.156
128.00000,22.213
192.00000,22.283
256.00000,22.320
384.00000,22.306
512.00000,22.325
768.00000,22.345
1024.00000,22.361
1 0.00049 3.859
2 0.00098 3.859
3 0.00195 3.859
4 0.00293 3.859
5 0.00391 3.861
6 0.00586 3.858
7 0.00781 3.858
8 0.01172 3.859
9 0.01562 3.859
10 0.02344 3.859
11 0.03125 3.859
12 0.04688 3.861
13 0.06250 11.580
14 0.09375 11.576
15 0.12500 11.577
16 0.18750 11.583
17 0.25000 11.577
18 0.37500 11.576
19 0.50000 11.579
20 0.75000 11.578
21 1.00000 11.590
22 1.50000 13.543
23 2.00000 13.936
24 3.00000 13.999
25 4.00000 13.996
26 6.00000 13.997
27 8.00000 14.002
28 12.00000 14.976
29 16.00000 19.832
30 24.00000 20.880
31 32.00000 21.339
32 48.00000 21.899
33 64.00000 22.023
34 96.00000 22.156
35 128.00000 22.213
36 192.00000 22.283
37 256.00000 22.320
38 384.00000 22.306
39 512.00000 22.325
40 768.00000 22.345
41 1024.00000 22.361