3.7 KiB
3.7 KiB
In [ ]:
import os
import pprint
import numpy as np
# you may need to pip-install this
import threadpoolctl as th
In [ ]:
# Side length of the square test matrix (N x N float64 values, 8 bytes each).
N = 10_000
x = np.zeros(shape=(N, N), dtype=np.float64)
In [ ]:
# Keep htop open in a terminal while this cell runs, then run it again with a
# different N ➔ notice how the workload is spread over the CPUs and how
# the CPU frequencies are adjusted!
y = np.matmul(x, x)
In [ ]:
# now control the number of OpenMP/BLAS threads with threadpoolctl
# monitor with htop ➔ see how the single process jumps around the CPUs
In [ ]:
# Cap the BLAS thread pool at one thread for the duration of the `with` block;
# the matrix product must be indented so that it runs inside the limit.
with th.threadpool_limits(limits=1, user_api='blas'):
    y = x @ x
In [ ]:
# OpenMP/BLAS infos: pretty-print whatever threadpoolctl reports about the
# thread pools it can see (the exact fields depend on the threadpoolctl
# version and on the BLAS/OpenMP libraries that are loaded).
pprint.pprint(th.threadpool_info())
In [ ]:
# How to limit the jumping around?
# First look at the current CPU affinity: the set of CPU indices the scheduler
# is allowed to use for us. As the last expression of the cell, the set is
# displayed as the cell output.
os.sched_getaffinity(0) # 0 is the "calling" process, i.e. this very process
In [ ]:
# let's make our process stick to CPU0!
# The affinity mask is saved first and restored afterwards, so the pinning
# does not silently leak into every cell that is executed later.
with th.threadpool_limits(limits=1, user_api='blas'):
    previous_affinity = os.sched_getaffinity(0)
    os.sched_setaffinity(0, {0})
    try:
        y = x @ x
    finally:
        # undo the pinning even if the computation is interrupted
        os.sched_setaffinity(0, previous_affinity)
In [ ]:
# let's see what happens if we move it to an E-core
# NOTE: CPU index 10 is assumed to be an E-core on the machine this was written
# for; check your own topology (e.g. `lscpu -e`) and adjust the index. A CPU
# index that does not exist makes sched_setaffinity raise OSError.
# The affinity mask is saved first and restored afterwards, so the pinning
# does not silently leak into every cell that is executed later.
with th.threadpool_limits(limits=1, user_api='blas'):
    previous_affinity = os.sched_getaffinity(0)
    os.sched_setaffinity(0, {10})
    try:
        y = x @ x
    finally:
        # undo the pinning even if the computation is interrupted
        os.sched_setaffinity(0, previous_affinity)
In [ ]:
# and now let's try to force it to use the two physical P-cores, and go around HyperThreading ;-)
# note that we are changing to limits=2!
# NOTE: logical CPUs 0 and 2 are assumed to sit on two different physical
# P-cores (i.e. 0/1 and 2/3 are hyperthread siblings) — verify with `lscpu -e`.
# NOTE(review): on Linux, sched_setaffinity(0, ...) acts on the calling thread;
# BLAS worker threads that already exist may keep their previous mask — confirm
# in htop that both threads really stay on CPUs 0 and 2.
# The affinity mask is saved first and restored afterwards, so the pinning
# does not silently leak into every cell that is executed later.
with th.threadpool_limits(limits=2, user_api='blas'):
    previous_affinity = os.sched_getaffinity(0)
    os.sched_setaffinity(0, {0, 2})
    try:
        y = x @ x
    finally:
        # undo the pinning even if the computation is interrupted
        os.sched_setaffinity(0, previous_affinity)