In [None]:
import os
import pprint
import numpy as np
# you may need to pip-install this
import threadpoolctl as th

In [None]:
# Side length of the square test matrix; change it to scale the workload.
N = 10_000
# N x N float64 zeros: N * N * 8 bytes, i.e. 800 MB for N = 10_000.
x = np.zeros(shape=(N, N), dtype="float64")

In [None]:
# Watch htop in a terminal while this cell runs, then change N and run it
# again ➔ observe how the work is spread over the CPUs and how the CPU
# frequencies are adjusted!
y = np.matmul(x, x)

In [None]:
# now control the number of OpenMP/BLAS threads with threadpoolctl
# monitor with htop ➔ see how the one process jumps around CPUs

In [None]:
# Cap the BLAS thread pools at one thread for the duration of the block;
# the previous limits come back when the block is left.
single_thread_blas = th.threadpool_limits(limits=1, user_api='blas')
with single_thread_blas:
    y = np.matmul(x, x)

In [None]:
# Pretty-print what threadpoolctl reports about the loaded OpenMP/BLAS thread pools
pool_infos = th.threadpool_info()
pprint.pprint(pool_infos)

In [None]:
# How to limit the jumping around?
# Start by looking at the set of CPUs we are currently allowed to run on.
CALLING_PROCESS = 0  # pid 0 means the "calling" process, i.e. this very process
os.sched_getaffinity(CALLING_PROCESS)

In [None]:
# let's make our process stick to CPU0!
# The affinity mask is persistent: it is NOT tied to the `with` block and would
# otherwise stay in effect for every later cell. Remember the current mask and
# put it back when done, so the cell can be re-run and leaves no hidden state.
previous_affinity = os.sched_getaffinity(0)
try:
    with th.threadpool_limits(limits=1, user_api='blas'):
        os.sched_setaffinity(0, {0})
        y = x @ x
finally:
    os.sched_setaffinity(0, previous_affinity)

In [None]:
# let's see what happens if we move it to an E-core
# NOTE: CPU 10 is specific to the machine this was written on; check your own
# topology (e.g. `lscpu --extended`) and adjust. Asking for a CPU that does not
# exist raises OSError.
# The affinity mask is persistent (not tied to the `with` block), so remember
# the current mask and restore it afterwards.
previous_affinity = os.sched_getaffinity(0)
try:
    with th.threadpool_limits(limits=1, user_api='blas'):
        os.sched_setaffinity(0, {10})
        y = x @ x
finally:
    os.sched_setaffinity(0, previous_affinity)

In [None]:
# and now let's try to force it to use the two physical P-cores, and go around HyperThreading ;-)
# note that we are changing to limits=2!
#
# On Linux the affinity mask is a per-thread attribute and
# os.sched_setaffinity(0, ...) only changes the *calling* thread. BLAS worker
# threads that already exist keep their old mask, so with two threads the
# second one would still be free to run anywhere. Therefore pin every thread
# of this process (they are listed in /proc/self/task) and restore each
# thread's original mask afterwards, so no pinning leaks into later cells.
p_cores = {0, 2}

saved_affinities = {}
for task_name in os.listdir("/proc/self/task"):
    task_id = int(task_name)
    try:
        saved_affinities[task_id] = os.sched_getaffinity(task_id)
    except ProcessLookupError:
        pass  # the thread exited while we were listing

try:
    with th.threadpool_limits(limits=2, user_api='blas'):
        for task_id in saved_affinities:
            try:
                os.sched_setaffinity(task_id, p_cores)
            except ProcessLookupError:
                pass  # thread is gone, nothing to pin
        y = x @ x
finally:
    for task_id, mask in saved_affinities.items():
        try:
            os.sched_setaffinity(task_id, mask)
        except ProcessLookupError:
            pass  # thread is gone, nothing to restore