2025-plovdiv-comp-arch/parallel.ipynb
2025-08-13 13:57:19 +02:00

150 lines
No EOL
3.7 KiB
Text

{
"cells": [
{
"cell_type": "code",
"source": [
"import os\n",
"import pprint\n",
"import numpy as np\n",
"# you may need to pip-install this\n",
"import threadpoolctl as th"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "7480c6b9"
},
{
"cell_type": "code",
"source": [
"# problem size: x is an N-by-N matrix of float64 zeros\n",
"N = 10_000\n",
"x = np.zeros(shape=(N, N), dtype=np.float64)"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "653a14c9"
},
{
"cell_type": "code",
"source": [
"# Keep htop open in a terminal while this cell runs, then change N and run\n",
"# it again ➔ observe how the workload is spread over the CPUs and how their\n",
"# frequencies are adjusted!\n",
"y = np.matmul(x, x)"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "3f3e7872"
},
{
"cell_type": "code",
"source": [
"# now control the number of OpenMP/BLAS threads with threadpoolctl\n",
"# monitor with htop ➔ see how the one process jumps around CPUs"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "eeec6203"
},
{
"cell_type": "code",
"source": [
"# cap the BLAS thread pool at a single thread while the product is computed\n",
"with th.threadpool_limits(limits=1, user_api=\"blas\"):\n",
"    y = np.matmul(x, x)"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "fbe3e515"
},
{
"cell_type": "code",
"source": [
"# OpenMP/BLAS info\n",
"pprint.pprint(th.threadpool_info())"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "6b1ef107"
},
{
"cell_type": "code",
"source": [
"# How to limit the jumping around? First, look at the set of CPUs this process is allowed to run on:\n",
"os.sched_getaffinity(0) # 0 is the \"calling\" process, i.e. this very process"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "9616d412"
},
{
"cell_type": "code",
"source": [
"# let's make our process stick to CPU0!\n",
"# remember the current CPU mask and restore it afterwards, so the pinning\n",
"# does not leak into cells that are run (or re-run) later\n",
"previous_affinity = os.sched_getaffinity(0)\n",
"with th.threadpool_limits(limits=1, user_api='blas'):\n",
"    os.sched_setaffinity(0, {0})\n",
"    try:\n",
"        y = x @ x\n",
"    finally:\n",
"        os.sched_setaffinity(0, previous_affinity)"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "a85806f8"
},
{
"cell_type": "code",
"source": [
"# let's see what happens if we move it to an E-core\n",
"# CPU ids are machine-specific: set E_CORE to the id of an E-core on your\n",
"# machine (presumably 10 was one on the machine this was written on).\n",
"E_CORE = 10\n",
"# remember the current CPU mask and restore it afterwards, so the pinning\n",
"# does not leak into cells that are run (or re-run) later\n",
"previous_affinity = os.sched_getaffinity(0)\n",
"with th.threadpool_limits(limits=1, user_api='blas'):\n",
"    os.sched_setaffinity(0, {E_CORE})\n",
"    try:\n",
"        y = x @ x\n",
"    finally:\n",
"        os.sched_setaffinity(0, previous_affinity)"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "3efc9fa1"
},
{
"cell_type": "code",
"source": [
"# and now let's try to force it to use the two physical P-cores, and go around HyperThreading ;-)\n",
"# note that we are changing to limits=2!\n",
"# NOTE(review): on Linux, pid 0 addresses the calling thread only; a BLAS worker\n",
"# thread that already exists keeps its own CPU mask -- confirm in htop that both\n",
"# threads really stay on CPUs 0 and 2.\n",
"# remember the current CPU mask and restore it afterwards, so the pinning\n",
"# does not leak into cells that are run (or re-run) later\n",
"previous_affinity = os.sched_getaffinity(0)\n",
"with th.threadpool_limits(limits=2, user_api='blas'):\n",
"    os.sched_setaffinity(0, {0, 2})\n",
"    try:\n",
"        y = x @ x\n",
"    finally:\n",
"        os.sched_setaffinity(0, previous_affinity)"
],
"outputs": [],
"execution_count": null,
"metadata": {},
"id": "de5c8f61"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
},
"nteract": {
"version": "0.28.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}