upload exercises

This commit is contained in:
Aitor Morales-Gregorio 2025-09-24 09:09:07 +02:00
parent 32ad3f2662
commit d17339a560
75 changed files with 773 additions and 0 deletions

38
exercises/exerciseC/README.md Executable file
View file

@ -0,0 +1,38 @@
# Exercise C: blending processes and threads
Objective: investigate how the number of processes and threads impacts the
speed-up of a computation.
## First
For each of the 19 images in the folder `images/`, the `process_images.py` script:
(1) decomposes the image using a singular-value decomposition (SVD), (2) removes the
largest singular value and (3) returns the reconstructed image. The script also measures
the time for the computation and saves the result in `timings/`.
You can change the number of processes and threads used on a set of images by calling the script
as follows:
```python process_images.py 3 2 images/*```
The code above will use 3 processes and 2 threads to analyse everything in the folder `images/`.
**TASKS**:
0. Familiarize yourself with the code in `process_images.py`. Where is the number of
threads set in the code? Why is it set there? Where is the number of processes set
in the code?
1. Hypothesize what would be a good number of processes and threads for this exercise.
2. Try a couple combinations of processes and threads, look at the saved timings, and see if
the results match your expectations.
## Second
This folder also includes a bash script called `run_with_all_configurations.sh`.
**TASKS**:
0. Open the bash script. What does it do?
1. Execute the bash script in the terminal:
`bash run_with_all_configurations.sh`
Observe what's printed to screen. Does it match your expectations?
2. Open `plot.py` and see what it does. Run the script and view the results. Do they
match your expectations?
3. Add the image as a comment to the Pull Request you opened in Exercise A (or make a
new Pull Request if you need one).

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.4 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.9 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 13 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 8.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 11 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.3 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.8 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.6 MiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 MiB

53
exercises/exerciseC/plot.py Executable file
View file

@ -0,0 +1,53 @@
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
# Size of the (processes x threads) grid that is plotted.
N_processes = 5
N_threads = 5

# Load measured timings.
# The grid starts out as NaN so that a configuration without a timing file is
# visibly missing instead of showing whatever np.empty left in memory.
times = np.full((N_processes, N_threads), np.nan)
for fname in os.listdir('timings'):
    # The first three whitespace-separated fields of each file are read as
    # <n_processes> <n_threads> <elapsed time>.
    with open(f'timings/{fname}') as timing_file:
        values = timing_file.read().split()
    n_processes = int(values[0])
    n_threads = int(values[1])
    dt = float(values[2])
    # Measurements outside the plotted grid are skipped: a too-large count
    # would raise IndexError and a count of 0 would silently wrap around to
    # the last row/column through negative indexing.
    if not (1 <= n_processes <= N_processes and 1 <= n_threads <= N_threads):
        print(f'Skipping {fname}: outside the {N_processes}x{N_threads} grid')
        continue
    times[n_processes-1][n_threads-1] = dt
print(times)

# Plot measured time
fig_time, axs_time = plt.subplots()
im = axs_time.imshow(times.T, origin='lower')
axs_time.set_title('Computation time')
fig_time.colorbar(im, ax=axs_time, label='Measured computation time (s)')

# Plot speedup, relative to the run with 1 process and 1 thread
speedup = times[0, 0] / times
fig_speedup, axs_speedup = plt.subplots()
im = axs_speedup.imshow(speedup.T, origin='lower')
axs_speedup.set_title('Computation speed-up')
fig_speedup.colorbar(im, ax=axs_speedup, label='Speed-up')

# Set same style for both plots
for axs, data in zip([axs_time, axs_speedup], [times, speedup]):
    axs.set_xlabel('# processes')
    axs.set_ylabel('# threads')
    # Cells are indexed from 0 but labelled with the 1-based counts.
    axs.set_xticks(np.arange(N_processes))
    axs.set_xticklabels(np.arange(N_processes)+1)
    axs.set_yticks(np.arange(N_threads))
    axs.set_yticklabels(np.arange(N_threads)+1)
    # Write the numeric value into every cell; the thin black outline keeps
    # the white text readable on light colours.
    for i in range(N_processes):
        for j in range(N_threads):
            txt = axs.text(i, j, f'{data[i, j]:.2f}', fontsize=10, color='w',
                           ha='center', va='center', fontweight='bold')
            txt.set_path_effects([PathEffects.withStroke(linewidth=0.5, foreground='k')])
    axs.spines[['right', 'top']].set_visible(False)

# Save plots
fig_time.savefig('time.png', dpi=300)
fig_speedup.savefig('speedup.png', dpi=300)

View file

@ -0,0 +1,72 @@
import os
import sys
from multiprocessing import Pool as ProcessPool
import time
def process_image(input_tuple):
    """Remove the largest singular value from an image and rescale it to 8 bit.

    Parameters
    ----------
    input_tuple : tuple
        ``(fname, A)``. ``fname`` is only used in the log line. ``A`` is the
        image as an array; the code assumes it is 2-D (a single-channel
        image) -- TODO confirm against the images actually used.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``A``, holding the
        reconstructed image stretched to the range 0..255.
    """
    # Import locally instead of relying on a module-level ``np``: worker
    # processes started with the "spawn" or "forkserver" method re-import this
    # module without running the ``__main__`` block, so a name that is only
    # bound there does not exist in the worker.
    import numpy as np

    fname, A = input_tuple
    n_threads = os.getenv('OMP_NUM_THREADS', '(unset)')
    print(f"Worker {fname=} OMP_NUM_THREADS={n_threads}", flush=True)
    # Decompose image
    U, S, Vh = np.linalg.svd(A)
    # Remove first singular value
    S[0] = 0
    # Embed the singular values in a matrix shaped like A so that
    # U @ smat @ Vh has the shape of the original image.
    smat = np.zeros(A.shape, dtype=complex)
    smat[:min(A.shape), :min(A.shape)] = np.diag(S)
    # Re-compose image
    A = np.dot(U, np.dot(smat, Vh)).real
    # Stretch to the full 8-bit range. Dividing by the value span (not by the
    # maximum) and scaling to 255 (not 256) keeps every value inside 0..255,
    # so nothing wraps around in the uint8 conversion.
    lowest = A.min()
    span = A.max() - lowest
    if span == 0:
        # A constant image has no contrast to stretch; avoid dividing by zero.
        return np.zeros(A.shape, dtype='uint8')
    A = (255*(A - lowest)/span).astype('uint8')
    return A
if __name__ == '__main__':
    # Command line: <n_processes> <n_threads> <image file> [<image file> ...]
    if len(sys.argv) < 3:
        sys.exit(f'usage: python {sys.argv[0]} N_PROCESSES N_THREADS [IMAGE ...]')
    n_processes = int(sys.argv[1])
    n_threads = int(sys.argv[2])
    fnames = sys.argv[3:]

    # Make sure the output folders exist
    os.makedirs('timings', exist_ok=True)
    os.makedirs('processed_images', exist_ok=True)

    print(f"Controller with {n_processes} processes and {n_threads} threads / worker", flush=True)

    # The environment that is set in the parent is inherited by child workers,
    # we need to set the variable before numpy is imported!
    os.environ['OMP_NUM_THREADS'] = str(n_threads)

    # We delay the import of numpy because we have to set OMP_NUM_THREADS before import.
    # We delay the import of PIL in case it uses numpy internally.
    import numpy as np
    from PIL import Image

    # I/O Load the images
    image_arrays = []
    for fname in fnames:
        # np.array() copies the pixel data, so the file can be closed right away
        with Image.open(fname) as im:
            A = np.array(im)
        image_arrays.append((fname, A))

    # Time the execution of the pool map (pool start-up and shutdown included).
    # perf_counter is a monotonic clock meant for measuring intervals.
    start_time = time.perf_counter()
    with ProcessPool(n_processes) as p:
        new_images = p.map(process_image, image_arrays)
    elapsed_time = time.perf_counter() - start_time

    # I/O save the processed images under the same file name in processed_images/.
    # Building the path from the base name avoids rewriting every occurrence
    # of "images" that happens to appear elsewhere in the input path.
    for im, fname in zip(new_images, fnames):
        im = Image.fromarray(im)
        im.save(os.path.join('processed_images', os.path.basename(fname)))

    print(f'{n_processes} processes and {n_threads} threads and {len(fnames)} jobs: {elapsed_time}\n',
          flush=True)

    # I/O: Save the timing to a unique txt file
    filename = f'timings/{n_processes:02}_processes_{n_threads:02}_threads.txt'
    with open(filename, 'w') as file:
        file.write(f'{n_processes} {n_threads} {elapsed_time:.6f}')

Binary file not shown.

After

Width:  |  Height:  |  Size: 76 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 83 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 112 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 103 KiB

View file

@ -0,0 +1,9 @@
# Bash driver script.
# Sweeps over every combination of 1-5 processes and 1-5 threads and calls
# the python script once per combination on all files in images/.
for ((n_proc = 1; n_proc <= 5; n_proc++)); do      # Number of processes
    for ((n_thr = 1; n_thr <= 5; n_thr++)); do     # Number of threads
        python process_images.py "$n_proc" "$n_thr" images/*
    done
done