diff --git a/Exercise2b/README.md b/Exercise2b/README.md
new file mode 100644
index 0000000..e3d4857
--- /dev/null
+++ b/Exercise2b/README.md
@@ -0,0 +1,44 @@
+# Exercise 2b
+
+Execute numpy code with multiple threads.
+
+**NOTE:** Remember to use `htop` in your terminal to track what the CPUs are doing.
+
+## First
+
+Use the script `heavy_computation.py` with different numbers of threads.
+
+The `OMP_NUM_THREADS` environment variable overrides the default number of threads:
+```
+OMP_NUM_THREADS=7 python heavy_computation.py
+```
+
+The script will save the timing results into a `timings/` folder as `.txt` files.
+
+> What happens if `OMP_NUM_THREADS` is not set? How many threads are there? Why?
+
+
+## Second
+
+Plot the timing results from the first part; the IO is already written for you in `plot.py`.
+
+1. Plot a graph of execution duration vs. the number of threads
+2. Plot the execution speedup with respect to running a single-threaded process
+
+Open a PR with your plotting code and post your plots in the conversation; don't upload binaries to the Git remote!
+
+> What does the result tell us about the optimum number of threads? Why?
+
+> Does it take the same time as your colleagues to run? Why?
+
+## Extra
+
+Investigate the runtime variability. Systematically run multiple instances with the same number of threads by modifying `heavy_computation.py`.
+
+### Extra extra
+
+How is the runtime affected when the problem becomes bigger? Is the optimum number of threads always the same?
+
+How is the runtime affected when the memory is almost full? You can fill it up by creating a separate (unused) large numpy array.
+
+How about running on battery vs. having your laptop plugged in?
diff --git a/Exercise2b/heavy_computation.py b/Exercise2b/heavy_computation.py
new file mode 100644
index 0000000..accec9f
--- /dev/null
+++ b/Exercise2b/heavy_computation.py
@@ -0,0 +1,32 @@
+import os
+import numpy as np
+from datetime import datetime
+import time
+
+# Timestamp that will be put in the file name
+timestamp = datetime.now().strftime("%H%M%S%f")
+
+# Requested thread count from the environment (None if the variable is not set)
+threads = os.getenv('OMP_NUM_THREADS')
+
+# A relatively large matrix to work on
+n = 5_000
+x = np.random.random(size=(n, n))
+
+print(f"Running with OMP_NUM_THREADS={threads} for {n=}")
+
+# Measure the time required for matrix multiplication
+start_time = time.time()
+y = x @ x  # The heavy compute
+stop_time = time.time()
+elapsed_time = stop_time - start_time
+
+print(f'Time used for matrix multiplication: {elapsed_time:.2f} s')
+
+# Create the timings folder if it does not exist yet
+os.makedirs('timings', exist_ok=True)
+
+# IO: Save the timing to a unique txt file
+with open(f'timings/{threads}_threads_t{timestamp}.txt', 'w') as file:
+    file.write(f'{threads},{elapsed_time:.6f}')
diff --git a/Exercise2b/plot.py b/Exercise2b/plot.py
new file mode 100644
index 0000000..4130224
--- /dev/null
+++ b/Exercise2b/plot.py
@@ -0,0 +1,23 @@
+import os
+import numpy as np
+import matplotlib.pyplot as plt
+
+# IO: This loads the timings for you
+threads, timings = [], []
+for file in os.listdir('timings'):
+    with open(f'timings/{file}', 'r') as f:
+        n, t = f.read().strip().split(',')
+        # Runs without OMP_NUM_THREADS record 'None' as the thread count; skip them here
+        if n == 'None':
+            continue
+        threads.append(int(n))
+        timings.append(float(t))
+threads = np.array(threads)
+timings = np.array(timings)
+
+print('This is the data I loaded: threads =', threads, ', timings =', timings)
+
+fig, axs = plt.subplots()
+
+# CREATE YOUR PLOT HERE
+# Remember to label your axes
+# Feel free to make it pretty
+
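+# One possible sketch (illustration only, not the required solution): average
+# the repeated runs for each thread count and plot the mean duration; the
+# speedup asked for in the README is then time(1 thread) / time(n threads).
+# unique_threads = np.unique(threads)
+# mean_times = np.array([timings[threads == t].mean() for t in unique_threads])
+# axs.plot(unique_threads, mean_times, marker='o')
+# axs.set_xlabel('Number of threads (OMP_NUM_THREADS)')
+# axs.set_ylabel('Matrix multiplication time [s]')
+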
+plt.savefig('threads_v_timings.png', dpi=300)
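For the "First" and "Extra" parts, the runs with different thread counts (and several repetitions per count) can be launched from a small driver instead of by hand. The sketch below is only an illustration under a few assumptions: the file name `run_benchmarks.py` is hypothetical and not part of the exercise files, it is run from the `Exercise2b` directory next to `heavy_computation.py`, and the thread counts and repeat count are arbitrary example values.

```
# run_benchmarks.py -- hypothetical driver, not one of the exercise files.
# Launches heavy_computation.py several times per thread count so that the
# timings/ folder collects enough samples for plot.py.
import os
import subprocess

thread_counts = [1, 2, 4, 8]  # arbitrary example values; adapt to your CPU
repeats = 3                   # repetitions per count, for the "Extra" part

for n_threads in thread_counts:
    for _ in range(repeats):
        env = dict(os.environ, OMP_NUM_THREADS=str(n_threads))
        subprocess.run(['python', 'heavy_computation.py'], env=env, check=True)
```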