2024-heraklion-parallel-python/exercises/exerciseC/process_images.py

72 lines
2.3 KiB
Python
Raw Normal View History

2024-08-29 17:30:29 +02:00
import os
import sys
from multiprocessing import Pool as ProcessPool
import time
2024-08-29 23:17:47 +02:00
def process_image(input_tuple):
    """Remove the dominant SVD component of an image and rescale it to 8 bit.

    Parameters
    ----------
    input_tuple : tuple
        ``(fname, A)``: the file name (only used for the log line) and the
        image as an array. The reconstruction below indexes ``A.shape`` as a
        matrix, so ``A`` is assumed to be 2-D (single channel) -- TODO confirm
        against the images actually fed to this script.

    Returns
    -------
    numpy.ndarray
        ``uint8`` array with the same shape as ``A``, stretched so that its
        smallest value maps to 0 and its largest to 255. An image that ends
        up constant is returned as all zeros.
    """
    # Imported locally so the function also works in pool workers that do not
    # inherit the parent's globals: with the "spawn" start method the module
    # is re-imported without running the ``__main__`` block that imports numpy.
    # OMP_NUM_THREADS is read from the (inherited) environment at this point.
    import numpy as np

    fname, A = input_tuple
    n_threads = os.getenv('OMP_NUM_THREADS', '(unset)')
    print(f"Worker {fname=} OMP_NUM_THREADS={n_threads}", flush=True)

    # Decompose image
    U, S, Vh = np.linalg.svd(A)
    # Remove first singular value
    S[0] = 0
    k = min(A.shape)
    smat = np.zeros(A.shape, dtype=complex)
    smat[:k, :k] = np.diag(S)
    # Re-compose image
    A = np.dot(U, np.dot(smat, Vh)).real

    # Stretch to the full 8-bit range. The range must be taken from the
    # recomposed data (max - min), and the scale is 255 rather than 256 so the
    # brightest pixel does not wrap around to 0 when cast to uint8.
    lo, hi = A.min(), A.max()
    if hi == lo:
        # Constant image: nothing to stretch, and avoids a 0/0 division.
        return np.zeros(A.shape, dtype='uint8')
    return np.rint(255 * (A - lo) / (hi - lo)).astype('uint8')
if __name__ == '__main__':
    # Command line: <n_processes> <n_threads> [image file ...]
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} n_processes n_threads [image ...]')
    n_processes = int(sys.argv[1])
    n_threads = int(sys.argv[2])
    fnames = sys.argv[3:]

    # Check that the output folders exist, or create them if needed
    os.makedirs('timings', exist_ok=True)
    os.makedirs('processed_images', exist_ok=True)

    print(f"Controller with {n_processes} processes and {n_threads} threads / worker", flush=True)

    # The environment that is set in the parent is inherited by child workers,
    # we need to set the variable before numpy is imported!
    os.environ['OMP_NUM_THREADS'] = str(n_threads)

    # We delay the import of numpy because we have to set OMP_NUM_THREADS before import.
    # We delay the import of PIL in case it uses numpy internally.
    import numpy as np
    from PIL import Image

    # I/O Load the images
    image_arrays = []
    for fname in fnames:
        # The context manager closes the underlying file once the pixel data
        # has been copied into the array.
        with Image.open(fname) as im:
            A = np.array(im)
        image_arrays.append((fname, A))

    # Time the execution of the pool map (pool start-up and shutdown included)
    start_time = time.perf_counter()
    with ProcessPool(n_processes) as p:
        new_images = p.map(process_image, image_arrays)
    elapsed_time = time.perf_counter() - start_time

    # I/O save the processed images
    for im, fname in zip(new_images, fnames):
        # Always write into the folder created above, keeping the file name.
        # Rewriting the input path with str.replace could overwrite the input
        # (no 'images' in the path) or mangle file names containing 'images'.
        # NOTE: inputs sharing a base name will overwrite each other.
        out_path = os.path.join('processed_images', os.path.basename(fname))
        Image.fromarray(im).save(out_path)

    print(f'{n_processes} processes and {n_threads} threads and {len(fnames)} jobs: {elapsed_time}\n',
          flush=True)

    # I/O: Save the timing to a unique txt file
    filename = f'timings/{n_processes:02}_processes_{n_threads:02}_threads.txt'
    with open(filename, 'w') as file:
        file.write(f'{n_processes} {n_threads} {elapsed_time:.6f}')