2024-heraklion-parallel-python/exercises/exerciseC/process_images.py

69 lines
2.3 KiB
Python
Raw Normal View History

2024-08-29 17:30:29 +02:00
import os
import sys
from multiprocessing import Pool as ProcessPool
import time
def process_image(fname):
    """Load an image, remove its largest singular component, return uint8 data.

    The image is reduced to a 2-D array (median over the colour channels when
    a channel axis is present), downsampled by keeping every 4th pixel along
    both axes, decomposed with an SVD, recomposed with the first singular
    value set to zero, and finally rescaled to the 0..255 range.

    Parameters
    ----------
    fname : str
        Path of the image file to open with PIL.

    Returns
    -------
    numpy.ndarray
        2-D ``uint8`` array holding the processed, downsampled image.
    """
    # Imported inside the function so that the names also exist in workers
    # started with the 'spawn' / 'forkserver' methods, which do not execute
    # the ``__main__`` block of this script.  The worker inherits the parent's
    # environment, so OMP_NUM_THREADS is already set when numpy is loaded.
    import numpy as np
    from PIL import Image

    n_threads = os.getenv('OMP_NUM_THREADS', '(unset)')
    print(f"Worker {fname=} OMP_NUM_THREADS={n_threads}")

    # An image is an array with width, height and three (RGB) color channels
    # (Sometimes there is a transparency channel too: RGBA)
    with Image.open(fname) as im:
        pixels = np.asarray(im)

    if pixels.ndim == 3:
        # Collapse the colour channels into a single grey-level plane.
        A = np.median(pixels, axis=2)[::4, ::4]
    else:
        # No channel axis to collapse: use the pixel values directly.
        A = np.asarray(pixels, dtype=float)[::4, ::4]

    # Decompose image
    U, S, Vh = np.linalg.svd(A)

    # Remove first singular value
    S[0] = 0
    rank = min(A.shape)
    # NOTE(review): smat is complex although every factor is real; only the
    # real part of the product is kept below.
    smat = np.zeros(A.shape, dtype=complex)
    smat[:rank, :rank] = np.diag(S)

    # Re-compose image
    A = np.dot(U, np.dot(smat, Vh)).real

    # Rescale to 0..255.  Dividing by the value range (not by the maximum)
    # and scaling by 255 keeps every value representable as uint8.
    lowest = A.min()
    span = A.max() - lowest
    if span == 0:
        # Constant image: nothing to stretch, avoid dividing by zero.
        return np.zeros(A.shape, dtype='uint8')
    return (255 * (A - lowest) / span).astype('uint8')
if __name__ == '__main__':
    # Command line: N_PROCESSES N_THREADS [IMAGE ...]
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} N_PROCESSES N_THREADS [IMAGE ...]")
    n_processes = int(sys.argv[1])
    n_threads = int(sys.argv[2])
    fnames = sys.argv[3:]

    # Check that the output folders exist, or create them if needed
    os.makedirs('processed_images', exist_ok=True)
    os.makedirs('timings', exist_ok=True)

    print(f"Controller with {n_processes} processes and {n_threads} threads / worker")

    # The environment that is set in the parent is inherited by child workers,
    # we need to set the variable before numpy is imported!
    os.environ['OMP_NUM_THREADS'] = str(n_threads)

    # We delay the import of numpy because we have to set OMP_NUM_THREADS before import.
    # We delay the import of PIL in case it uses numpy internally.
    # Both names are bound at module scope on purpose: forked workers see them
    # as globals of this module.
    import numpy as np
    from PIL import Image

    # Time the execution (perf_counter is monotonic and meant for intervals)
    start_time = time.perf_counter()
    with ProcessPool(n_processes) as p:
        new_images = p.map(process_image, fnames)
    elapsed_time = time.perf_counter() - start_time

    # Save each result under processed_images/, keeping the input file name.
    # Building the path from the basename guarantees the input is never
    # overwritten and the target folder is the one created above.
    for im, fname in zip(new_images, fnames):
        out_path = os.path.join('processed_images', os.path.basename(fname))
        Image.fromarray(im).save(out_path)

    print(f'{n_processes} processes and {n_threads} threads and {len(fnames)} jobs: {elapsed_time}')

    # IO: Save the timing to a unique txt file
    filename = f'timings/{n_processes:02}_processes_{n_threads:02}_threads.txt'
    with open(filename, 'w') as file:
        file.write(f'{n_processes} {n_threads} {elapsed_time:.6f}')