{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Memory layout matters: row-wise vs column-wise storage\n",
    "\n",
    "The same collection of time series is copied into one big NumPy array twice: once with one series per row and once with one series per column. Both loaders are timed with `%timeit`.\n",
    "\n",
    "`np.zeros` returns C-ordered (row-major) arrays by default: a row is one contiguous block of memory, whereas consecutive elements of a column are a whole row apart."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-03-04T09:40:28.904Z",
     "iopub.status.busy": "2024-03-04T09:40:28.896Z",
     "iopub.status.idle": "2024-03-04T09:40:28.978Z",
     "shell.execute_reply": "2024-03-04T09:40:28.967Z"
    }
   },
   "outputs": [],
   "source": [
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-03-04T10:02:39.062Z",
     "iopub.status.busy": "2024-03-04T10:02:39.057Z",
     "iopub.status.idle": "2024-03-04T10:02:39.068Z",
     "shell.execute_reply": "2024-03-04T10:02:39.071Z"
    }
   },
   "outputs": [],
   "source": [
    "# create a collection of time series\n",
    "# in real life, this data comes from an experiment/simulation\n",
    "n_series = 128\n",
    "len_one_series = 2**20 # ➔ 2^20 = 1,048,576 items (8 B x 2^20 = 8,388,608 B = 8 M)\n",
    "time_series = np.zeros((n_series, len_one_series), dtype='float64')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-03-04T10:02:41.027Z",
     "iopub.status.busy": "2024-03-04T10:02:41.020Z",
     "iopub.status.idle": "2024-03-04T10:02:41.036Z",
     "shell.execute_reply": "2024-03-04T10:02:41.040Z"
    }
   },
   "outputs": [],
   "source": [
    "# how much memory does one time series need?\n",
    "ts_size = time_series[0].nbytes/2**20 # -> 2^20 is 1 M\n",
    "total_size = time_series.nbytes/2**20\n",
    "print(f'Size of one time series: {int(ts_size)} M')\n",
    "print(f'Size of collection: {int(total_size)} M')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-03-04T10:06:08.461Z",
     "iopub.status.busy": "2024-03-04T10:06:08.459Z",
     "iopub.status.idle": "2024-03-04T10:06:08.466Z",
     "shell.execute_reply": "2024-03-04T10:06:08.468Z"
    }
   },
   "outputs": [],
   "source": [
    "# let's load the collection in one big array: one time series per row\n",
    "def load_data_row(x, time_series):\n",
    "    \"\"\"Store one time series per row.\n",
    "\n",
    "    `x` is filled in place, series number i going to `x[i, :]`, and the\n",
    "    same array is returned. `x` needs at least as many rows as there are\n",
    "    series.\n",
    "    \"\"\"\n",
    "    for row, ts in enumerate(time_series):\n",
    "        x[row, :] = ts\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-03-04T10:06:10.280Z",
     "iopub.status.busy": "2024-03-04T10:06:10.277Z",
     "iopub.status.idle": "2024-03-04T10:06:10.284Z",
     "shell.execute_reply": "2024-03-04T10:06:10.288Z"
    }
   },
   "outputs": [],
   "source": [
    "# let's load the collection in one big array: one time series per column\n",
    "def load_data_column(x, time_series):\n",
    "    \"\"\"Store one time series per column.\n",
    "\n",
    "    `x` is filled in place, series number i going to `x[:, i]`, and the\n",
    "    same array is returned. `x` needs at least as many columns as there\n",
    "    are series.\n",
    "    \"\"\"\n",
    "    for column, ts in enumerate(time_series):\n",
    "        x[:, column] = ts\n",
    "    return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-03-04T10:06:14.959Z",
     "iopub.status.busy": "2024-03-04T10:06:14.956Z",
     "iopub.status.idle": "2024-03-04T10:06:17.437Z",
     "shell.execute_reply": "2024-03-04T10:06:17.443Z"
    }
   },
   "outputs": [],
   "source": [
    "# target array with one row per time series\n",
    "x = np.zeros((n_series, len_one_series), dtype='float64')\n",
    "%timeit load_data_row(x, time_series)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2024-03-04T10:06:20.056Z",
     "iopub.status.busy": "2024-03-04T10:06:20.053Z",
     "iopub.status.idle": "2024-03-04T10:06:21.695Z",
     "shell.execute_reply": "2024-03-04T10:06:21.700Z"
    }
   },
   "outputs": [],
   "source": [
    "# target array with one column per time series (transposed shape)\n",
    "x = np.zeros((len_one_series, n_series), dtype='float64')\n",
    "%timeit load_data_column(x, time_series)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "On my machine: 31 ms vs 1240 ms ≈ 40x slowdown!!!"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.5"
  },
  "nteract": {
   "version": "0.28.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}