ASPP 2024 material

2024-08-27 15:27:53 +03:00 · 2024-08-27 15:27:53 +03:00 · 1f6bc07c51
commit 1f6bc07c51
90 changed files with 91689 additions and 0 deletions
--- a/notebooks/.DS_Store
+++ b/notebooks/.DS_Store
--- a/notebooks/.ipynb_checkpoints/notebook-1-sorting-examples-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/notebook-1-sorting-examples-checkpoint.ipynb
@ -0,0 +1,386 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8685ea3a",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import timeit\n",
+    "import matplotlib.pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "048881d0",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Example: Find common words"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2464a282",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "source": [
+    "Problem: given two lists of words, extract all the words that are in common"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "71740eab",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Implementation with 2x for-loops"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f175c775",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "\n",
+    "# Baseline implementation: for every word of words1, `w in words2` scans the list words2.\n",
+    "scaling_factor = 1  # re-run with 10 and 100 to see how the timing grows\n",
+    "\n",
+    "words1 = ['apple', 'orange', 'banana', 'melon', 'peach'] * scaling_factor\n",
+    "words2 = ['orange', 'kiwi', 'avocado', 'apple', 'banana'] * scaling_factor\n",
+    "\n",
+    "common_for = []\n",
+    "for w in words1:\n",
+    "    if w in words2:  # membership test on a list: compares w against the elements one by one\n",
+    "        common_for.append(w)   # recorded timings: 612 ns, 12.3 us, 928 us"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "affab857",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "input_size = [1, 10, 100]\n",
+    "\n",
+    "# %%timeit results of the for-loop cell, in seconds, one per entry of input_size\n",
+    "times_for_loop = [612e-9, 12.4e-6, 928e-6]\n",
+    "# Relative runtimes (unitless): every timing as a multiple of T, the smallest-input timing\n",
+    "results_for_loop = [t / times_for_loop[0] for t in times_for_loop]\n",
+    "\n",
+    "# Degree-2 polynomial through the three measurements, evaluated on a dense grid for the curve\n",
+    "x = np.linspace(0,100,100)\n",
+    "fit1 = np.polyfit(input_size,results_for_loop,2)\n",
+    "eval1 = np.polyval(fit1, x)\n",
+    "\n",
+    "plt.plot(x,eval1,c = 'orange')\n",
+    "plt.scatter(input_size, results_for_loop, c = 'orange', s = 100, label = '2 for loops')\n",
+    "\n",
+    "plt.xlabel('input size')\n",
+    "plt.ylabel('processing time')\n",
+    "# Build the tick labels from the plotted values so that label and tick position always agree\n",
+    "plt.yticks(results_for_loop, ['T'] + [f'{int(r)}x T' for r in results_for_loop[1:]])\n",
+    "plt.legend()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2a61bf38",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# NOTE(review): the 513 ns baseline printed here (and the ~24x / ~1800x ratios derived from it)\n",
+    "# differs from the 612 ns used for `results_for_loop` in the plotting cell -- confirm which run is current.\n",
+    "print('Data increase 1x, 10x, 100x')\n",
+    "print('Time increase 513 ns, 12.4 µs, 928 µs')\n",
+    "print('time1, ~ 24x time1, ~ 1800x time1')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "38e47397",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "source": [
+    "What is the big-O complexity of this implementation? "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4118b38d",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
+   "source": [
+    "n * n ~ O(n<sup>2</sup>)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31cd0e74",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Implementation with sorted lists"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c13a24f4",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "scaling_factor = 100 #10, 100\n",
+    "words1 = ['apple', 'orange', 'banana', 'melon', 'peach'] * scaling_factor\n",
+    "words2 = ['orange', 'kiwi', 'avocado', 'apple', 'banana'] *scaling_factor\n",
+    "# Sort both lists so that the common words can be found in a single forward pass\n",
+    "words1 = sorted(words1)\n",
+    "words2 = sorted(words2)\n",
+    "\n",
+    "common_sort_list = []\n",
+    "idx2 = 0  # current position in words2; it is never reset, it only moves forward\n",
+    "for w in words1:\n",
+    "    # skip the words of words2 that sort before w: they cannot match w or any later word of words1\n",
+    "    while idx2 < len(words2) and words2[idx2] < w:\n",
+    "        idx2 += 1\n",
+    "    if idx2 >= len(words2):  # words2 is exhausted: no further match is possible\n",
+    "        break\n",
+    "    if words2[idx2] == w:\n",
+    "        common_sort_list.append(w) # recorded timings: 1.94 ns, 17.3 us, 204 us -- NOTE(review): the first value is presumably 1.94 us, confirm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1e8fed2",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "notes"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# 1.9 * 10**6\n",
+    "# 17.9 * 10**6\n",
+    "# 205 * 10**6"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ce798ab",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "input_size = [1, 10, 100]\n",
+    "\n",
+    "# Timings of the sorted-lists cell, one per entry of input_size, all in the same unit\n",
+    "# (presumably microseconds -- confirm against the %%timeit output)\n",
+    "times_sorted_lists = [1.9, 17.9, 205]\n",
+    "# Relative runtimes (unitless): multiples of this implementation's own smallest-input timing\n",
+    "results_sorted_lists = [t / times_sorted_lists[0] for t in times_sorted_lists]\n",
+    "fit2 = np.polyfit(input_size, results_sorted_lists, 2)\n",
+    "eval2 = np.polyval(fit2, x)\n",
+    "\n",
+    "# `x`, `eval1` and `results_for_loop` come from the for-loop plotting cell above\n",
+    "plt.plot(x,eval1,c = 'orange')\n",
+    "plt.plot(x,eval2,c = 'pink')\n",
+    "plt.scatter(input_size, results_for_loop, c = 'orange', s = 100, label = '2 for loops')\n",
+    "plt.scatter(input_size, results_sorted_lists, c = 'pink', s = 100, label = 'sorted lists')\n",
+    "plt.xlabel('input size')\n",
+    "plt.ylabel('processing time')\n",
+    "# Build the tick labels from the tick positions so that label and position always agree\n",
+    "tick_values = list(results_for_loop) + list(results_sorted_lists[1:])\n",
+    "plt.yticks(tick_values, ['T'] + [f'{int(v)}x T' for v in tick_values[1:]])\n",
+    "plt.legend()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1da4c22f",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "source": [
+    "What is the big-O complexity of this implementation? "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b068a1b",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "source": [
+    "2 * sorting + traversing two lists = 2 * n log<sub>2</sub>(n) + 2 * n  ~  O(n log n)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "13c96239",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Implementation with sets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "61edb9f3",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "%%timeit\n",
+    "\n",
+    "%%timeit\n",
+    "\n",
+    "scaling_factor = 1\n",
+    "\n",
+    "words1 = ['apple', 'orange', 'banana', 'melon', 'peach'] * scaling_factor\n",
+    "words2 = ['orange', 'kiwi', 'avocado', 'apple', 'banana'] *scaling_factor\n",
+    "\n",
+    "# Same loop as the list version; the only change is that words2 is turned into a set first\n",
+    "words2 = set(words2)\n",
+    "\n",
+    "common_sets = []\n",
+    "for w in words1:\n",
+    "    if w in words2:  # membership test on a set: a hash lookup instead of a scan\n",
+    "        common_sets.append(w)  # recorded timings: 630 ns, 3.13 us, 28.6 us"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c90d8e68",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "notes"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# 630 * 10**9\n",
+    "# 3.13 * 10**6\n",
+    "# 28.6 * 10**6"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "236c132d",
+   "metadata": {
+    "scrolled": true,
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# %%timeit results of the sets cell (630 ns, 3.13 us, 28.6 us) converted to seconds,\n",
+    "# so that the ns and us values share one unit before they are divided\n",
+    "times_sets = [630e-9, 3.13e-6, 28.6e-6]\n",
+    "# Relative runtimes (unitless): multiples of this implementation's own smallest-input timing\n",
+    "results_sets = [t / times_sets[0] for t in times_sets]\n",
+    "fit3 = np.polyfit(input_size, results_sets, 2)\n",
+    "eval3 = np.polyval(fit3, x)\n",
+    "\n",
+    "# `x`, `eval1`, `eval2` and the other two result lists come from the previous plotting cells\n",
+    "plt.plot(x,eval1,c = 'orange')\n",
+    "plt.plot(x,eval2,c = 'pink')\n",
+    "plt.plot(x, eval3, c = 'blue')\n",
+    "plt.scatter(input_size, results_for_loop, c = 'orange', s = 100, label = '2 for loops')\n",
+    "plt.scatter(input_size, results_sorted_lists, c = 'pink', s = 100, label = 'sorted lists')\n",
+    "plt.scatter(input_size, results_sets, c = 'blue', s = 100, label = 'sets')\n",
+    "plt.xlabel('input size')\n",
+    "plt.ylabel('processing time')\n",
+    "# Build the tick labels from the tick positions so that label and position always agree\n",
+    "tick_values = list(results_for_loop) + list(results_sorted_lists[1:])\n",
+    "plt.yticks(tick_values, ['T'] + [f'{int(v)}x T' for v in tick_values[1:]])\n",
+    "plt.legend()\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c9780532",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "source": [
+    "What is the big-O complexity of this implementation? "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "297bcd7d",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "-"
+    }
+   },
+   "source": [
+    "transforming one list to set  + 1 for loop  = 2 * n ~  O(n)\n",
+    "\n",
+    "It’s the exact same code as for lists, but now looking up an element in sets (`if w in words2`) takes constant time!\n",
+    "How could you have known that set lookup is fast? Learning about data structures!"
+   ]
+  }
+ ],
+ "metadata": {
+  "celltoolbar": "Slideshow",
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/.ipynb_checkpoints/notebook-2-numpy_SOLUTIONS-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/notebook-2-numpy_SOLUTIONS-checkpoint.ipynb
--- a/notebooks/.ipynb_checkpoints/numpy_views_and_copies-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/numpy_views_and_copies-checkpoint.ipynb
@ -0,0 +1,693 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "20df51b1",
+   "metadata": {},
+   "source": [
+    "# NumPy views and copies\n",
+    "\n",
+    "- Operations that only require changing the metadata always do so, and return a **view**\n",
+    "- Operations that cannot be executed by changing the metadata create a new memory block, and return a **copy**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "4ed67e38",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "\n",
+    "def print_info(a):\n",
+    "    \"\"\"Display an array together with its layout metadata.\n",
+    "\n",
+    "    Prints `a` itself, followed by one tab-separated `name<TAB>value` line for\n",
+    "    each of its `dtype`, `ndim`, `shape` and `strides` attributes.\n",
+    "    \"\"\"\n",
+    "    attributes = ('dtype', 'ndim', 'shape', 'strides')\n",
+    "    rows = [f\"{name}\\t{getattr(a, name)}\" for name in attributes]\n",
+    "    # a leading blank line and a trailing indented line frame the metadata block\n",
+    "    txt = \"\\n\" + \"\\n\".join(rows) + \"\\n    \"\n",
+    "\n",
+    "    print(a)\n",
+    "    print(txt)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "53bd92f9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[ 0  1  2  3]\n",
+      " [ 4  5  6  7]\n",
+      " [ 8  9 10 11]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(3, 4)\n",
+      "strides\t(32, 8)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "x = np.arange(12).reshape(3, 4).copy()\n",
+    "print_info(x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d2ee43d7",
+   "metadata": {},
+   "source": [
+    "# Views"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4838e77",
+   "metadata": {},
+   "source": [
+    "Operations that only require changing the metadata always do so, and return a **view**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "f1b82845",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[ 1  3]\n",
+      " [ 9 11]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(2, 2)\n",
+      "strides\t(64, 16)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "y = x[0::2, 1::2]\n",
+    "print_info(y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3199b45b",
+   "metadata": {},
+   "source": [
+    "A view shares the same memory block as the original array. \n",
+    "\n",
+    "CAREFUL: Modifying the view changes the original array and all other views of that array as well!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "28ea1c71",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[ 0  1  2  3  4  5  6  7  8  9 10 11]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(1, 12)\n",
+      "strides\t(96, 8)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "z = x.reshape(1, 12)\n",
+    "print_info(z)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "46822b5a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[101 103]\n",
+      " [109 111]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(2, 2)\n",
+      "strides\t(64, 16)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "y += 100\n",
+    "print_info(y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "ad9a7950",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[  0 101   2 103]\n",
+      " [  4   5   6   7]\n",
+      " [  8 109  10 111]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(3, 4)\n",
+      "strides\t(32, 8)\n",
+      "    \n",
+      "[[  0 101   2 103   4   5   6   7   8 109  10 111]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(1, 12)\n",
+      "strides\t(96, 8)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "print_info(x)\n",
+    "print_info(z)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4fc789c1",
+   "metadata": {},
+   "source": [
+    "Functions that take an array as an input should avoid modifying it in place! \n",
+    "\n",
+    "Always make a copy or be super extra clear in the docstring."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "aa25ac4b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def robust_log(a, cte=1e-10):\n",
+    "    \"\"\" Returns the log of an array, avoiding troubles when a value is 0.\n",
+    "    \n",
+    "    Adds the tiny constant `cte` to the entries of `a` that are exactly 0,\n",
+    "    so that the log is finite everywhere.\n",
+    "    `a` is expected to have non-negative values.\n",
+    "\n",
+    "    WARNING: the constant is added to `a` itself, so the caller's array (and\n",
+    "    every view sharing its memory) is modified as a side effect.\n",
+    "    \"\"\"\n",
+    "    a[a == 0] += cte  # in-place update of the argument, not of a copy\n",
+    "    return np.log(a)\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "id": "471d9d6b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_48764/1018405258.py:2: RuntimeWarning: divide by zero encountered in log\n",
+      "  np.log(a)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "array([[-1.2039728 , -4.60517019],\n",
+       "       [       -inf,  0.        ]])"
+      ]
+     },
+     "execution_count": 57,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = np.array([[0.3, 0.01], [0, 1]])\n",
+    "np.log(a)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 58,
+   "id": "6c05d356",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0. 1.]\n",
+      "\n",
+      "dtype\tfloat64\n",
+      "ndim\t1\n",
+      "shape\t(2,)\n",
+      "strides\t(8,)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "# This is a view of `a`\n",
+    "b = a[1, :]\n",
+    "print_info(b)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "9d96fb61",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ -1.2039728 ,  -4.60517019],\n",
+       "       [-23.02585093,   0.        ]])"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "robust_log(a)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 60,
+   "id": "35d0327d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[3.e-01, 1.e-02],\n",
+       "       [1.e-10, 1.e+00]])"
+      ]
+     },
+     "execution_count": 60,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 61,
+   "id": "4a2b95c5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([1.e-10, 1.e+00])"
+      ]
+     },
+     "execution_count": 61,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fa8cf77a",
+   "metadata": {},
+   "source": [
+    "Better to make a copy!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 62,
+   "id": "c5359eac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def robust_log(a, cte=1e-10):\n",
+    "    \"\"\"Logarithm of `a` that does not hit log(0).\n",
+    "\n",
+    "    Entries of `a` that are exactly 0 get `cte` added before the log is taken.\n",
+    "    The addition happens on a private copy, so `a` (and any view of it) is\n",
+    "    left untouched. `a` is expected to have non-negative values.\n",
+    "    \"\"\"\n",
+    "    safe = a.copy()  # private copy: the caller's data must not change\n",
+    "    is_zero = safe == 0\n",
+    "    safe[is_zero] += cte\n",
+    "    return np.log(safe)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "id": "0bf9b2d5",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ -1.2039728 ,  -4.60517019],\n",
+       "       [-23.02585093,   0.        ]])"
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a = np.array([[0.3, 0.01], [0, 1]])\n",
+    "b = a[1, :]\n",
+    "\n",
+    "robust_log(a)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 67,
+   "id": "895209ce",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[0.3 , 0.01],\n",
+       "       [0.  , 1.  ]])"
+      ]
+     },
+     "execution_count": 67,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "id": "18004050",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0., 1.])"
+      ]
+     },
+     "execution_count": 68,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "b"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d664b462",
+   "metadata": {},
+   "source": [
+    "# Copies\n",
+    "\n",
+    "Operations that cannot be executed by changing the metadata create a new memory block, and return a **copy**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 72,
+   "id": "8c8f77e1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[ 0  1  2  3]\n",
+      " [ 4  5  6  7]\n",
+      " [ 8  9 10 11]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(3, 4)\n",
+      "strides\t(32, 8)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "x = np.arange(12).reshape(3, 4).copy()\n",
+    "print_info(x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "716aec53",
+   "metadata": {},
+   "source": [
+    "Choosing rows, columns, or individual elements of an array by explicitly giving their indices (a.k.a. \"fancy indexing\") is an operation that in general cannot be executed by changing the metadata alone.\n",
+    "\n",
+    "Therefore, **fancy indexing always returns a copy**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 77,
+   "id": "40fb1777",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[0 1]\n",
+      " [4 5]\n",
+      " [8 9]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(3, 2)\n",
+      "strides\t(8, 24)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "# Get the first and second column\n",
+    "y = x[:, [0, 1]]\n",
+    "print_info(y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "id": "b8ed81d5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[2000 2001]\n",
+      " [2004 2005]\n",
+      " [2008 2009]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(3, 2)\n",
+      "strides\t(8, 24)\n",
+      "    \n",
+      "[[ 0  1  2  3]\n",
+      " [ 4  5  6  7]\n",
+      " [ 8  9 10 11]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(3, 4)\n",
+      "strides\t(32, 8)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "y += 1000\n",
+    "print_info(y)\n",
+    "# the original array is unchanged => not a view!\n",
+    "print_info(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "id": "6c50e46e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[ 1  0 11]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t1\n",
+      "shape\t(3,)\n",
+      "strides\t(8,)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "y = x[[0, 0, 2], [1, 0, 3]]\n",
+    "print_info(y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "id": "9d65a5c3",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[1001 1000 1011]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t1\n",
+      "shape\t(3,)\n",
+      "strides\t(8,)\n",
+      "    \n",
+      "[[ 0  1  2  3]\n",
+      " [ 4  5  6  7]\n",
+      " [ 8  9 10 11]]\n",
+      "\n",
+      "dtype\tint64\n",
+      "ndim\t2\n",
+      "shape\t(3, 4)\n",
+      "strides\t(32, 8)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "y += 1000\n",
+    "print_info(y)\n",
+    "# the original array is unchanged => not a view!\n",
+    "print_info(x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5e76ea7a",
+   "metadata": {},
+   "source": [
+    "Any operation that computes new values also returns a copy."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "id": "b8a3d44c",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[[ 0.   7.1 14.2 21.3]\n",
+      " [28.4 35.5 42.6 49.7]\n",
+      " [56.8 63.9 71.  78.1]]\n",
+      "\n",
+      "dtype\tfloat64\n",
+      "ndim\t2\n",
+      "shape\t(3, 4)\n",
+      "strides\t(32, 8)\n",
+      "    \n"
+     ]
+    }
+   ],
+   "source": [
+    "y = x * 7.1\n",
+    "print_info(y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9e50edfd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "022e7b98",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/.ipynb_checkpoints/when_copying_is_convenient-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/when_copying_is_convenient-checkpoint.ipynb
@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/.ipynb_checkpoints/which_data_structure_intro-checkpoint.ipynb
+++ b/notebooks/.ipynb_checkpoints/which_data_structure_intro-checkpoint.ipynb
@ -0,0 +1,103 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "3ae332a0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "aa7bbab6",
+   "metadata": {
+    "scrolled": false
+   },
+   "outputs": [],
+   "source": [
+    "sound_data = np.random.rand(100)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "626eafc7",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([0.66709183, 0.55973494, 0.95416669, 0.60810949, 0.05188879,\n",
+       "       0.58619063, 0.25555136, 0.72451477, 0.2646681 , 0.08694215,\n",
+       "       0.75592186, 0.67261696, 0.62847452, 0.06232598, 0.20549438,\n",
+       "       0.11718457, 0.25184725, 0.48625729, 0.8103058 , 0.18100915,\n",
+       "       0.81113341, 0.62055231, 0.9046905 , 0.56664205, 0.73235338,\n",
+       "       0.74382869, 0.64856368, 0.80644398, 0.46199345, 0.78516632,\n",
+       "       0.91298397, 0.48290914, 0.20847714, 0.99162659, 0.26374781,\n",
+       "       0.3602381 , 0.07173351, 0.8584085 , 0.32248766, 0.39167573,\n",
+       "       0.67944923, 0.00930429, 0.21714217, 0.58810089, 0.17668711,\n",
+       "       0.57444803, 0.25760187, 0.43785728, 0.39119371, 0.68268063,\n",
+       "       0.95954499, 0.45934239, 0.03616905, 0.23896063, 0.61872801,\n",
+       "       0.76332531, 0.96272817, 0.57169277, 0.50225193, 0.01361629,\n",
+       "       0.15357459, 0.8057233 , 0.0642748 , 0.95013941, 0.38712684,\n",
+       "       0.97231498, 0.20261775, 0.74184693, 0.26629893, 0.84672705,\n",
+       "       0.67662718, 0.96055977, 0.64942314, 0.66487937, 0.86867536,\n",
+       "       0.40815661, 0.1139344 , 0.95638066, 0.87436447, 0.18407227,\n",
+       "       0.64457074, 0.19233097, 0.24012179, 0.90399279, 0.39093908,\n",
+       "       0.26389161, 0.97537645, 0.14209784, 0.75261696, 0.10078122,\n",
+       "       0.87468408, 0.77990102, 0.92983283, 0.45841805, 0.61470669,\n",
+       "       0.87939755, 0.09266009, 0.41177209, 0.46973971, 0.43152144])"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "sound_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef55bee9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "synonyms = {\n",
+    "    'hot': ['blazing', 'boiling', 'heated'],\n",
+    "    'airplane': ['aircraft', 'airliner', \n",
+    "                 'cab', 'jet', 'plane'],\n",
+    "    'beach': ['coast', 'shore', 'waterfront'],\n",
+    "    # ...\n",
+    "}"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/010_data_structures/.DS_Store
+++ b/notebooks/010_data_structures/.DS_Store
--- a/notebooks/010_data_structures/.ipynb_checkpoints/big_O_example_with_plots-checkpoint.ipynb
+++ b/notebooks/010_data_structures/.ipynb_checkpoints/big_O_example_with_plots-checkpoint.ipynb
--- a/notebooks/020_numpy/.DS_Store
+++ b/notebooks/020_numpy/.DS_Store
--- a/notebooks/020_numpy/001_numpy_views_and_copies.ipynb
+++ b/notebooks/020_numpy/001_numpy_views_and_copies.ipynb
@ -0,0 +1,452 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86b10564",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "\n",
+    "def print_info(a):\n",
+    "    \"\"\" Print the content of an array, and its metadata. \"\"\"\n",
+    "    \n",
+    "    txt = f\"\"\"\n",
+    "dtype\\t{a.dtype}\n",
+    "ndim\\t{a.ndim}\n",
+    "shape\\t{a.shape}\n",
+    "strides\\t{a.strides}\n",
+    "    \"\"\"\n",
+    "\n",
+    "    print(a)\n",
+    "    print(txt)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a5bbf650",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# NumPy views and copies\n",
+    "\n",
+    "- Operations that only require changing the metadata always do so, and return a **view**\n",
+    "- Operations that cannot be executed by changing the metadata create a new memory block, and return a **copy**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53bd92f9",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "x = np.arange(12).reshape(3, 4).copy()\n",
+    "print_info(x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d2ee43d7",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Views"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f4838e77",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "source": [
+    "Operations that only require changing the metadata always do so, and return a **view**"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f1b82845",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# slice\n",
+    "y = x[0::2, 1::2]\n",
+    "print_info(y)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3199b45b",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "A view shares the same memory block as the original array. "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "28ea1c71",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "z = x.reshape(1, 12)\n",
+    "print_info(z)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d88fbf5d",
+   "metadata": {},
+   "source": [
+    "CAREFUL: Modifying the view **changes the original array** and all other views of that array as well!"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7f35dcc3",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "##### in place operations"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "46822b5a",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "y += 100\n",
+    "print_info(y)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ad9a7950",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "print_info(x)\n",
+    "print_info(z)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4fc789c1",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "Functions that take an array as an input should **avoid modifying it in place!**\n",
+    "\n",
+    "Always make a copy or be super extra clear in the docstring."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aa25ac4b",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def robust_log(x, cte=1e-10):\n",
+    "    \"\"\" Returns the log of an array, deals with values that are 0.\n",
+    "\n",
+    "    `x` is expected to have non-negative values.\n",
+    "    \"\"\"\n",
+    "    x[x == 0] += cte\n",
+    "    return np.log(x)\n",
+    "    \n",
+    "# Not clear enough: the docstring never mentions that `x` is modified in place"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "471d9d6b",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "a = np.array([[0.3, 0.01], [0, 1]])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c05d356",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# This is a view of `a`\n",
+    "b = a[1, :]\n",
+    "print_info(b)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d96fb61",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# what is the output?\n",
+    "robust_log(a)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35d0327d",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "# what is the output?\n",
+    "b   # what about b??"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fa8cf77a",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "Better to make a copy!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c5359eac",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "def robust_log(x, cte=1e-10):\n",
+    "    \"\"\" Returns the log of an array, deals with values that are 0.\n",
+    "\n",
+    "    `x` is expected to have non-negative values.\n",
+    "    \"\"\"\n",
+    "    x = x.copy()\n",
+    "    x[x == 0] += cte\n",
+    "    return np.log(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0bf9b2d5",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "a = np.array([[0.3, 0.01], [0, 1]])\n",
+    "b = a[1, :]\n",
+    "\n",
+    "#robust_log(a)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "895209ce",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "fragment"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "a  # what is the output?   \n",
+    "# b"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d664b462",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "# Copies\n",
+    "\n",
+    "- Operations that cannot be executed by changing the metadata create a new memory block, and return a **copy**\n",
+    "\n",
+    "- How can you find out whether an operation returns a view or a copy?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "716aec53",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "Choosing rows, columns, or individual elements of an array by explicitly giving their indices (a.k.a. \"fancy indexing\") is an operation that in general cannot be executed by changing the metadata alone.\n",
+    "\n",
+    "Therefore, **fancy indexing always returns a copy**."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fbcf3100",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "x = np.arange(12).reshape(3, 4).copy()\n",
+    "print_info(x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c50e46e",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "#print(x)\n",
+    "z = x[[0, 0, 2], [1, 0, 3]]\n",
+    "# Can you guess what's z equal to?\n",
+    "\n",
+    "print_info(z)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9d65a5c3",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "z += 1000\n",
+    "print_info(z)\n",
+    "\n",
+    "# the original array is unchanged => not a view!\n",
+    "print_info(x)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "25aa99a4",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "subslide"
+    }
+   },
+   "source": [
+    "**Views** are created when you use other strides to read your data. Slicing and regular indexing allow that, as you know how many byte steps you need to take to get the data.\n",
+    "\n",
+    "**Fancy indexing** does not allow that, because the data you are asking for **cannot** be obtained by just changing the strides. Thus, NumPy needs to create a **copy** of it in memory."
+   ]
+  }
+ ],
+ "metadata": {
+  "celltoolbar": "Slideshow",
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/020_numpy/if_time_002_broadcasting.ipynb
+++ b/notebooks/020_numpy/if_time_002_broadcasting.ipynb
--- a/notebooks/020_numpy/if_time_003_vectorization.ipynb
+++ b/notebooks/020_numpy/if_time_003_vectorization.ipynb
--- a/notebooks/020_numpy/images/Notcompatible.png
+++ b/notebooks/020_numpy/images/Notcompatible.png
--- a/notebooks/020_numpy/images/broadcast_1D.png
+++ b/notebooks/020_numpy/images/broadcast_1D.png
--- a/notebooks/020_numpy/images/fancy_indexing_lookup.png
+++ b/notebooks/020_numpy/images/fancy_indexing_lookup.png
--- a/notebooks/020_numpy/images/newaxis.png
+++ b/notebooks/020_numpy/images/newaxis.png
--- a/notebooks/020_numpy/images/strides.png
+++ b/notebooks/020_numpy/images/strides.png
--- a/notebooks/030_tabular_data/.ipynb_checkpoints/010_pandas_introduction-checkpoint.ipynb
+++ b/notebooks/030_tabular_data/.ipynb_checkpoints/010_pandas_introduction-checkpoint.ipynb
--- a/notebooks/030_tabular_data/.ipynb_checkpoints/011_pandas_introduction_tutor-checkpoint.ipynb
+++ b/notebooks/030_tabular_data/.ipynb_checkpoints/011_pandas_introduction_tutor-checkpoint.ipynb
@ -0,0 +1,316 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8cc1c960",
+   "metadata": {},
+   "source": [
+    "# Pandas, quick introduction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0f55dab1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b377c42",
+   "metadata": {},
+   "source": [
+    "# Pandas introduces a tabular data structure, the DataFrame\n",
+    "\n",
+    "* Columns can be of any C-native type\n",
+    "* Columns and rows have indices, i.e. labels that identify each column or row"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ec75edbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.DataFrame(\n",
+    "    data = [\n",
+    "        ['Anthony', 28, 1.53], \n",
+    "        ['Maria', 31, 1.76], \n",
+    "        ['Emma', 26, 1.83], \n",
+    "        ['Philip', 41, 1.81], \n",
+    "        ['Bill', 27, None],\n",
+    "    ],\n",
+    "    columns = ['name', 'age', 'height'],\n",
+    "    index=['A484', 'C012', 'A123', 'B663', 'A377'],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "37318480",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe1c5739",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dedad6f3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e31f21c6",
+   "metadata": {},
+   "source": [
+    "## DataFrame attributes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4109f1eb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "708f9bb5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cb2f33b9",
+   "metadata": {},
+   "source": [
+    "## Indexing rows and columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19ef2738",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8f354ffc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "94563f03",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "43ab5233",
+   "metadata": {},
+   "source": [
+    "## Examining a column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2cb544c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86388f86",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc081b90",
+   "metadata": {},
+   "source": [
+    "# Filtering"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "263ae06c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "318da062",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a570023a",
+   "metadata": {},
+   "source": [
+    "# Basic operations are by column (unlike NumPy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7260d212",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "49b7057a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5a0f053",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e1ffe32",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7cf9b5d7",
+   "metadata": {},
+   "source": [
+    "# Operations on strings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b78bc237",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0236069f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5761725b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce3d54ad",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8c5584db",
+   "metadata": {},
+   "source": [
+    "# Adding new columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6e09176",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f9a552f0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e354ace",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/.ipynb_checkpoints/020_join_operations-checkpoint.ipynb
+++ b/notebooks/030_tabular_data/.ipynb_checkpoints/020_join_operations-checkpoint.ipynb
--- a/notebooks/030_tabular_data/.ipynb_checkpoints/021_join_operations_tutor-checkpoint.ipynb
+++ b/notebooks/030_tabular_data/.ipynb_checkpoints/021_join_operations_tutor-checkpoint.ipynb
@ -0,0 +1,462 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "37957eb0",
+   "metadata": {},
+   "source": [
+    "# Combine information across tables: joins and anti-joins"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "b6f949f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6a7fcf90",
+   "metadata": {},
+   "source": [
+    "# \"Load\" some experimental data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "a9450803",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>condition_id</th>\n",
+       "      <th>response_time</th>\n",
+       "      <th>response</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.12</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.37</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>312</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.68</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>711</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>4.01</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>711</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.44</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>313</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.07</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>313</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>712</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>3.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>314</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>714</td>\n",
+       "      <td>B2</td>\n",
+       "      <td>3.32</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>314</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>314</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.73</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>713</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>5.74</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   subject_id condition_id  response_time response\n",
+       "0         312           A1           0.12     LEFT\n",
+       "1         312           A2           0.37     LEFT\n",
+       "2         312           C2           0.68     LEFT\n",
+       "3         711           A1           4.01    RIGHT\n",
+       "4         711           A2           0.44     LEFT\n",
+       "5         313           A1           0.07    RIGHT\n",
+       "6         313           B1           0.08    RIGHT\n",
+       "7         712           A2           3.29     LEFT\n",
+       "8         314           A2           0.29     LEFT\n",
+       "9         714           B2           3.32    RIGHT\n",
+       "10        314           B1           0.14    RIGHT\n",
+       "11        314           C2           0.73    RIGHT\n",
+       "12        713           B1           5.74     LEFT"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.DataFrame(\n",
+    "    data=[\n",
+    "        ['312', 'A1', 0.12, 'LEFT'],\n",
+    "        ['312', 'A2', 0.37, 'LEFT'],\n",
+    "        ['312', 'C2', 0.68, 'LEFT'],\n",
+    "        ['711', 'A1', 4.01, 'RIGHT'],\n",
+    "        ['711', 'A2', 0.44, 'LEFT'],\n",
+    "        ['313', 'A1', 0.07, 'RIGHT'],\n",
+    "        ['313', 'B1', 0.08, 'RIGHT'],\n",
+    "        ['712', 'A2', 3.29, 'LEFT'],\n",
+    "        ['314', 'A2', 0.29, 'LEFT'],\n",
+    "        ['714', 'B2', 3.32, 'RIGHT'],\n",
+    "        ['314', 'B1', 0.14, 'RIGHT'],\n",
+    "        ['314', 'C2', 0.73, 'RIGHT'],\n",
+    "        ['713', 'B1', 5.74, 'LEFT'],\n",
+    "    ],\n",
+    "    columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
+    ")\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9f6de0d6",
+   "metadata": {},
+   "source": [
+    "Each experiment belongs to one experimental condition, but the parameters of each condition are not in the table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "455471d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "condition_to_orientation = {\n",
+    "    'A1': 0,\n",
+    "    'A2': 0,\n",
+    "    'B1': 45,\n",
+    "    'B2': 45,\n",
+    "    'C1': 90,\n",
+    "}\n",
+    "\n",
+    "condition_to_duration = {\n",
+    "    'A1': 0.1,\n",
+    "    'A2': 0.01,\n",
+    "    'B1': 0.1,\n",
+    "    'B2': 0.01,\n",
+    "    'C1': 0.2,\n",
+    "}\n",
+    "\n",
+    "condition_to_surround = {\n",
+    "    'A1': 'FULL',\n",
+    "    'A2': 'NONE',\n",
+    "    'B1': 'NONE',\n",
+    "    'B2': 'FULL',\n",
+    "    'C1': 'FULL',\n",
+    "}\n",
+    "\n",
+    "\n",
+    "condition_to_stimulus_type = {\n",
+    "    'A1': 'LINES',\n",
+    "    'A2': 'DOTS',\n",
+    "    'B1': 'PLAID',\n",
+    "    'B2': 'PLAID',\n",
+    "    'C1': 'WIGGLES',\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5ccfd7e7",
+   "metadata": {},
+   "source": [
+    "# Manually adding the condition parameters to the table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "id": "cc32110c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_with_properties = data.copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "06263dc6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b96962b2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6e71b13",
+   "metadata": {},
+   "source": [
+    "# Using a join operation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d9835d7c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>orientation</th>\n",
+       "      <th>duration</th>\n",
+       "      <th>surround</th>\n",
+       "      <th>stimulus_type</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>FULL</td>\n",
+       "      <td>LINES</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>NONE</td>\n",
+       "      <td>DOTS</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>45</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>NONE</td>\n",
+       "      <td>PLAID</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>45</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>FULL</td>\n",
+       "      <td>PLAID</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C1</th>\n",
+       "      <td>90</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>FULL</td>\n",
+       "      <td>WIGGLES</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   orientation duration surround stimulus_type\n",
+       "A1           0      0.1     FULL         LINES\n",
+       "A2           0     0.01     NONE          DOTS\n",
+       "B1          45      0.1     NONE         PLAID\n",
+       "B2          45     0.01     FULL         PLAID\n",
+       "C1          90      0.2     FULL       WIGGLES"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Often, this is done using a spreadsheet\n",
+    "condition_properties = pd.DataFrame(\n",
+    "    [condition_to_orientation, condition_to_duration, condition_to_surround, condition_to_stimulus_type],\n",
+    "    index=['orientation', 'duration', 'surround', 'stimulus_type'],\n",
+    ").T\n",
+    "condition_properties"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c27ea9f3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e563cd0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cba9534f",
+   "metadata": {},
+   "source": [
+    "# Anti-join: filter out unwanted data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "1cb2bbdb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We are given a list of subjects that are outliers and should be disregarded in the analysis\n",
+    "outliers = pd.DataFrame([['711'], ['712'], ['713'], ['714'], ['888']], columns=['subject_id'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0e2c3c5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "90d92640",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/.ipynb_checkpoints/030_split-apply-combine-checkpoint.ipynb
+++ b/notebooks/030_tabular_data/.ipynb_checkpoints/030_split-apply-combine-checkpoint.ipynb
@ -0,0 +1,814 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "247bbf84",
+   "metadata": {},
+   "source": [
+    "# Split-apply-combine operations for tabular data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "44584190",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ba193f3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>condition_id</th>\n",
+       "      <th>response_time</th>\n",
+       "      <th>response</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.12</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.37</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>312</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.68</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>313</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.07</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>313</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>314</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>314</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>314</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.73</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>711</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>4.01</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>712</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>3.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>713</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>5.74</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>714</td>\n",
+       "      <td>B2</td>\n",
+       "      <td>3.32</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   subject_id condition_id  response_time response\n",
+       "0         312           A1           0.12     LEFT\n",
+       "1         312           A2           0.37     LEFT\n",
+       "2         312           C2           0.68     LEFT\n",
+       "3         313           A1           0.07    RIGHT\n",
+       "4         313           B1           0.08    RIGHT\n",
+       "5         314           A2           0.29     LEFT\n",
+       "6         314           B1           0.14    RIGHT\n",
+       "7         314           C2           0.73    RIGHT\n",
+       "8         711           A1           4.01    RIGHT\n",
+       "9         712           A2           3.29     LEFT\n",
+       "10        713           B1           5.74     LEFT\n",
+       "11        714           B2           3.32    RIGHT"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.DataFrame(\n",
+    "    data=[\n",
+    "        ['312', 'A1', 0.12, 'LEFT'],\n",
+    "        ['312', 'A2', 0.37, 'LEFT'],\n",
+    "        ['312', 'C2', 0.68, 'LEFT'],\n",
+    "        ['313', 'A1', 0.07, 'RIGHT'],\n",
+    "        ['313', 'B1', 0.08, 'RIGHT'],\n",
+    "        ['314', 'A2', 0.29, 'LEFT'],\n",
+    "        ['314', 'B1', 0.14, 'RIGHT'],\n",
+    "        ['314', 'C2', 0.73, 'RIGHT'],\n",
+    "        ['711', 'A1', 4.01, 'RIGHT'],\n",
+    "        ['712', 'A2', 3.29, 'LEFT'],\n",
+    "        ['713', 'B1', 5.74, 'LEFT'],\n",
+    "        ['714', 'B2', 3.32, 'RIGHT'],\n",
+    "    ],\n",
+    "    columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
+    ")\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a239e0c",
+   "metadata": {},
+   "source": [
+    "# Group-by"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31eba91e",
+   "metadata": {},
+   "source": [
+    "We want to compute the mean response time by condition.\n",
+    "\n",
+    "Let's start by doing it by hand, using for loops!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "e8331039",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conditions = data['condition_id'].unique()\n",
+    "results_dict = {}\n",
+    "for condition in conditions:\n",
+    "    group = data[data['condition_id'] == condition]\n",
+    "    results_dict[condition] = group['response_time'].mean()\n",
+    "\n",
+    "results = pd.DataFrame([results_dict], index=['response_time']).T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "09cb04c4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>response_time</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>1.400000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>1.316667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C2</th>\n",
+       "      <td>0.705000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>1.986667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>3.320000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    response_time\n",
+       "A1       1.400000\n",
+       "A2       1.316667\n",
+       "C2       0.705000\n",
+       "B1       1.986667\n",
+       "B2       3.320000"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2bc09c66",
+   "metadata": {},
+   "source": [
+    "This is a basic operation, and we would need to repeat this pattern a million times!\n",
+    "\n",
+    "Pandas and all other tools for tabular data provide a command for performing operations on groups."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "0500cd4a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x14ff67a90>"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# df.groupby(column_name) groups a DataFrame by the values in the column\n",
+    "data.groupby('condition_id')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c5857c4e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "condition_id\n",
+       "A1    3\n",
+       "A2    3\n",
+       "B1    3\n",
+       "B2    1\n",
+       "C2    2\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The group-by object can be used as a DataFrame. \n",
+    "# Operations are executed on each group individually, then aggregated\n",
+    "data.groupby('condition_id').size()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "5c865cc1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "condition_id\n",
+       "A1    1.400000\n",
+       "A2    1.316667\n",
+       "B1    1.986667\n",
+       "B2    3.320000\n",
+       "C2    0.705000\n",
+       "Name: response_time, dtype: float64"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.groupby('condition_id')['response_time'].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "615a4515",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "condition_id\n",
+       "A1    4.01\n",
+       "A2    3.29\n",
+       "B1    5.74\n",
+       "B2    3.32\n",
+       "C2    0.73\n",
+       "Name: response_time, dtype: float64"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.groupby('condition_id')['response_time'].max()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b0441458",
+   "metadata": {},
+   "source": [
+    "# Pivot tables"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3feec98d",
+   "metadata": {},
+   "source": [
+    "We want to look at response time biases when the subjects respond LEFT vs RIGHT. In principle, we expect them to have the same response time in both cases.\n",
+    "\n",
+    "We compute a summary table with 1) condition_id on the rows; 2) response on the columns; 3) the average response time for all experiments with that condition and response\n",
+    "\n",
+    "We can do it with `groupby`, with some table manipulation commands."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "4a8a7d0d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "condition_id  response\n",
+       "A1            LEFT        0.120000\n",
+       "              RIGHT       2.040000\n",
+       "A2            LEFT        1.316667\n",
+       "B1            LEFT        5.740000\n",
+       "              RIGHT       0.110000\n",
+       "B2            RIGHT       3.320000\n",
+       "C2            LEFT        0.680000\n",
+       "              RIGHT       0.730000\n",
+       "Name: response_time, dtype: float64"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "summary = data.groupby(['condition_id', 'response'])['response_time'].mean()\n",
+    "summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "e5a645e0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>response</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>condition_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>0.120000</td>\n",
+       "      <td>2.04</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>1.316667</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>5.740000</td>\n",
+       "      <td>0.11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C2</th>\n",
+       "      <td>0.680000</td>\n",
+       "      <td>0.73</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "response          LEFT  RIGHT\n",
+       "condition_id                 \n",
+       "A1            0.120000   2.04\n",
+       "A2            1.316667    NaN\n",
+       "B1            5.740000   0.11\n",
+       "B2                 NaN   3.32\n",
+       "C2            0.680000   0.73"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "summary.unstack(level=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3307fcc6",
+   "metadata": {},
+   "source": [
+    "Pandas has a command called `pivot_table` that can be used to perform this kind of operation straightforwardly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "8941edfe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>response</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>condition_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>0.120000</td>\n",
+       "      <td>2.04</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>1.316667</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>5.740000</td>\n",
+       "      <td>0.11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C2</th>\n",
+       "      <td>0.680000</td>\n",
+       "      <td>0.73</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "response          LEFT  RIGHT\n",
+       "condition_id                 \n",
+       "A1            0.120000   2.04\n",
+       "A2            1.316667    NaN\n",
+       "B1            5.740000   0.11\n",
+       "B2                 NaN   3.32\n",
+       "C2            0.680000   0.73"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.pivot_table(index='condition_id', columns='response', values='response_time', aggfunc='mean')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "a7d1d998",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"2\" halign=\"left\">mean</th>\n",
+       "      <th colspan=\"2\" halign=\"left\">std</th>\n",
+       "      <th colspan=\"2\" halign=\"left\">count</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>response</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>condition_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>0.120000</td>\n",
+       "      <td>2.04</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2.786001</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>1.316667</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.709425</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>5.740000</td>\n",
+       "      <td>0.11</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.042426</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.32</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C2</th>\n",
+       "      <td>0.680000</td>\n",
+       "      <td>0.73</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  mean             std           count      \n",
+       "response          LEFT RIGHT      LEFT     RIGHT  LEFT RIGHT\n",
+       "condition_id                                                \n",
+       "A1            0.120000  2.04       NaN  2.786001   1.0   2.0\n",
+       "A2            1.316667   NaN  1.709425       NaN   3.0   NaN\n",
+       "B1            5.740000  0.11       NaN  0.042426   1.0   2.0\n",
+       "B2                 NaN  3.32       NaN       NaN   NaN   1.0\n",
+       "C2            0.680000  0.73       NaN       NaN   1.0   1.0"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(\n",
+    "    data\n",
+    "    .pivot_table(\n",
+    "        index='condition_id', \n",
+    "        columns='response', \n",
+    "        values='response_time', \n",
+    "        aggfunc=['mean', 'std', 'count'],\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a770b812",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0234ccf2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c77c2dc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/.ipynb_checkpoints/031_split-apply-combine_tutor-checkpoint.ipynb
+++ b/notebooks/030_tabular_data/.ipynb_checkpoints/031_split-apply-combine_tutor-checkpoint.ipynb
@ -0,0 +1,335 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "247bbf84",
+   "metadata": {},
+   "source": [
+    "# Split-apply-combine operations for tabular data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "44584190",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ba193f3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>condition_id</th>\n",
+       "      <th>response_time</th>\n",
+       "      <th>response</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.12</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.37</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>312</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.68</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>313</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.07</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>313</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>314</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>314</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>314</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.73</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>711</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>4.01</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>712</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>3.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>713</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>5.74</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>714</td>\n",
+       "      <td>B2</td>\n",
+       "      <td>3.32</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   subject_id condition_id  response_time response\n",
+       "0         312           A1           0.12     LEFT\n",
+       "1         312           A2           0.37     LEFT\n",
+       "2         312           C2           0.68     LEFT\n",
+       "3         313           A1           0.07    RIGHT\n",
+       "4         313           B1           0.08    RIGHT\n",
+       "5         314           A2           0.29     LEFT\n",
+       "6         314           B1           0.14    RIGHT\n",
+       "7         314           C2           0.73    RIGHT\n",
+       "8         711           A1           4.01    RIGHT\n",
+       "9         712           A2           3.29     LEFT\n",
+       "10        713           B1           5.74     LEFT\n",
+       "11        714           B2           3.32    RIGHT"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.DataFrame(\n",
+    "    data=[\n",
+    "        ['312', 'A1', 0.12, 'LEFT'],\n",
+    "        ['312', 'A2', 0.37, 'LEFT'],\n",
+    "        ['312', 'C2', 0.68, 'LEFT'],\n",
+    "        ['313', 'A1', 0.07, 'RIGHT'],\n",
+    "        ['313', 'B1', 0.08, 'RIGHT'],\n",
+    "        ['314', 'A2', 0.29, 'LEFT'],\n",
+    "        ['314', 'B1', 0.14, 'RIGHT'],\n",
+    "        ['314', 'C2', 0.73, 'RIGHT'],\n",
+    "        ['711', 'A1', 4.01, 'RIGHT'],\n",
+    "        ['712', 'A2', 3.29, 'LEFT'],\n",
+    "        ['713', 'B1', 5.74, 'LEFT'],\n",
+    "        ['714', 'B2', 3.32, 'RIGHT'],\n",
+    "    ],\n",
+    "    columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
+    ")\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a239e0c",
+   "metadata": {},
+   "source": [
+    "# Group-by"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31eba91e",
+   "metadata": {},
+   "source": [
+    "We want to compute the mean response time by condition.\n",
+    "\n",
+    "Let's start by doing it by hand, using for loops!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ff3f890b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "805d04c7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2bc09c66",
+   "metadata": {},
+   "source": [
+    "This is a basic operation, and we would need to repeat this pattern a million times!\n",
+    "\n",
+    "Pandas and all other tools for tabular data provide a command for performing operations on groups."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcc8c9c7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "818b8346",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b0441458",
+   "metadata": {},
+   "source": [
+    "# Pivot tables"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3feec98d",
+   "metadata": {},
+   "source": [
+    "We want to look at response time biases when the subjects respond LEFT vs RIGHT. In principle, we expect them to have the same response time in both cases.\n",
+    "\n",
+    "We compute a summary table with 1) condition_id on the rows; 2) response on the columns; 3) the average response time for all experiments with that condition and response\n",
+    "\n",
+    "We can do it with `groupby`, with some table manipulation commands."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "04f6ff60",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62600721",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3307fcc6",
+   "metadata": {},
+   "source": [
+    "Pandas has a command called `pivot_table` that can be used to perform this kind of operation straightforwardly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a770b812",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0234ccf2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c77c2dc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/.ipynb_checkpoints/040_window_functions-checkpoint.ipynb
+++ b/notebooks/030_tabular_data/.ipynb_checkpoints/040_window_functions-checkpoint.ipynb
--- a/notebooks/030_tabular_data/.ipynb_checkpoints/041_window_functions_tutor-checkpoint.ipynb
+++ b/notebooks/030_tabular_data/.ipynb_checkpoints/041_window_functions_tutor-checkpoint.ipynb
@ -0,0 +1,320 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "247bbf84",
+   "metadata": {},
+   "source": [
+    "# Window functions for tabular data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "44584190",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "83bbd275",
+   "metadata": {},
+   "source": [
+    "# Load experimental data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "88b9e189",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('timed_responses.csv', index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "987a3518",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>time (ms)</th>\n",
+       "      <th>response</th>\n",
+       "      <th>accuracy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>574</th>\n",
+       "      <td>3</td>\n",
+       "      <td>540</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.04</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1190</th>\n",
+       "      <td>2</td>\n",
+       "      <td>552</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.43</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1895</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1036</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>53</th>\n",
+       "      <td>3</td>\n",
+       "      <td>257</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>158</th>\n",
+       "      <td>2</td>\n",
+       "      <td>743</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>551</th>\n",
+       "      <td>3</td>\n",
+       "      <td>619</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1602</th>\n",
+       "      <td>1</td>\n",
+       "      <td>43</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.65</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>413</th>\n",
+       "      <td>1</td>\n",
+       "      <td>471</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.80</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>785</th>\n",
+       "      <td>1</td>\n",
+       "      <td>121</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1393</th>\n",
+       "      <td>2</td>\n",
+       "      <td>903</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.33</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>629</th>\n",
+       "      <td>2</td>\n",
+       "      <td>353</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1829</th>\n",
+       "      <td>3</td>\n",
+       "      <td>768</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.26</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>902</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1093</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1486</th>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.29</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      subject_id  time (ms) response  accuracy\n",
+       "574            3        540    RIGHT      0.04\n",
+       "1190           2        552     LEFT      0.43\n",
+       "1895           2       1036     LEFT      0.36\n",
+       "53             3        257    RIGHT      0.11\n",
+       "158            2        743    RIGHT      0.32\n",
+       "551            3        619     LEFT      0.25\n",
+       "1602           1         43    RIGHT      0.65\n",
+       "413            1        471     LEFT      0.80\n",
+       "785            1        121     LEFT      0.10\n",
+       "1393           2        903    RIGHT      0.33\n",
+       "629            2        353     LEFT      0.17\n",
+       "1829           3        768    RIGHT      0.26\n",
+       "902            1       1093     LEFT      0.34\n",
+       "1486           2          3    RIGHT      0.29"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5c41cd93",
+   "metadata": {},
+   "source": [
+    "# Split-apply-combine operations return one aggregated value per group"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0234ccf2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.groupby('subject_id')['accuracy'].max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2b2a1796",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2bb99152",
+   "metadata": {},
+   "source": [
+    "# However, for some calculations we need to have a value per row\n",
+    "\n",
+    "For example: for each subject, rank the responses by decreasing accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3aed0755",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "17f3d40f",
+   "metadata": {},
+   "source": [
+    "# In many cases, a window function is combined with a sorting operation\n",
+    "\n",
+    "For example: for each subject, count the number of \"LEFT\" responses up until any moment in the experiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "67efdd56",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a00b4f39",
+   "metadata": {},
+   "source": [
+    "# Window functions are also useful to compute changes in the data for each group\n",
+    "\n",
+    "In this case, the window function often uses the `shift(n)` method that lags the data by `n` rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e553c17f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2973e3d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c9ca46b0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/010_pandas_introduction.ipynb
+++ b/notebooks/030_tabular_data/010_pandas_introduction.ipynb
--- a/notebooks/030_tabular_data/011_pandas_introduction_tutor.ipynb
+++ b/notebooks/030_tabular_data/011_pandas_introduction_tutor.ipynb
@ -0,0 +1,316 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "8cc1c960",
+   "metadata": {},
+   "source": [
+    "# Pandas, quick introduction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "0f55dab1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b377c42",
+   "metadata": {},
+   "source": [
+    "# Pandas introduces a tabular data structure, the DataFrame\n",
+    "\n",
+    "* Columns can be of any C-native type\n",
+    "* Columns and rows have indices, i.e. labels that identify each column or row"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ec75edbe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.DataFrame(\n",
+    "    data = [\n",
+    "        ['Anthony', 28, 1.53], \n",
+    "        ['Maria', 31, 1.76], \n",
+    "        ['Emma', 26, 1.83], \n",
+    "        ['Philip', 41, 1.81], \n",
+    "        ['Bill', 27, None],\n",
+    "    ],\n",
+    "    columns = ['name', 'age', 'height'],\n",
+    "    index=['A484', 'C012', 'A123', 'B663', 'A377'],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "37318480",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe1c5739",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dedad6f3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e31f21c6",
+   "metadata": {},
+   "source": [
+    "## DataFrame attributes"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4109f1eb",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "708f9bb5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cb2f33b9",
+   "metadata": {},
+   "source": [
+    "## Indexing rows and columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19ef2738",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8f354ffc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "94563f03",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "43ab5233",
+   "metadata": {},
+   "source": [
+    "## Examining a column"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2cb544c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "86388f86",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "fc081b90",
+   "metadata": {},
+   "source": [
+    "# Filtering"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "263ae06c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "318da062",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a570023a",
+   "metadata": {},
+   "source": [
+    "# Basic operations are by column (unlike NumPy)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7260d212",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "49b7057a",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f5a0f053",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e1ffe32",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "7cf9b5d7",
+   "metadata": {},
+   "source": [
+    "# Operations on strings"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b78bc237",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0236069f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5761725b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce3d54ad",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8c5584db",
+   "metadata": {},
+   "source": [
+    "# Adding new columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f6e09176",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f9a552f0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e354ace",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/020_join_operations.ipynb
+++ b/notebooks/030_tabular_data/020_join_operations.ipynb
--- a/notebooks/030_tabular_data/021_join_operations_tutor.ipynb
+++ b/notebooks/030_tabular_data/021_join_operations_tutor.ipynb
@ -0,0 +1,462 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "37957eb0",
+   "metadata": {},
+   "source": [
+    "# Combine information across tables: joins and anti-joins"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "b6f949f7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6a7fcf90",
+   "metadata": {},
+   "source": [
+    "# \"Load\" some experimental data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "a9450803",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>condition_id</th>\n",
+       "      <th>response_time</th>\n",
+       "      <th>response</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.12</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.37</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>312</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.68</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>711</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>4.01</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>711</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.44</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>313</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.07</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>313</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>712</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>3.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>314</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>714</td>\n",
+       "      <td>B2</td>\n",
+       "      <td>3.32</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>314</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>314</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.73</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>12</th>\n",
+       "      <td>713</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>5.74</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   subject_id condition_id  response_time response\n",
+       "0         312           A1           0.12     LEFT\n",
+       "1         312           A2           0.37     LEFT\n",
+       "2         312           C2           0.68     LEFT\n",
+       "3         711           A1           4.01    RIGHT\n",
+       "4         711           A2           0.44     LEFT\n",
+       "5         313           A1           0.07    RIGHT\n",
+       "6         313           B1           0.08    RIGHT\n",
+       "7         712           A2           3.29     LEFT\n",
+       "8         314           A2           0.29     LEFT\n",
+       "9         714           B2           3.32    RIGHT\n",
+       "10        314           B1           0.14    RIGHT\n",
+       "11        314           C2           0.73    RIGHT\n",
+       "12        713           B1           5.74     LEFT"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.DataFrame(\n",
+    "    data=[\n",
+    "        ['312', 'A1', 0.12, 'LEFT'],\n",
+    "        ['312', 'A2', 0.37, 'LEFT'],\n",
+    "        ['312', 'C2', 0.68, 'LEFT'],\n",
+    "        ['711', 'A1', 4.01, 'RIGHT'],\n",
+    "        ['711', 'A2', 0.44, 'LEFT'],\n",
+    "        ['313', 'A1', 0.07, 'RIGHT'],\n",
+    "        ['313', 'B1', 0.08, 'RIGHT'],\n",
+    "        ['712', 'A2', 3.29, 'LEFT'],\n",
+    "        ['314', 'A2', 0.29, 'LEFT'],\n",
+    "        ['714', 'B2', 3.32, 'RIGHT'],\n",
+    "        ['314', 'B1', 0.14, 'RIGHT'],\n",
+    "        ['314', 'C2', 0.73, 'RIGHT'],\n",
+    "        ['713', 'B1', 5.74, 'LEFT'],\n",
+    "    ],\n",
+    "    columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
+    ")\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9f6de0d6",
+   "metadata": {},
+   "source": [
+    "Each experiment belongs to one experimental condition, but the parameters of each condition are not in the table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "455471d7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "condition_to_orientation = {\n",
+    "    'A1': 0,\n",
+    "    'A2': 0,\n",
+    "    'B1': 45,\n",
+    "    'B2': 45,\n",
+    "    'C1': 90,\n",
+    "}\n",
+    "\n",
+    "condition_to_duration = {\n",
+    "    'A1': 0.1,\n",
+    "    'A2': 0.01,\n",
+    "    'B1': 0.1,\n",
+    "    'B2': 0.01,\n",
+    "    'C1': 0.2,\n",
+    "}\n",
+    "\n",
+    "condition_to_surround = {\n",
+    "    'A1': 'FULL',\n",
+    "    'A2': 'NONE',\n",
+    "    'B1': 'NONE',\n",
+    "    'B2': 'FULL',\n",
+    "    'C1': 'FULL',\n",
+    "}\n",
+    "\n",
+    "\n",
+    "condition_to_stimulus_type = {\n",
+    "    'A1': 'LINES',\n",
+    "    'A2': 'DOTS',\n",
+    "    'B1': 'PLAID',\n",
+    "    'B2': 'PLAID',\n",
+    "    'C1': 'WIGGLES',\n",
+    "}\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5ccfd7e7",
+   "metadata": {},
+   "source": [
+    "# Manually adding the condition parameters to the table"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 73,
+   "id": "cc32110c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_with_properties = data.copy()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "06263dc6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b96962b2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d6e71b13",
+   "metadata": {},
+   "source": [
+    "# Using a join operation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "d9835d7c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>orientation</th>\n",
+       "      <th>duration</th>\n",
+       "      <th>surround</th>\n",
+       "      <th>stimulus_type</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>FULL</td>\n",
+       "      <td>LINES</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>0</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>NONE</td>\n",
+       "      <td>DOTS</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>45</td>\n",
+       "      <td>0.1</td>\n",
+       "      <td>NONE</td>\n",
+       "      <td>PLAID</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>45</td>\n",
+       "      <td>0.01</td>\n",
+       "      <td>FULL</td>\n",
+       "      <td>PLAID</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C1</th>\n",
+       "      <td>90</td>\n",
+       "      <td>0.2</td>\n",
+       "      <td>FULL</td>\n",
+       "      <td>WIGGLES</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   orientation duration surround stimulus_type\n",
+       "A1           0      0.1     FULL         LINES\n",
+       "A2           0     0.01     NONE          DOTS\n",
+       "B1          45      0.1     NONE         PLAID\n",
+       "B2          45     0.01     FULL         PLAID\n",
+       "C1          90      0.2     FULL       WIGGLES"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Often, this is done using a spreadsheet\n",
+    "condition_properties = pd.DataFrame(\n",
+    "    [condition_to_orientation, condition_to_duration, condition_to_surround, condition_to_stimulus_type],\n",
+    "    index=['orientation', 'duration', 'surround', 'stimulus_type'],\n",
+    ").T\n",
+    "condition_properties"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c27ea9f3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5e563cd0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cba9534f",
+   "metadata": {},
+   "source": [
+    "# Anti-join: filter out unwanted data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "1cb2bbdb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# We are given a list of subjects that are outliers and should be disregarded in the analysis\n",
+    "outliers = pd.DataFrame([['711'], ['712'], ['713'], ['714'], ['888']], columns=['subject_id'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0e2c3c5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "90d92640",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/030_split-apply-combine.ipynb
+++ b/notebooks/030_tabular_data/030_split-apply-combine.ipynb
@ -0,0 +1,814 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "247bbf84",
+   "metadata": {},
+   "source": [
+    "# Split-apply-combine operations for tabular data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "44584190",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ba193f3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>condition_id</th>\n",
+       "      <th>response_time</th>\n",
+       "      <th>response</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.12</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.37</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>312</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.68</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>313</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.07</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>313</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>314</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>314</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>314</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.73</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>711</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>4.01</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>712</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>3.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>713</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>5.74</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>714</td>\n",
+       "      <td>B2</td>\n",
+       "      <td>3.32</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   subject_id condition_id  response_time response\n",
+       "0         312           A1           0.12     LEFT\n",
+       "1         312           A2           0.37     LEFT\n",
+       "2         312           C2           0.68     LEFT\n",
+       "3         313           A1           0.07    RIGHT\n",
+       "4         313           B1           0.08    RIGHT\n",
+       "5         314           A2           0.29     LEFT\n",
+       "6         314           B1           0.14    RIGHT\n",
+       "7         314           C2           0.73    RIGHT\n",
+       "8         711           A1           4.01    RIGHT\n",
+       "9         712           A2           3.29     LEFT\n",
+       "10        713           B1           5.74     LEFT\n",
+       "11        714           B2           3.32    RIGHT"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.DataFrame(\n",
+    "    data=[\n",
+    "        ['312', 'A1', 0.12, 'LEFT'],\n",
+    "        ['312', 'A2', 0.37, 'LEFT'],\n",
+    "        ['312', 'C2', 0.68, 'LEFT'],\n",
+    "        ['313', 'A1', 0.07, 'RIGHT'],\n",
+    "        ['313', 'B1', 0.08, 'RIGHT'],\n",
+    "        ['314', 'A2', 0.29, 'LEFT'],\n",
+    "        ['314', 'B1', 0.14, 'RIGHT'],\n",
+    "        ['314', 'C2', 0.73, 'RIGHT'],\n",
+    "        ['711', 'A1', 4.01, 'RIGHT'],\n",
+    "        ['712', 'A2', 3.29, 'LEFT'],\n",
+    "        ['713', 'B1', 5.74, 'LEFT'],\n",
+    "        ['714', 'B2', 3.32, 'RIGHT'],\n",
+    "    ],\n",
+    "    columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
+    ")\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a239e0c",
+   "metadata": {},
+   "source": [
+    "# Group-by"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31eba91e",
+   "metadata": {},
+   "source": [
+    "We want to compute the mean response time by condition.\n",
+    "\n",
+    "Let's start by doing it by hand, using for loops!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "e8331039",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conditions = data['condition_id'].unique()\n",
+    "results_dict = {}\n",
+    "for condition in conditions:\n",
+    "    group = data[data['condition_id'] == condition]\n",
+    "    results_dict[condition] = group['response_time'].mean()\n",
+    "\n",
+    "results = pd.DataFrame([results_dict], index=['response_time']).T"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "09cb04c4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>response_time</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>1.400000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>1.316667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C2</th>\n",
+       "      <td>0.705000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>1.986667</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>3.320000</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    response_time\n",
+       "A1       1.400000\n",
+       "A2       1.316667\n",
+       "C2       0.705000\n",
+       "B1       1.986667\n",
+       "B2       3.320000"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2bc09c66",
+   "metadata": {},
+   "source": [
+    "This is a basic operation, and we would need to repeat this pattern a million times!\n",
+    "\n",
+    "Pandas and all other tools for tabular data provide a command for performing operations on groups."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "0500cd4a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "<pandas.core.groupby.generic.DataFrameGroupBy object at 0x14ff67a90>"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# df.groupby(column_name) groups a DataFrame by the values in the column\n",
+    "data.groupby('condition_id')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c5857c4e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "condition_id\n",
+       "A1    3\n",
+       "A2    3\n",
+       "B1    3\n",
+       "B2    1\n",
+       "C2    2\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The group-by object can be used as a DataFrame. \n",
+    "# Operations are executed on each group individually, then aggregated\n",
+    "data.groupby('condition_id').size()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "5c865cc1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "condition_id\n",
+       "A1    1.400000\n",
+       "A2    1.316667\n",
+       "B1    1.986667\n",
+       "B2    3.320000\n",
+       "C2    0.705000\n",
+       "Name: response_time, dtype: float64"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.groupby('condition_id')['response_time'].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "615a4515",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "condition_id\n",
+       "A1    4.01\n",
+       "A2    3.29\n",
+       "B1    5.74\n",
+       "B2    3.32\n",
+       "C2    0.73\n",
+       "Name: response_time, dtype: float64"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.groupby('condition_id')['response_time'].max()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b0441458",
+   "metadata": {},
+   "source": [
+    "# Pivot tables"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3feec98d",
+   "metadata": {},
+   "source": [
+    "We want to look at response time biases when the subjects respond LEFT vs RIGHT. In principle, we expect them to have the same response time in both cases.\n",
+    "\n",
+    "We compute a summary table with 1) condition_id on the rows; 2) response on the columns; 3) the average response time for all experiments with that condition and response\n",
+    "\n",
+    "We can do it with `groupby`, with some table manipulation commands."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "id": "4a8a7d0d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "condition_id  response\n",
+       "A1            LEFT        0.120000\n",
+       "              RIGHT       2.040000\n",
+       "A2            LEFT        1.316667\n",
+       "B1            LEFT        5.740000\n",
+       "              RIGHT       0.110000\n",
+       "B2            RIGHT       3.320000\n",
+       "C2            LEFT        0.680000\n",
+       "              RIGHT       0.730000\n",
+       "Name: response_time, dtype: float64"
+      ]
+     },
+     "execution_count": 44,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "summary = data.groupby(['condition_id', 'response'])['response_time'].mean()\n",
+    "summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "e5a645e0",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>response</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>condition_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>0.120000</td>\n",
+       "      <td>2.04</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>1.316667</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>5.740000</td>\n",
+       "      <td>0.11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C2</th>\n",
+       "      <td>0.680000</td>\n",
+       "      <td>0.73</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "response          LEFT  RIGHT\n",
+       "condition_id                 \n",
+       "A1            0.120000   2.04\n",
+       "A2            1.316667    NaN\n",
+       "B1            5.740000   0.11\n",
+       "B2                 NaN   3.32\n",
+       "C2            0.680000   0.73"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "summary.unstack(level=1)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3307fcc6",
+   "metadata": {},
+   "source": [
+    "Pandas has a command called `pivot_table` that can be used to perform this kind of operation straightforwardly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "id": "8941edfe",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th>response</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>condition_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>0.120000</td>\n",
+       "      <td>2.04</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>1.316667</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>5.740000</td>\n",
+       "      <td>0.11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C2</th>\n",
+       "      <td>0.680000</td>\n",
+       "      <td>0.73</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "response          LEFT  RIGHT\n",
+       "condition_id                 \n",
+       "A1            0.120000   2.04\n",
+       "A2            1.316667    NaN\n",
+       "B1            5.740000   0.11\n",
+       "B2                 NaN   3.32\n",
+       "C2            0.680000   0.73"
+      ]
+     },
+     "execution_count": 47,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data.pivot_table(index='condition_id', columns='response', values='response_time', aggfunc='mean')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 59,
+   "id": "a7d1d998",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead tr:last-of-type th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th></th>\n",
+       "      <th colspan=\"2\" halign=\"left\">mean</th>\n",
+       "      <th colspan=\"2\" halign=\"left\">std</th>\n",
+       "      <th colspan=\"2\" halign=\"left\">count</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>response</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "      <th>LEFT</th>\n",
+       "      <th>RIGHT</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>condition_id</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>A1</th>\n",
+       "      <td>0.120000</td>\n",
+       "      <td>2.04</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2.786001</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>A2</th>\n",
+       "      <td>1.316667</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.709425</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B1</th>\n",
+       "      <td>5.740000</td>\n",
+       "      <td>0.11</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.042426</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>B2</th>\n",
+       "      <td>NaN</td>\n",
+       "      <td>3.32</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>C2</th>\n",
+       "      <td>0.680000</td>\n",
+       "      <td>0.73</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                  mean             std           count      \n",
+       "response          LEFT RIGHT      LEFT     RIGHT  LEFT RIGHT\n",
+       "condition_id                                                \n",
+       "A1            0.120000  2.04       NaN  2.786001   1.0   2.0\n",
+       "A2            1.316667   NaN  1.709425       NaN   3.0   NaN\n",
+       "B1            5.740000  0.11       NaN  0.042426   1.0   2.0\n",
+       "B2                 NaN  3.32       NaN       NaN   NaN   1.0\n",
+       "C2            0.680000  0.73       NaN       NaN   1.0   1.0"
+      ]
+     },
+     "execution_count": 59,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(\n",
+    "    data\n",
+    "    .pivot_table(\n",
+    "        index='condition_id', \n",
+    "        columns='response', \n",
+    "        values='response_time', \n",
+    "        aggfunc=['mean', 'std', 'count'],\n",
+    "    )\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a770b812",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0234ccf2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c77c2dc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/031_split-apply-combine_tutor.ipynb
+++ b/notebooks/030_tabular_data/031_split-apply-combine_tutor.ipynb
@ -0,0 +1,335 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "247bbf84",
+   "metadata": {},
+   "source": [
+    "# Split-apply-combine operations for tabular data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "44584190",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "ba193f3f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>condition_id</th>\n",
+       "      <th>response_time</th>\n",
+       "      <th>response</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.12</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>312</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.37</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>312</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.68</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>313</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>0.07</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>313</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.08</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>314</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>0.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>314</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>0.14</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>314</td>\n",
+       "      <td>C2</td>\n",
+       "      <td>0.73</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>711</td>\n",
+       "      <td>A1</td>\n",
+       "      <td>4.01</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>712</td>\n",
+       "      <td>A2</td>\n",
+       "      <td>3.29</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>10</th>\n",
+       "      <td>713</td>\n",
+       "      <td>B1</td>\n",
+       "      <td>5.74</td>\n",
+       "      <td>LEFT</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>11</th>\n",
+       "      <td>714</td>\n",
+       "      <td>B2</td>\n",
+       "      <td>3.32</td>\n",
+       "      <td>RIGHT</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   subject_id condition_id  response_time response\n",
+       "0         312           A1           0.12     LEFT\n",
+       "1         312           A2           0.37     LEFT\n",
+       "2         312           C2           0.68     LEFT\n",
+       "3         313           A1           0.07    RIGHT\n",
+       "4         313           B1           0.08    RIGHT\n",
+       "5         314           A2           0.29     LEFT\n",
+       "6         314           B1           0.14    RIGHT\n",
+       "7         314           C2           0.73    RIGHT\n",
+       "8         711           A1           4.01    RIGHT\n",
+       "9         712           A2           3.29     LEFT\n",
+       "10        713           B1           5.74     LEFT\n",
+       "11        714           B2           3.32    RIGHT"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "data = pd.DataFrame(\n",
+    "    data=[\n",
+    "        ['312', 'A1', 0.12, 'LEFT'],\n",
+    "        ['312', 'A2', 0.37, 'LEFT'],\n",
+    "        ['312', 'C2', 0.68, 'LEFT'],\n",
+    "        ['313', 'A1', 0.07, 'RIGHT'],\n",
+    "        ['313', 'B1', 0.08, 'RIGHT'],\n",
+    "        ['314', 'A2', 0.29, 'LEFT'],\n",
+    "        ['314', 'B1', 0.14, 'RIGHT'],\n",
+    "        ['314', 'C2', 0.73, 'RIGHT'],\n",
+    "        ['711', 'A1', 4.01, 'RIGHT'],\n",
+    "        ['712', 'A2', 3.29, 'LEFT'],\n",
+    "        ['713', 'B1', 5.74, 'LEFT'],\n",
+    "        ['714', 'B2', 3.32, 'RIGHT'],\n",
+    "    ],\n",
+    "    columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
+    ")\n",
+    "data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8a239e0c",
+   "metadata": {},
+   "source": [
+    "# Group-by"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "31eba91e",
+   "metadata": {},
+   "source": [
+    "We want to compute the mean response time by condition.\n",
+    "\n",
+    "Let's start by doing it by hand, using for loops!"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ff3f890b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "805d04c7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2bc09c66",
+   "metadata": {},
+   "source": [
+    "This is a basic operation, and we would need to repeat this pattern a million times!\n",
+    "\n",
+    "Pandas and all other tools for tabular data provide a command for performing operations on groups."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dcc8c9c7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "818b8346",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b0441458",
+   "metadata": {},
+   "source": [
+    "# Pivot tables"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3feec98d",
+   "metadata": {},
+   "source": [
+    "We want to look at response time biases when the subjects respond LEFT vs RIGHT. In principle, we expect them to have the same response time in both cases.\n",
+    "\n",
+    "We compute a summary table with 1) condition_id on the rows; 2) response on the columns; 3) the average response time for all experiments with that condition and response\n",
+    "\n",
+    "We can do it with `groupby`, with some table manipulation commands."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "04f6ff60",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62600721",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3307fcc6",
+   "metadata": {},
+   "source": [
+    "Pandas has a command called `pivot_table` that can be used to perform this kind of operation straightforwardly."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a770b812",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0234ccf2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0c77c2dc",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/040_window_functions.ipynb
+++ b/notebooks/030_tabular_data/040_window_functions.ipynb
--- a/notebooks/030_tabular_data/041_window_functions_tutor.ipynb
+++ b/notebooks/030_tabular_data/041_window_functions_tutor.ipynb
@ -0,0 +1,320 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "247bbf84",
+   "metadata": {},
+   "source": [
+    "# Window functions for tabular data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "44584190",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "83bbd275",
+   "metadata": {},
+   "source": [
+    "# Load experimental data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "88b9e189",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv('timed_responses.csv', index_col=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "987a3518",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>time (ms)</th>\n",
+       "      <th>response</th>\n",
+       "      <th>accuracy</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>574</th>\n",
+       "      <td>3</td>\n",
+       "      <td>540</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.04</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1190</th>\n",
+       "      <td>2</td>\n",
+       "      <td>552</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.43</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1895</th>\n",
+       "      <td>2</td>\n",
+       "      <td>1036</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.36</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>53</th>\n",
+       "      <td>3</td>\n",
+       "      <td>257</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>158</th>\n",
+       "      <td>2</td>\n",
+       "      <td>743</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.32</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>551</th>\n",
+       "      <td>3</td>\n",
+       "      <td>619</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1602</th>\n",
+       "      <td>1</td>\n",
+       "      <td>43</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.65</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>413</th>\n",
+       "      <td>1</td>\n",
+       "      <td>471</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.80</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>785</th>\n",
+       "      <td>1</td>\n",
+       "      <td>121</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.10</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1393</th>\n",
+       "      <td>2</td>\n",
+       "      <td>903</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.33</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>629</th>\n",
+       "      <td>2</td>\n",
+       "      <td>353</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.17</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1829</th>\n",
+       "      <td>3</td>\n",
+       "      <td>768</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.26</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>902</th>\n",
+       "      <td>1</td>\n",
+       "      <td>1093</td>\n",
+       "      <td>LEFT</td>\n",
+       "      <td>0.34</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1486</th>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
+       "      <td>RIGHT</td>\n",
+       "      <td>0.29</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      subject_id  time (ms) response  accuracy\n",
+       "574            3        540    RIGHT      0.04\n",
+       "1190           2        552     LEFT      0.43\n",
+       "1895           2       1036     LEFT      0.36\n",
+       "53             3        257    RIGHT      0.11\n",
+       "158            2        743    RIGHT      0.32\n",
+       "551            3        619     LEFT      0.25\n",
+       "1602           1         43    RIGHT      0.65\n",
+       "413            1        471     LEFT      0.80\n",
+       "785            1        121     LEFT      0.10\n",
+       "1393           2        903    RIGHT      0.33\n",
+       "629            2        353     LEFT      0.17\n",
+       "1829           3        768    RIGHT      0.26\n",
+       "902            1       1093     LEFT      0.34\n",
+       "1486           2          3    RIGHT      0.29"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5c41cd93",
+   "metadata": {},
+   "source": [
+    "# Split-apply-combine operations return one aggregated value per group"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0234ccf2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.groupby('subject_id')['accuracy'].max()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2b2a1796",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2bb99152",
+   "metadata": {},
+   "source": [
+    "# However, for some calculations we need to have a value per row\n",
+    "\n",
+    "For example: for each subject, rank the responses by decreasing accuracy"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3aed0755",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "17f3d40f",
+   "metadata": {},
+   "source": [
+    "# In many cases, a window function is combined with a sorting operation\n",
+    "\n",
+    "For example: for each subject, count the number of \"LEFT\" responses up until any moment in the experiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "67efdd56",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a00b4f39",
+   "metadata": {},
+   "source": [
+    "# Window functions are also useful to compute changes in the data for each group\n",
+    "\n",
+    "In this case, the window function often uses the `shift(n)` method that lags the data by `n` rows"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e553c17f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f2973e3d",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c9ca46b0",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/notebooks/030_tabular_data/timed_responses.csv
+++ b/notebooks/030_tabular_data/timed_responses.csv
@ -0,0 +1,15 @@
+,subject_id,time (ms),response,accuracy
+574,3,540,RIGHT,0.04
+1190,2,552,LEFT,0.43
+1895,2,1036,LEFT,0.36
+53,3,257,RIGHT,0.11
+158,2,743,RIGHT,0.32
+551,3,619,LEFT,0.25
+1602,1,43,RIGHT,0.65
+413,1,471,LEFT,0.8
+785,1,121,LEFT,0.1
+1393,2,903,RIGHT,0.33
+629,2,353,LEFT,0.17
+1829,3,768,RIGHT,0.26
+902,1,1093,LEFT,0.34
+1486,2,3,RIGHT,0.29