336 lines
8.4 KiB
Plaintext
336 lines
8.4 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "247bbf84",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Split-apply-combine operations for tabular data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "44584190",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 2,
|
||
|
"id": "ba193f3f",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>subject_id</th>\n",
|
||
|
" <th>condition_id</th>\n",
|
||
|
" <th>response_time</th>\n",
|
||
|
" <th>response</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>0.12</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.37</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>C2</td>\n",
|
||
|
" <td>0.68</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>313</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>0.07</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>313</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>0.08</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.29</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>0.14</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>C2</td>\n",
|
||
|
" <td>0.73</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>711</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>4.01</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>712</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>3.29</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>713</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>5.74</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>714</td>\n",
|
||
|
" <td>B2</td>\n",
|
||
|
" <td>3.32</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" subject_id condition_id response_time response\n",
|
||
|
"0 312 A1 0.12 LEFT\n",
|
||
|
"1 312 A2 0.37 LEFT\n",
|
||
|
"2 312 C2 0.68 LEFT\n",
|
||
|
"3 313 A1 0.07 RIGHT\n",
|
||
|
"4 313 B1 0.08 RIGHT\n",
|
||
|
"5 314 A2 0.29 LEFT\n",
|
||
|
"6 314 B1 0.14 RIGHT\n",
|
||
|
"7 314 C2 0.73 RIGHT\n",
|
||
|
"8 711 A1 4.01 RIGHT\n",
|
||
|
"9 712 A2 3.29 LEFT\n",
|
||
|
"10 713 B1 5.74 LEFT\n",
|
||
|
"11 714 B2 3.32 RIGHT"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 2,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data = pd.DataFrame(\n",
|
||
|
" data=[\n",
|
||
|
" ['312', 'A1', 0.12, 'LEFT'],\n",
|
||
|
" ['312', 'A2', 0.37, 'LEFT'],\n",
|
||
|
" ['312', 'C2', 0.68, 'LEFT'],\n",
|
||
|
" ['313', 'A1', 0.07, 'RIGHT'],\n",
|
||
|
" ['313', 'B1', 0.08, 'RIGHT'],\n",
|
||
|
" ['314', 'A2', 0.29, 'LEFT'],\n",
|
||
|
" ['314', 'B1', 0.14, 'RIGHT'],\n",
|
||
|
" ['314', 'C2', 0.73, 'RIGHT'],\n",
|
||
|
" ['711', 'A1', 4.01, 'RIGHT'],\n",
|
||
|
" ['712', 'A2', 3.29, 'LEFT'],\n",
|
||
|
" ['713', 'B1', 5.74, 'LEFT'],\n",
|
||
|
" ['714', 'B2', 3.32, 'RIGHT'],\n",
|
||
|
" ],\n",
|
||
|
" columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
|
||
|
")\n",
|
||
|
"data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "8a239e0c",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Group-by"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "31eba91e",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"We want to compute the mean response time by condition.\n",
|
||
|
"\n",
|
||
|
"Let's start by doing it by hand, using for loops!"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "ff3f890b",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "805d04c7",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "2bc09c66",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"This is a basic operation, and we would need to repeat this pattern a million times!\n",
|
||
|
"\n",
|
||
|
"Pandas and all other tools for tabular data provide a command for performing operations on groups."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "dcc8c9c7",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "818b8346",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "b0441458",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Pivot tables"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "3feec98d",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"We want to look at response time biases when the subjects respond LEFT vs RIGHT. In principle, we expect them to have the same response time in both cases.\n",
|
||
|
"\n",
|
||
|
"We compute a summary table with 1) `condition_id` on the rows; 2) `response` on the columns; 3) the average response time for all experiments with that condition and response.\n",
|
||
|
"\n",
|
||
|
"We can do it with `groupby`, with some table manipulation commands."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "04f6ff60",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "62600721",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "3307fcc6",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Pandas has a command called `pivot_table` that can be used to perform this kind of operation straightforwardly."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "a770b812",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "0234ccf2",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "0c77c2dc",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.11.3"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|