2024-heraklion-data/exercises/tabular_window_functions/.ipynb_checkpoints/window_functions-checkpoint.ipynb

408 lines
14 KiB
Plaintext
Raw Permalink Normal View History

2024-08-27 14:27:53 +02:00
{
"cells": [
{
"cell_type": "markdown",
"id": "6f6aa857",
"metadata": {},
"source": [
"# Exercise: For each patcher, compute the average number of days they waited between experiments\n",
"\n",
"Here is how to proceed\n",
"1. Use a window function to compute the number of days that elapse between experiment (i.e., the distance between `date`), for each `patcher`. Add that as a new column, `'days from prev'`\n",
"2. Compute the average `'days from prev'` per patcher\n",
"\n",
"With your new awesome vectorization skills, it should take two lines!"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "8f9bc8b1",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Set some Pandas options: maximum number of rows/columns it's going to display\n",
"pd.set_option('display.max_rows', 1000)\n",
"pd.set_option('display.max_columns', 100)"
]
},
{
"cell_type": "markdown",
"id": "1be11d54",
"metadata": {},
"source": [
"# Load the neural data"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "b6b1abff",
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('shuffled_QC_passed_2024-07-04_collected_v1.csv', parse_dates=['date'])"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "c0237922",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>OP</th>\n",
" <th>patcher</th>\n",
" <th>date</th>\n",
" <th>slice</th>\n",
" <th>cell_ch</th>\n",
" <th>cell_ID</th>\n",
" <th>day</th>\n",
" <th>treatment</th>\n",
" <th>hrs_incubation</th>\n",
" <th>repatch</th>\n",
" <th>hrs_after_OP</th>\n",
" <th>Rs</th>\n",
" <th>Rin</th>\n",
" <th>resting_potential</th>\n",
" <th>max_spikes</th>\n",
" <th>Rheobase</th>\n",
" <th>AP_heigth</th>\n",
" <th>TH</th>\n",
" <th>max_depol</th>\n",
" <th>max_repol</th>\n",
" <th>membra_time_constant_tau</th>\n",
" <th>capacitance</th>\n",
" <th>comments</th>\n",
" <th>rheo_ramp</th>\n",
" <th>AP_halfwidth</th>\n",
" <th>Rheobse_ramp</th>\n",
" <th>Unnamed: 27</th>\n",
" <th>rheos_ramp</th>\n",
" <th>comment</th>\n",
" <th></th>\n",
" <th>high K concentration</th>\n",
" <th>RMP_from_char</th>\n",
" <th>tissue_source</th>\n",
" <th>area</th>\n",
" <th>patient_age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>OP211209</td>\n",
" <td>Verji</td>\n",
" <td>2024-03-13</td>\n",
" <td>S2</td>\n",
" <td>8</td>\n",
" <td>21d10S2c8</td>\n",
" <td>D1</td>\n",
" <td>Ctrl</td>\n",
" <td>0.0</td>\n",
" <td>no</td>\n",
" <td>13.298889</td>\n",
" <td>14.470281</td>\n",
" <td>166.878916</td>\n",
" <td>-67.962646</td>\n",
" <td>34</td>\n",
" <td>50.0</td>\n",
" <td>83.190918</td>\n",
" <td>-36.132812</td>\n",
" <td>302.124023</td>\n",
" <td>-72.631836</td>\n",
" <td>20.75</td>\n",
" <td>152.623120</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.966102</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>15 mM</td>\n",
" <td>-59.101382</td>\n",
" <td>Bielefeld</td>\n",
" <td>temporal</td>\n",
" <td>27.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>OP221024</td>\n",
" <td>Verji</td>\n",
" <td>2024-06-16</td>\n",
" <td>S3</td>\n",
" <td>4</td>\n",
" <td>22o24S3c4</td>\n",
" <td>D1</td>\n",
" <td>Ctrl</td>\n",
" <td>0.0</td>\n",
" <td>no</td>\n",
" <td>23.964167</td>\n",
" <td>11.521243</td>\n",
" <td>137.820797</td>\n",
" <td>-71.789551</td>\n",
" <td>41</td>\n",
" <td>50.0</td>\n",
" <td>93.322754</td>\n",
" <td>-42.968750</td>\n",
" <td>465.820312</td>\n",
" <td>-83.740234</td>\n",
" <td>14.85</td>\n",
" <td>124.324170</td>\n",
" <td>17</td>\n",
" <td>NaN</td>\n",
" <td>0.959995</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8 mM</td>\n",
" <td>-62.265689</td>\n",
" <td>Bielefeld</td>\n",
" <td>temporal</td>\n",
" <td>42.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>OP230810</td>\n",
" <td>Verji</td>\n",
" <td>2024-05-14</td>\n",
" <td>S2</td>\n",
" <td>5</td>\n",
" <td>23810S2c5</td>\n",
" <td>D1</td>\n",
" <td>TTX</td>\n",
" <td>0.0</td>\n",
" <td>no</td>\n",
" <td>7.043056</td>\n",
" <td>10.120637</td>\n",
" <td>67.739416</td>\n",
" <td>-70.629883</td>\n",
" <td>47</td>\n",
" <td>100.0</td>\n",
" <td>91.973877</td>\n",
" <td>-37.817383</td>\n",
" <td>415.771484</td>\n",
" <td>-107.666016</td>\n",
" <td>13.00</td>\n",
" <td>228.654858</td>\n",
" <td>10</td>\n",
" <td>402.013400</td>\n",
" <td>0.760052</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8 mM</td>\n",
" <td>-61.329228</td>\n",
" <td>Mitte</td>\n",
" <td>temporal</td>\n",
" <td>63.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>OP230209</td>\n",
" <td>Verji</td>\n",
" <td>2024-04-27</td>\n",
" <td>S2_D2</td>\n",
" <td>3</td>\n",
" <td>23209S2_D2c3</td>\n",
" <td>D2</td>\n",
" <td>high K</td>\n",
" <td>25.0</td>\n",
" <td>no</td>\n",
" <td>21.848333</td>\n",
" <td>7.745503</td>\n",
" <td>43.009610</td>\n",
" <td>-68.371582</td>\n",
" <td>31</td>\n",
" <td>500.0</td>\n",
" <td>67.163086</td>\n",
" <td>-29.284668</td>\n",
" <td>212.036133</td>\n",
" <td>-61.645508</td>\n",
" <td>11.05</td>\n",
" <td>215.784505</td>\n",
" <td>30</td>\n",
" <td>672.202407</td>\n",
" <td>0.958735</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8 mM</td>\n",
" <td>-62.577472</td>\n",
" <td>Bielefeld</td>\n",
" <td>temporal</td>\n",
" <td>63.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>OP240321</td>\n",
" <td>Verji</td>\n",
" <td>2024-04-11</td>\n",
" <td>S2</td>\n",
" <td>4</td>\n",
" <td>24321S2c4</td>\n",
" <td>D1</td>\n",
" <td>Ctrl</td>\n",
" <td>0.0</td>\n",
" <td>no</td>\n",
" <td>11.530278</td>\n",
" <td>7.632941</td>\n",
" <td>32.884808</td>\n",
" <td>-52.453613</td>\n",
" <td>21</td>\n",
" <td>200.0</td>\n",
" <td>84.008789</td>\n",
" <td>-36.785889</td>\n",
" <td>403.442383</td>\n",
" <td>-71.899414</td>\n",
" <td>14.80</td>\n",
" <td>695.791105</td>\n",
" <td>8</td>\n",
" <td>NaN</td>\n",
" <td>1.063838</td>\n",
" <td>324.520817</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>8 mM</td>\n",
" <td>-63.149769</td>\n",
" <td>Bielefeld</td>\n",
" <td>temporal</td>\n",
" <td>31.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" OP patcher date slice cell_ch cell_ID day treatment \\\n",
"0 OP211209 Verji 2024-03-13 S2 8 21d10S2c8 D1 Ctrl \n",
"1 OP221024 Verji 2024-06-16 S3 4 22o24S3c4 D1 Ctrl \n",
"2 OP230810 Verji 2024-05-14 S2 5 23810S2c5 D1 TTX \n",
"3 OP230209 Verji 2024-04-27 S2_D2 3 23209S2_D2c3 D2 high K \n",
"4 OP240321 Verji 2024-04-11 S2 4 24321S2c4 D1 Ctrl \n",
"\n",
" hrs_incubation repatch hrs_after_OP Rs Rin \\\n",
"0 0.0 no 13.298889 14.470281 166.878916 \n",
"1 0.0 no 23.964167 11.521243 137.820797 \n",
"2 0.0 no 7.043056 10.120637 67.739416 \n",
"3 25.0 no 21.848333 7.745503 43.009610 \n",
"4 0.0 no 11.530278 7.632941 32.884808 \n",
"\n",
" resting_potential max_spikes Rheobase AP_heigth TH max_depol \\\n",
"0 -67.962646 34 50.0 83.190918 -36.132812 302.124023 \n",
"1 -71.789551 41 50.0 93.322754 -42.968750 465.820312 \n",
"2 -70.629883 47 100.0 91.973877 -37.817383 415.771484 \n",
"3 -68.371582 31 500.0 67.163086 -29.284668 212.036133 \n",
"4 -52.453613 21 200.0 84.008789 -36.785889 403.442383 \n",
"\n",
" max_repol membra_time_constant_tau capacitance comments rheo_ramp \\\n",
"0 -72.631836 20.75 152.623120 NaN NaN \n",
"1 -83.740234 14.85 124.324170 17 NaN \n",
"2 -107.666016 13.00 228.654858 10 402.013400 \n",
"3 -61.645508 11.05 215.784505 30 672.202407 \n",
"4 -71.899414 14.80 695.791105 8 NaN \n",
"\n",
" AP_halfwidth Rheobse_ramp Unnamed: 27 rheos_ramp comment \\\n",
"0 0.966102 NaN NaN NaN NaN NaN \n",
"1 0.959995 NaN NaN NaN NaN NaN \n",
"2 0.760052 NaN NaN NaN NaN NaN \n",
"3 0.958735 NaN NaN NaN NaN NaN \n",
"4 1.063838 324.520817 NaN NaN NaN NaN \n",
"\n",
" high K concentration RMP_from_char tissue_source area patient_age \n",
"0 15 mM -59.101382 Bielefeld temporal 27.0 \n",
"1 8 mM -62.265689 Bielefeld temporal 42.0 \n",
"2 8 mM -61.329228 Mitte temporal 63.0 \n",
"3 8 mM -62.577472 Bielefeld temporal 63.0 \n",
"4 8 mM -63.149769 Bielefeld temporal 31.0 "
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7c93d25",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff2c32f0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "612215d0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}