{ "cells": [ { "cell_type": "markdown", "id": "f11a76bf", "metadata": {}, "source": [ "# Exercise: Add experiment information to electrophysiology data" ] }, { "cell_type": "code", "execution_count": 1, "id": "b6f2742b", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# Set some Pandas options: maximum number of rows/columns it's going to display\n", "pd.set_option('display.max_rows', 1000)\n", "pd.set_option('display.max_columns', 100)" ] }, { "cell_type": "markdown", "id": "2967c84e", "metadata": {}, "source": [ "# Load electrophysiology data" ] }, { "cell_type": "code", "execution_count": 2, "id": "ed626ee3", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv('../../data/QC_passed_2024-07-04_collected.csv')\n", "info = pd.read_csv('../../data/op_info.csv')" ] }, { "cell_type": "code", "execution_count": 3, "id": "48d5375f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OPfilenameslicecell_chcell_IDdaytreatmenthrs_incubationrepatchhrs_after_OPRsRinresting_potentialmax_spikesRheobaseAP_heigthTHmax_depolmax_repolmembra_time_constant_taucapacitancecommentsrheo_rampAP_halfwidthRheobse_rampUnnamed: 27rheos_rampcommenthigh K concentrationRMP_from_char
0OP23042023420003.abfS1123420S1c1D1TTX0.0no10.4163896.67564339.025301-74.28588924200.080.749512-35.278320336.181641-60.79101619.40510.6017670753.3801131.151009NaNNaNNaNNaNNaN8 mM-61.828554
1OP23042023420003.abfS1323420S1c3D1TTX0.0no10.4163897.86717448.728367-69.57397526300.078.448486-32.043457350.097656-67.13867217.30393.3979181585.1028371.006321NaNNaNNaNNaNNaN8 mM-60.460298
2OP23042023420003.abfS1623420S1c6D1TTX0.0no10.4163898.82013435.971082-54.95605522300.076.660156-29.827881270.629883-52.24609414.85426.0987743173.9157971.266335NaNNaNNaNNaNNaN8 mM-59.615979
3OP23042023420003.abfS1723420S1c7D1TTX0.0yes10.4163897.26919539.186101-69.26879924300.075.030518-29.699707242.553711-71.41113317.15478.2733624598.0799360.994396NaNNaNNaNNaNNaN8 mM-61.173839
4OP23042023420003.abfS1823420S1c8D1TTX0.0yes10.4163896.00040031.599917-70.55053722350.081.011963-33.068848309.448242-61.40136716.65575.5139245786.9278981.182830NaNNaNNaNNaNNaN8 mM-60.956350
\n", "
" ], "text/plain": [ " OP filename slice cell_ch cell_ID day treatment \\\n", "0 OP230420 23420003.abf S1 1 23420S1c1 D1 TTX \n", "1 OP230420 23420003.abf S1 3 23420S1c3 D1 TTX \n", "2 OP230420 23420003.abf S1 6 23420S1c6 D1 TTX \n", "3 OP230420 23420003.abf S1 7 23420S1c7 D1 TTX \n", "4 OP230420 23420003.abf S1 8 23420S1c8 D1 TTX \n", "\n", " hrs_incubation repatch hrs_after_OP Rs Rin \\\n", "0 0.0 no 10.416389 6.675643 39.025301 \n", "1 0.0 no 10.416389 7.867174 48.728367 \n", "2 0.0 no 10.416389 8.820134 35.971082 \n", "3 0.0 yes 10.416389 7.269195 39.186101 \n", "4 0.0 yes 10.416389 6.000400 31.599917 \n", "\n", " resting_potential max_spikes Rheobase AP_heigth TH max_depol \\\n", "0 -74.285889 24 200.0 80.749512 -35.278320 336.181641 \n", "1 -69.573975 26 300.0 78.448486 -32.043457 350.097656 \n", "2 -54.956055 22 300.0 76.660156 -29.827881 270.629883 \n", "3 -69.268799 24 300.0 75.030518 -29.699707 242.553711 \n", "4 -70.550537 22 350.0 81.011963 -33.068848 309.448242 \n", "\n", " max_repol membra_time_constant_tau capacitance comments rheo_ramp \\\n", "0 -60.791016 19.40 510.601767 0 753.380113 \n", "1 -67.138672 17.30 393.397918 1 585.102837 \n", "2 -52.246094 14.85 426.098774 3 173.915797 \n", "3 -71.411133 17.15 478.273362 4 598.079936 \n", "4 -61.401367 16.65 575.513924 5 786.927898 \n", "\n", " AP_halfwidth Rheobse_ramp Unnamed: 27 rheos_ramp comment \\\n", "0 1.151009 NaN NaN NaN NaN NaN \n", "1 1.006321 NaN NaN NaN NaN NaN \n", "2 1.266335 NaN NaN NaN NaN NaN \n", "3 0.994396 NaN NaN NaN NaN NaN \n", "4 1.182830 NaN NaN NaN NaN NaN \n", "\n", " high K concentration RMP_from_char \n", "0 8 mM -61.828554 \n", "1 8 mM -60.460298 \n", "2 8 mM -59.615979 \n", "3 8 mM -61.173839 \n", "4 8 mM -60.956350 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "id": "47191528", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OPtissue_sourceareapatient_agepatcher
0OP201027Mittetemporal33Rosie
1OP201029Mittetemporal47Rosie
2OP210323Virchowtemporal10Rosie
3OP210615Virchowtemporal19Rosie
4OP211123Bielefeldtemporal68Rosie
\n", "
" ], "text/plain": [ " OP tissue_source area patient_age patcher\n", "0 OP201027 Mitte temporal 33 Rosie\n", "1 OP201029 Mitte temporal 47 Rosie\n", "2 OP210323 Virchow temporal 10 Rosie\n", "3 OP210615 Virchow temporal 19 Rosie\n", "4 OP211123 Bielefeld temporal 68 Rosie" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "info.head()" ] }, { "cell_type": "markdown", "id": "2fef4d37", "metadata": {}, "source": [ "# 1. Add experiment information to the electrophysiology results\n", "\n", "* Is there information for every experiment?\n", "* How many experiments did each patcher perform? (i.e., individual OPs, or rows in `info`)\n", "* How many samples did each patcher analyze? (i.e., individual rows in `df`)" ] }, { "cell_type": "code", "execution_count": 5, "id": "35e19a53", "metadata": {}, "outputs": [], "source": [ "df_with_info = df.merge(info, on='OP', how='left')" ] }, { "cell_type": "code", "execution_count": 6, "id": "eac1244f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "OP 827\n", "filename 827\n", "slice 827\n", "cell_ch 827\n", "cell_ID 827\n", "day 827\n", "treatment 827\n", "hrs_incubation 827\n", "repatch 827\n", "hrs_after_OP 827\n", "Rs 827\n", "Rin 827\n", "resting_potential 827\n", "max_spikes 827\n", "Rheobase 824\n", "AP_heigth 824\n", "TH 824\n", "max_depol 824\n", "max_repol 824\n", "membra_time_constant_tau 827\n", "capacitance 827\n", "comments 742\n", "rheo_ramp 120\n", "AP_halfwidth 820\n", "Rheobse_ramp 160\n", "Unnamed: 27 0\n", "rheos_ramp 32\n", "comment 5\n", " 37\n", "high K concentration 827\n", "RMP_from_char 827\n", "tissue_source 800\n", "area 800\n", "patient_age 800\n", "patcher 800\n", "dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_with_info.count()" ] }, { "cell_type": "code", "execution_count": 7, "id": "2f6724ce", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Verji 35\n", "Rosie 8\n", "Anna 2\n", "Name: patcher, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "info['patcher'].value_counts()" ] }, { "cell_type": "code", "execution_count": 8, "id": "8f996049", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Verji 594\n", "Rosie 206\n", "Name: patcher, dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_with_info['patcher'].value_counts()" ] }, { "cell_type": "markdown", "id": "44031178", "metadata": {}, "source": [ "# 2. Remove outliers from the table\n", "\n", "1. Load the list of outliers in `outliers.csv`\n", "2. Use an anti-join to remove the outliers from the table\n", "3. How many samples (rows) are left in the data?" ] }, { "cell_type": "code", "execution_count": 9, "id": "d1d4cc27", "metadata": {}, "outputs": [], "source": [ "outliers = pd.read_csv('outliers.csv')" ] }, { "cell_type": "code", "execution_count": 10, "id": "fbebbd97", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(134, 2)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "outliers.shape" ] }, { "cell_type": "code", "execution_count": 11, "id": "8a3c7943", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OPcell_ID
0OP24020124201S2c2
1OP2103232021_03_25_0S4_D2c6
2OP23080823808S2c6
3OP24050324503S1c6
4OP2301092311S3c2
\n", "
" ], "text/plain": [ " OP cell_ID\n", "0 OP240201 24201S2c2\n", "1 OP210323 2021_03_25_0S4_D2c6\n", "2 OP230808 23808S2c6\n", "3 OP240503 24503S1c6\n", "4 OP230109 2311S3c2" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "outliers.head()" ] }, { "cell_type": "code", "execution_count": 12, "id": "573687e7", "metadata": {}, "outputs": [], "source": [ "temp = df_with_info.merge(outliers, on=['OP', 'cell_ID'], how='outer', indicator=True)" ] }, { "cell_type": "code", "execution_count": 13, "id": "a4a6574b", "metadata": {}, "outputs": [], "source": [ "df_without_outliers = temp[temp['_merge'] == 'left_only'].drop('_merge', axis=1)" ] }, { "cell_type": "code", "execution_count": 14, "id": "8fd89a40", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(659, 35)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_without_outliers.shape" ] }, { "cell_type": "code", "execution_count": 15, "id": "07f4776a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OPfilenameslicecell_chcell_IDdaytreatmenthrs_incubationrepatchhrs_after_OPRsRinresting_potentialmax_spikesRheobaseAP_heigthTHmax_depolmax_repolmembra_time_constant_taucapacitancecommentsrheo_rampAP_halfwidthRheobse_rampUnnamed: 27rheos_rampcommenthigh K concentrationRMP_from_chartissue_sourceareapatient_agepatcher
0OP23042023420003.abfS1123420S1c1D1TTX0.0no10.4163896.67564339.025301-74.28588924200.080.749512-35.278320336.181641-60.79101619.40510.6017670753.3801131.151009NaNNaNNaNNaNNaN8 mM-61.828554Bielefeldtemporal13.0Verji
1OP23042023420003.abfS1323420S1c3D1TTX0.0no10.4163897.86717448.728367-69.57397526300.078.448486-32.043457350.097656-67.13867217.30393.3979181585.1028371.006321NaNNaNNaNNaNNaN8 mM-60.460298Bielefeldtemporal13.0Verji
2OP23042023420003.abfS1623420S1c6D1TTX0.0no10.4163898.82013435.971082-54.95605522300.076.660156-29.827881270.629883-52.24609414.85426.0987743173.9157971.266335NaNNaNNaNNaNNaN8 mM-59.615979Bielefeldtemporal13.0Verji
5OP23042023420003.abfS1823420S1c8D1TTX0.0yes10.4163896.00040031.599917-70.55053722350.081.011963-33.068848309.448242-61.40136716.65575.5139245786.9278981.182830NaNNaNNaNNaNNaN8 mM-60.956350Bielefeldtemporal13.0Verji
6OP23042023420061.abfS1_D2823420S1c8D2TTX19.0yes29.6333338.27161430.607259-70.74585011300.048.883057-20.855713100.952148-27.46582013.25864.89243029565.9388651.504127NaNNaNNaNNaNNaN8 mM-61.283967Bielefeldtemporal13.0Verji
\n", "
" ], "text/plain": [ " OP filename slice cell_ch cell_ID day treatment \\\n", "0 OP230420 23420003.abf S1 1 23420S1c1 D1 TTX \n", "1 OP230420 23420003.abf S1 3 23420S1c3 D1 TTX \n", "2 OP230420 23420003.abf S1 6 23420S1c6 D1 TTX \n", "5 OP230420 23420003.abf S1 8 23420S1c8 D1 TTX \n", "6 OP230420 23420061.abf S1_D2 8 23420S1c8 D2 TTX \n", "\n", " hrs_incubation repatch hrs_after_OP Rs Rin \\\n", "0 0.0 no 10.416389 6.675643 39.025301 \n", "1 0.0 no 10.416389 7.867174 48.728367 \n", "2 0.0 no 10.416389 8.820134 35.971082 \n", "5 0.0 yes 10.416389 6.000400 31.599917 \n", "6 19.0 yes 29.633333 8.271614 30.607259 \n", "\n", " resting_potential max_spikes Rheobase AP_heigth TH max_depol \\\n", "0 -74.285889 24 200.0 80.749512 -35.278320 336.181641 \n", "1 -69.573975 26 300.0 78.448486 -32.043457 350.097656 \n", "2 -54.956055 22 300.0 76.660156 -29.827881 270.629883 \n", "5 -70.550537 22 350.0 81.011963 -33.068848 309.448242 \n", "6 -70.745850 1 1300.0 48.883057 -20.855713 100.952148 \n", "\n", " max_repol membra_time_constant_tau capacitance comments rheo_ramp \\\n", "0 -60.791016 19.40 510.601767 0 753.380113 \n", "1 -67.138672 17.30 393.397918 1 585.102837 \n", "2 -52.246094 14.85 426.098774 3 173.915797 \n", "5 -61.401367 16.65 575.513924 5 786.927898 \n", "6 -27.465820 13.25 864.892430 29 565.938865 \n", "\n", " AP_halfwidth Rheobse_ramp Unnamed: 27 rheos_ramp comment \\\n", "0 1.151009 NaN NaN NaN NaN NaN \n", "1 1.006321 NaN NaN NaN NaN NaN \n", "2 1.266335 NaN NaN NaN NaN NaN \n", "5 1.182830 NaN NaN NaN NaN NaN \n", "6 1.504127 NaN NaN NaN NaN NaN \n", "\n", " high K concentration RMP_from_char tissue_source area patient_age \\\n", "0 8 mM -61.828554 Bielefeld temporal 13.0 \n", "1 8 mM -60.460298 Bielefeld temporal 13.0 \n", "2 8 mM -59.615979 Bielefeld temporal 13.0 \n", "5 8 mM -60.956350 Bielefeld temporal 13.0 \n", "6 8 mM -61.283967 Bielefeld temporal 13.0 \n", "\n", " patcher \n", "0 Verji \n", "1 Verji \n", "2 Verji \n", "5 Verji \n", "6 Verji " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_without_outliers.head()" ] }, { "cell_type": "markdown", "id": "84270332", "metadata": {}, "source": [ "# 3. Save final result in `processed_QC_passed_2024-07-04_collected_v1.csv`\n", "\n", "1. Using the `.to_csv` method of Pandas DataFrames" ] }, { "cell_type": "code", "execution_count": 16, "id": "85902eea", "metadata": {}, "outputs": [], "source": [ "df_without_outliers.to_csv('processed_QC_passed_2024-07-04_collected_v1.csv', index=None)" ] }, { "cell_type": "code", "execution_count": null, "id": "c7bcff45", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.3" } }, "nbformat": 4, "nbformat_minor": 5 }