1010 lines
28 KiB
Plaintext
1010 lines
28 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "86d2536c",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Combine information across tables: joins and anti-joins"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 1,
|
||
|
"id": "b6f949f7",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "1d2a4eab",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# \"Load\" some experimental data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "a9450803",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"data = pd.DataFrame(\n",
|
||
|
" data=[\n",
|
||
|
" ['312', 'A1', 0.12, 'LEFT'],\n",
|
||
|
" ['312', 'A2', 0.37, 'LEFT'],\n",
|
||
|
" ['312', 'C2', 0.68, 'LEFT'],\n",
|
||
|
" ['711', 'A1', 4.01, 'RIGHT'],\n",
|
||
|
" ['711', 'A2', 0.44, 'LEFT'],\n",
|
||
|
" ['313', 'A1', 0.07, 'RIGHT'],\n",
|
||
|
" ['313', 'B1', 0.08, 'RIGHT'],\n",
|
||
|
" ['712', 'A2', 3.29, 'LEFT'],\n",
|
||
|
" ['314', 'A2', 0.29, 'LEFT'],\n",
|
||
|
" ['714', 'B2', 3.32, 'RIGHT'],\n",
|
||
|
" ['314', 'B1', 0.14, 'RIGHT'],\n",
|
||
|
" ['314', 'C2', 0.73, 'RIGHT'],\n",
|
||
|
" ['713', 'B1', 5.74, 'LEFT'],\n",
|
||
|
" ],\n",
|
||
|
" columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
|
||
|
")\n",
|
||
|
"data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "a7e8b09b",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Each experiment belongs to one experimental condition, but the parameters of each condition are not in the table"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 3,
|
||
|
"id": "455471d7",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"condition_to_orientation = {\n",
|
||
|
" 'A1': 0,\n",
|
||
|
" 'A2': 0,\n",
|
||
|
" 'B1': 45,\n",
|
||
|
" 'B2': 45,\n",
|
||
|
" 'C1': 90,\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"condition_to_duration = {\n",
|
||
|
" 'A1': 0.1,\n",
|
||
|
" 'A2': 0.01,\n",
|
||
|
" 'B1': 0.1,\n",
|
||
|
" 'B2': 0.01,\n",
|
||
|
" 'C1': 0.2,\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"condition_to_surround = {\n",
|
||
|
" 'A1': 'FULL',\n",
|
||
|
" 'A2': 'NONE',\n",
|
||
|
" 'B1': 'NONE',\n",
|
||
|
" 'B2': 'FULL',\n",
|
||
|
" 'C1': 'FULL',\n",
|
||
|
"}\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"condition_to_stimulus_type = {\n",
|
||
|
" 'A1': 'LINES',\n",
|
||
|
" 'A2': 'DOTS',\n",
|
||
|
" 'B1': 'PLAID',\n",
|
||
|
" 'B2': 'PLAID',\n",
|
||
|
" 'C1': 'WIGGLES',\n",
|
||
|
"}\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "5ccfd7e7",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Manually adding the condition parameters to the table"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 73,
|
||
|
"id": "cc32110c",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"data_with_properties = data.copy()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 74,
|
||
|
"id": "c322a9af",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"0 A1\n",
|
||
|
"1 A2\n",
|
||
|
"2 C2\n",
|
||
|
"3 A1\n",
|
||
|
"4 A2\n",
|
||
|
"5 A1\n",
|
||
|
"6 B1\n",
|
||
|
"7 A2\n",
|
||
|
"8 A2\n",
|
||
|
"9 B2\n",
|
||
|
"10 B1\n",
|
||
|
"11 C2\n",
|
||
|
"12 B1\n",
|
||
|
"Name: condition_id, dtype: object"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 74,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data_with_properties['condition_id']"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 75,
|
||
|
"id": "0dbee78b",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": [
|
||
|
"0 0.0\n",
|
||
|
"1 0.0\n",
|
||
|
"2 NaN\n",
|
||
|
"3 0.0\n",
|
||
|
"4 0.0\n",
|
||
|
"5 0.0\n",
|
||
|
"6 45.0\n",
|
||
|
"7 0.0\n",
|
||
|
"8 0.0\n",
|
||
|
"9 45.0\n",
|
||
|
"10 45.0\n",
|
||
|
"11 NaN\n",
|
||
|
"12 45.0\n",
|
||
|
"Name: condition_id, dtype: float64"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 75,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data_with_properties['condition_id'].map(condition_to_orientation)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 76,
|
||
|
"id": "3fb3e3af",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"data_with_properties['orientation'] = data_with_properties['condition_id'].map(condition_to_orientation)\n",
|
||
|
"data_with_properties['duration'] = data_with_properties['condition_id'].map(condition_to_duration)\n",
|
||
|
"data_with_properties['surround'] = data_with_properties['condition_id'].map(condition_to_surround)\n",
|
||
|
"data_with_properties['stimulus_type'] = data_with_properties['condition_id'].map(condition_to_stimulus_type)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 77,
|
||
|
"id": "995eff91",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>subject_id</th>\n",
|
||
|
" <th>condition_id</th>\n",
|
||
|
" <th>response_time</th>\n",
|
||
|
" <th>response</th>\n",
|
||
|
" <th>orientation</th>\n",
|
||
|
" <th>duration</th>\n",
|
||
|
" <th>surround</th>\n",
|
||
|
" <th>stimulus_type</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>0.12</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.10</td>\n",
|
||
|
" <td>FULL</td>\n",
|
||
|
" <td>LINES</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.37</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.01</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>DOTS</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>C2</td>\n",
|
||
|
" <td>0.68</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>711</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>4.01</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.10</td>\n",
|
||
|
" <td>FULL</td>\n",
|
||
|
" <td>LINES</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>711</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.44</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.01</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>DOTS</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>313</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>0.07</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.10</td>\n",
|
||
|
" <td>FULL</td>\n",
|
||
|
" <td>LINES</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>313</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>0.08</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>45.0</td>\n",
|
||
|
" <td>0.10</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>PLAID</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>712</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>3.29</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.01</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>DOTS</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.29</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>0.0</td>\n",
|
||
|
" <td>0.01</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>DOTS</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>714</td>\n",
|
||
|
" <td>B2</td>\n",
|
||
|
" <td>3.32</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>45.0</td>\n",
|
||
|
" <td>0.01</td>\n",
|
||
|
" <td>FULL</td>\n",
|
||
|
" <td>PLAID</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>0.14</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>45.0</td>\n",
|
||
|
" <td>0.10</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>PLAID</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>C2</td>\n",
|
||
|
" <td>0.73</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12</th>\n",
|
||
|
" <td>713</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>5.74</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>45.0</td>\n",
|
||
|
" <td>0.10</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>PLAID</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" subject_id condition_id response_time response orientation duration \\\n",
|
||
|
"0 312 A1 0.12 LEFT 0.0 0.10 \n",
|
||
|
"1 312 A2 0.37 LEFT 0.0 0.01 \n",
|
||
|
"2 312 C2 0.68 LEFT NaN NaN \n",
|
||
|
"3 711 A1 4.01 RIGHT 0.0 0.10 \n",
|
||
|
"4 711 A2 0.44 LEFT 0.0 0.01 \n",
|
||
|
"5 313 A1 0.07 RIGHT 0.0 0.10 \n",
|
||
|
"6 313 B1 0.08 RIGHT 45.0 0.10 \n",
|
||
|
"7 712 A2 3.29 LEFT 0.0 0.01 \n",
|
||
|
"8 314 A2 0.29 LEFT 0.0 0.01 \n",
|
||
|
"9 714 B2 3.32 RIGHT 45.0 0.01 \n",
|
||
|
"10 314 B1 0.14 RIGHT 45.0 0.10 \n",
|
||
|
"11 314 C2 0.73 RIGHT NaN NaN \n",
|
||
|
"12 713 B1 5.74 LEFT 45.0 0.10 \n",
|
||
|
"\n",
|
||
|
" surround stimulus_type \n",
|
||
|
"0 FULL LINES \n",
|
||
|
"1 NONE DOTS \n",
|
||
|
"2 NaN NaN \n",
|
||
|
"3 FULL LINES \n",
|
||
|
"4 NONE DOTS \n",
|
||
|
"5 FULL LINES \n",
|
||
|
"6 NONE PLAID \n",
|
||
|
"7 NONE DOTS \n",
|
||
|
"8 NONE DOTS \n",
|
||
|
"9 FULL PLAID \n",
|
||
|
"10 NONE PLAID \n",
|
||
|
"11 NaN NaN \n",
|
||
|
"12 NONE PLAID "
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 77,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data_with_properties"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "d6e71b13",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Using a join operation"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"id": "d9835d7c",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>orientation</th>\n",
|
||
|
" <th>duration</th>\n",
|
||
|
" <th>surround</th>\n",
|
||
|
" <th>stimulus_type</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A1</th>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>FULL</td>\n",
|
||
|
" <td>LINES</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>A2</th>\n",
|
||
|
" <td>0</td>\n",
|
||
|
" <td>0.01</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>DOTS</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B1</th>\n",
|
||
|
" <td>45</td>\n",
|
||
|
" <td>0.1</td>\n",
|
||
|
" <td>NONE</td>\n",
|
||
|
" <td>PLAID</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>B2</th>\n",
|
||
|
" <td>45</td>\n",
|
||
|
" <td>0.01</td>\n",
|
||
|
" <td>FULL</td>\n",
|
||
|
" <td>PLAID</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>C1</th>\n",
|
||
|
" <td>90</td>\n",
|
||
|
" <td>0.2</td>\n",
|
||
|
" <td>FULL</td>\n",
|
||
|
" <td>WIGGLES</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" orientation duration surround stimulus_type\n",
|
||
|
"A1 0 0.1 FULL LINES\n",
|
||
|
"A2 0 0.01 NONE DOTS\n",
|
||
|
"B1 45 0.1 NONE PLAID\n",
|
||
|
"B2 45 0.01 FULL PLAID\n",
|
||
|
"C1 90 0.2 FULL WIGGLES"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Often, this is done using a spreadsheet\n",
|
||
|
"condition_properties = pd.DataFrame(\n",
|
||
|
" [condition_to_orientation, condition_to_duration, condition_to_surround, condition_to_stimulus_type],\n",
|
||
|
" index=['orientation', 'duration', 'surround', 'stimulus_type'],\n",
|
||
|
").T\n",
|
||
|
"condition_properties"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "a9087876",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"data.merge(condition_properties, left_on='condition_id', right_index=True)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "61cb65be",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"data.merge(condition_properties, left_on='condition_id', right_index=True, how='left')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "7b4d23df",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"data.merge(condition_properties, left_on='condition_id', right_index=True, how='outer')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"id": "cba9534f",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"# Anti-join: filter out unwanted data"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 5,
|
||
|
"id": "1cb2bbdb",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"# We are given a list of subjects that are outliers and should be disregarded in the analysis\n",
|
||
|
"outliers = pd.DataFrame([['711'], ['712'], ['713'], ['714'], ['888']], columns=['subject_id'])"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 6,
|
||
|
"id": "e2e627d5",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>subject_id</th>\n",
|
||
|
" <th>condition_id</th>\n",
|
||
|
" <th>response_time</th>\n",
|
||
|
" <th>response</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>711</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>4.01</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>711</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.44</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>712</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>3.29</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>714</td>\n",
|
||
|
" <td>B2</td>\n",
|
||
|
" <td>3.32</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>713</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>5.74</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" subject_id condition_id response_time response\n",
|
||
|
"0 711 A1 4.01 RIGHT\n",
|
||
|
"1 711 A2 0.44 LEFT\n",
|
||
|
"2 712 A2 3.29 LEFT\n",
|
||
|
"3 714 B2 3.32 RIGHT\n",
|
||
|
"4 713 B1 5.74 LEFT"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 6,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data.merge(outliers, on='subject_id')"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 7,
|
||
|
"id": "eb809fe0",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>subject_id</th>\n",
|
||
|
" <th>condition_id</th>\n",
|
||
|
" <th>response_time</th>\n",
|
||
|
" <th>response</th>\n",
|
||
|
" <th>_merge</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>0.12</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.37</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>C2</td>\n",
|
||
|
" <td>0.68</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>3</th>\n",
|
||
|
" <td>711</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>4.01</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>both</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>4</th>\n",
|
||
|
" <td>711</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.44</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>both</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>313</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>0.07</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>313</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>0.08</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>7</th>\n",
|
||
|
" <td>712</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>3.29</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>both</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.29</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>0.14</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>C2</td>\n",
|
||
|
" <td>0.73</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>11</th>\n",
|
||
|
" <td>714</td>\n",
|
||
|
" <td>B2</td>\n",
|
||
|
" <td>3.32</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>both</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>12</th>\n",
|
||
|
" <td>713</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>5.74</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>both</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>13</th>\n",
|
||
|
" <td>888</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>NaN</td>\n",
|
||
|
" <td>right_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" subject_id condition_id response_time response _merge\n",
|
||
|
"0 312 A1 0.12 LEFT left_only\n",
|
||
|
"1 312 A2 0.37 LEFT left_only\n",
|
||
|
"2 312 C2 0.68 LEFT left_only\n",
|
||
|
"3 711 A1 4.01 RIGHT both\n",
|
||
|
"4 711 A2 0.44 LEFT both\n",
|
||
|
"5 313 A1 0.07 RIGHT left_only\n",
|
||
|
"6 313 B1 0.08 RIGHT left_only\n",
|
||
|
"7 712 A2 3.29 LEFT both\n",
|
||
|
"8 314 A2 0.29 LEFT left_only\n",
|
||
|
"9 314 B1 0.14 RIGHT left_only\n",
|
||
|
"10 314 C2 0.73 RIGHT left_only\n",
|
||
|
"11 714 B2 3.32 RIGHT both\n",
|
||
|
"12 713 B1 5.74 LEFT both\n",
|
||
|
"13 888 NaN NaN NaN right_only"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 7,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"data.merge(outliers, on='subject_id', how='outer', indicator=True)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 8,
|
||
|
"id": "6fdb696e",
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/html": [
|
||
|
"<div>\n",
|
||
|
"<style scoped>\n",
|
||
|
" .dataframe tbody tr th:only-of-type {\n",
|
||
|
" vertical-align: middle;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe tbody tr th {\n",
|
||
|
" vertical-align: top;\n",
|
||
|
" }\n",
|
||
|
"\n",
|
||
|
" .dataframe thead th {\n",
|
||
|
" text-align: right;\n",
|
||
|
" }\n",
|
||
|
"</style>\n",
|
||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
||
|
" <thead>\n",
|
||
|
" <tr style=\"text-align: right;\">\n",
|
||
|
" <th></th>\n",
|
||
|
" <th>subject_id</th>\n",
|
||
|
" <th>condition_id</th>\n",
|
||
|
" <th>response_time</th>\n",
|
||
|
" <th>response</th>\n",
|
||
|
" <th>_merge</th>\n",
|
||
|
" </tr>\n",
|
||
|
" </thead>\n",
|
||
|
" <tbody>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>0</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>0.12</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>1</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.37</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>2</th>\n",
|
||
|
" <td>312</td>\n",
|
||
|
" <td>C2</td>\n",
|
||
|
" <td>0.68</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>5</th>\n",
|
||
|
" <td>313</td>\n",
|
||
|
" <td>A1</td>\n",
|
||
|
" <td>0.07</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>6</th>\n",
|
||
|
" <td>313</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>0.08</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>8</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>A2</td>\n",
|
||
|
" <td>0.29</td>\n",
|
||
|
" <td>LEFT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>9</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>B1</td>\n",
|
||
|
" <td>0.14</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" <tr>\n",
|
||
|
" <th>10</th>\n",
|
||
|
" <td>314</td>\n",
|
||
|
" <td>C2</td>\n",
|
||
|
" <td>0.73</td>\n",
|
||
|
" <td>RIGHT</td>\n",
|
||
|
" <td>left_only</td>\n",
|
||
|
" </tr>\n",
|
||
|
" </tbody>\n",
|
||
|
"</table>\n",
|
||
|
"</div>"
|
||
|
],
|
||
|
"text/plain": [
|
||
|
" subject_id condition_id response_time response _merge\n",
|
||
|
"0 312 A1 0.12 LEFT left_only\n",
|
||
|
"1 312 A2 0.37 LEFT left_only\n",
|
||
|
"2 312 C2 0.68 LEFT left_only\n",
|
||
|
"5 313 A1 0.07 RIGHT left_only\n",
|
||
|
"6 313 B1 0.08 RIGHT left_only\n",
|
||
|
"8 314 A2 0.29 LEFT left_only\n",
|
||
|
"9 314 B1 0.14 RIGHT left_only\n",
|
||
|
"10 314 C2 0.73 RIGHT left_only"
|
||
|
]
|
||
|
},
|
||
|
"execution_count": 8,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"temp = data.merge(outliers, on='subject_id', how='outer', indicator=True)\n",
|
||
|
"data_without_outliers = temp[temp['_merge'] == 'left_only']\n",
|
||
|
"data_without_outliers"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"id": "6c3e6baa",
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": []
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.11.3"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|