{
"cells": [
{
"cell_type": "markdown",
"id": "37957eb0",
"metadata": {},
"source": [
"# Combine information across tables: joins and anti-joins"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "b6f949f7",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "6a7fcf90",
"metadata": {},
"source": [
"# \"Load\" some experimental data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "a9450803",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subject_id | \n",
" condition_id | \n",
" response_time | \n",
" response | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 312 | \n",
" A1 | \n",
" 0.12 | \n",
" LEFT | \n",
"
\n",
" \n",
" 1 | \n",
" 312 | \n",
" A2 | \n",
" 0.37 | \n",
" LEFT | \n",
"
\n",
" \n",
" 2 | \n",
" 312 | \n",
" C2 | \n",
" 0.68 | \n",
" LEFT | \n",
"
\n",
" \n",
" 3 | \n",
" 711 | \n",
" A1 | \n",
" 4.01 | \n",
" RIGHT | \n",
"
\n",
" \n",
" 4 | \n",
" 711 | \n",
" A2 | \n",
" 0.44 | \n",
" LEFT | \n",
"
\n",
" \n",
" 5 | \n",
" 313 | \n",
" A1 | \n",
" 0.07 | \n",
" RIGHT | \n",
"
\n",
" \n",
" 6 | \n",
" 313 | \n",
" B1 | \n",
" 0.08 | \n",
" RIGHT | \n",
"
\n",
" \n",
" 7 | \n",
" 712 | \n",
" A2 | \n",
" 3.29 | \n",
" LEFT | \n",
"
\n",
" \n",
" 8 | \n",
" 314 | \n",
" A2 | \n",
" 0.29 | \n",
" LEFT | \n",
"
\n",
" \n",
" 9 | \n",
" 714 | \n",
" B2 | \n",
" 3.32 | \n",
" RIGHT | \n",
"
\n",
" \n",
" 10 | \n",
" 314 | \n",
" B1 | \n",
" 0.14 | \n",
" RIGHT | \n",
"
\n",
" \n",
" 11 | \n",
" 314 | \n",
" C2 | \n",
" 0.73 | \n",
" RIGHT | \n",
"
\n",
" \n",
" 12 | \n",
" 713 | \n",
" B1 | \n",
" 5.74 | \n",
" LEFT | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subject_id condition_id response_time response\n",
"0 312 A1 0.12 LEFT\n",
"1 312 A2 0.37 LEFT\n",
"2 312 C2 0.68 LEFT\n",
"3 711 A1 4.01 RIGHT\n",
"4 711 A2 0.44 LEFT\n",
"5 313 A1 0.07 RIGHT\n",
"6 313 B1 0.08 RIGHT\n",
"7 712 A2 3.29 LEFT\n",
"8 314 A2 0.29 LEFT\n",
"9 714 B2 3.32 RIGHT\n",
"10 314 B1 0.14 RIGHT\n",
"11 314 C2 0.73 RIGHT\n",
"12 713 B1 5.74 LEFT"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.DataFrame(\n",
" data=[\n",
" ['312', 'A1', 0.12, 'LEFT'],\n",
" ['312', 'A2', 0.37, 'LEFT'],\n",
" ['312', 'C2', 0.68, 'LEFT'],\n",
" ['711', 'A1', 4.01, 'RIGHT'],\n",
" ['711', 'A2', 0.44, 'LEFT'],\n",
" ['313', 'A1', 0.07, 'RIGHT'],\n",
" ['313', 'B1', 0.08, 'RIGHT'],\n",
" ['712', 'A2', 3.29, 'LEFT'],\n",
" ['314', 'A2', 0.29, 'LEFT'],\n",
" ['714', 'B2', 3.32, 'RIGHT'],\n",
" ['314', 'B1', 0.14, 'RIGHT'],\n",
" ['314', 'C2', 0.73, 'RIGHT'],\n",
" ['713', 'B1', 5.74, 'LEFT'],\n",
" ],\n",
" columns=['subject_id', 'condition_id', 'response_time', 'response'],\n",
")\n",
"data"
]
},
{
"cell_type": "markdown",
"id": "9f6de0d6",
"metadata": {},
"source": [
"Each experiment belongs to one experimental condition, but the parameters of each condition are not in the table"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "455471d7",
"metadata": {},
"outputs": [],
"source": [
"condition_to_orientation = {\n",
" 'A1': 0,\n",
" 'A2': 0,\n",
" 'B1': 45,\n",
" 'B2': 45,\n",
" 'C1': 90,\n",
"}\n",
"\n",
"condition_to_duration = {\n",
" 'A1': 0.1,\n",
" 'A2': 0.01,\n",
" 'B1': 0.1,\n",
" 'B2': 0.01,\n",
" 'C1': 0.2,\n",
"}\n",
"\n",
"condition_to_surround = {\n",
" 'A1': 'FULL',\n",
" 'A2': 'NONE',\n",
" 'B1': 'NONE',\n",
" 'B2': 'FULL',\n",
" 'C1': 'FULL',\n",
"}\n",
"\n",
"\n",
"condition_to_stimulus_type = {\n",
" 'A1': 'LINES',\n",
" 'A2': 'DOTS',\n",
" 'B1': 'PLAID',\n",
" 'B2': 'PLAID',\n",
" 'C1': 'WIGGLES',\n",
"}\n"
]
},
{
"cell_type": "markdown",
"id": "5ccfd7e7",
"metadata": {},
"source": [
"# Manually adding the condition parameters to the table"
]
},
{
"cell_type": "code",
"execution_count": 73,
"id": "cc32110c",
"metadata": {},
"outputs": [],
"source": [
"data_with_properties = data.copy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "06263dc6",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "b96962b2",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "d6e71b13",
"metadata": {},
"source": [
"# Using a join operation"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d9835d7c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" orientation | \n",
" duration | \n",
" surround | \n",
" stimulus_type | \n",
"
\n",
" \n",
" \n",
" \n",
" A1 | \n",
" 0 | \n",
" 0.1 | \n",
" FULL | \n",
" LINES | \n",
"
\n",
" \n",
" A2 | \n",
" 0 | \n",
" 0.01 | \n",
" NONE | \n",
" DOTS | \n",
"
\n",
" \n",
" B1 | \n",
" 45 | \n",
" 0.1 | \n",
" NONE | \n",
" PLAID | \n",
"
\n",
" \n",
" B2 | \n",
" 45 | \n",
" 0.01 | \n",
" FULL | \n",
" PLAID | \n",
"
\n",
" \n",
" C1 | \n",
" 90 | \n",
" 0.2 | \n",
" FULL | \n",
" WIGGLES | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" orientation duration surround stimulus_type\n",
"A1 0 0.1 FULL LINES\n",
"A2 0 0.01 NONE DOTS\n",
"B1 45 0.1 NONE PLAID\n",
"B2 45 0.01 FULL PLAID\n",
"C1 90 0.2 FULL WIGGLES"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Often, this is done using a spreadsheet\n",
"condition_properties = pd.DataFrame(\n",
" [condition_to_orientation, condition_to_duration, condition_to_surround, condition_to_stimulus_type],\n",
" index=['orientation', 'duration', 'surround', 'stimulus_type'],\n",
").T\n",
"condition_properties"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c27ea9f3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5e563cd0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "cba9534f",
"metadata": {},
"source": [
"# Anti-join: filter out unwanted data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "1cb2bbdb",
"metadata": {},
"outputs": [],
"source": [
"# We are given a list of subjects that are outliers and should be disregarded in the analysis\n",
"outliers = pd.DataFrame([['711'], ['712'], ['713'], ['714'], ['888']], columns=['subject_id'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e0e2c3c5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "90d92640",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}