{ "cells": [ { "cell_type": "markdown", "id": "37957eb0", "metadata": {}, "source": [ "# Combine information across tables: joins and anti-joins" ] }, { "cell_type": "code", "execution_count": 1, "id": "b6f949f7", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "markdown", "id": "6a7fcf90", "metadata": {}, "source": [ "# \"Load\" some experimental data" ] }, { "cell_type": "code", "execution_count": 2, "id": "a9450803", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subject_idcondition_idresponse_timeresponse
0312A10.12LEFT
1312A20.37LEFT
2312C20.68LEFT
3711A14.01RIGHT
4711A20.44LEFT
5313A10.07RIGHT
6313B10.08RIGHT
7712A23.29LEFT
8314A20.29LEFT
9714B23.32RIGHT
10314B10.14RIGHT
11314C20.73RIGHT
12713B15.74LEFT
\n", "
" ], "text/plain": [ " subject_id condition_id response_time response\n", "0 312 A1 0.12 LEFT\n", "1 312 A2 0.37 LEFT\n", "2 312 C2 0.68 LEFT\n", "3 711 A1 4.01 RIGHT\n", "4 711 A2 0.44 LEFT\n", "5 313 A1 0.07 RIGHT\n", "6 313 B1 0.08 RIGHT\n", "7 712 A2 3.29 LEFT\n", "8 314 A2 0.29 LEFT\n", "9 714 B2 3.32 RIGHT\n", "10 314 B1 0.14 RIGHT\n", "11 314 C2 0.73 RIGHT\n", "12 713 B1 5.74 LEFT" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pd.DataFrame(\n", " data=[\n", " ['312', 'A1', 0.12, 'LEFT'],\n", " ['312', 'A2', 0.37, 'LEFT'],\n", " ['312', 'C2', 0.68, 'LEFT'],\n", " ['711', 'A1', 4.01, 'RIGHT'],\n", " ['711', 'A2', 0.44, 'LEFT'],\n", " ['313', 'A1', 0.07, 'RIGHT'],\n", " ['313', 'B1', 0.08, 'RIGHT'],\n", " ['712', 'A2', 3.29, 'LEFT'],\n", " ['314', 'A2', 0.29, 'LEFT'],\n", " ['714', 'B2', 3.32, 'RIGHT'],\n", " ['314', 'B1', 0.14, 'RIGHT'],\n", " ['314', 'C2', 0.73, 'RIGHT'],\n", " ['713', 'B1', 5.74, 'LEFT'],\n", " ],\n", " columns=['subject_id', 'condition_id', 'response_time', 'response'],\n", ")\n", "data" ] }, { "cell_type": "markdown", "id": "9f6de0d6", "metadata": {}, "source": [ "Each experiment belongs to one experimental condition, but the parameters of each condition are not in the table" ] }, { "cell_type": "code", "execution_count": 3, "id": "455471d7", "metadata": {}, "outputs": [], "source": [ "condition_to_orientation = {\n", " 'A1': 0,\n", " 'A2': 0,\n", " 'B1': 45,\n", " 'B2': 45,\n", " 'C1': 90,\n", "}\n", "\n", "condition_to_duration = {\n", " 'A1': 0.1,\n", " 'A2': 0.01,\n", " 'B1': 0.1,\n", " 'B2': 0.01,\n", " 'C1': 0.2,\n", "}\n", "\n", "condition_to_surround = {\n", " 'A1': 'FULL',\n", " 'A2': 'NONE',\n", " 'B1': 'NONE',\n", " 'B2': 'FULL',\n", " 'C1': 'FULL',\n", "}\n", "\n", "\n", "condition_to_stimulus_type = {\n", " 'A1': 'LINES',\n", " 'A2': 'DOTS',\n", " 'B1': 'PLAID',\n", " 'B2': 'PLAID',\n", " 'C1': 'WIGGLES',\n", "}\n" ] }, { "cell_type": "markdown", "id": "5ccfd7e7", "metadata": {}, "source": [ "# Manually adding the condition parameters to the table" ] }, { "cell_type": "code", "execution_count": 73, "id": "cc32110c", "metadata": {}, "outputs": [], "source": [ "data_with_properties = data.copy()" ] }, { "cell_type": "code", "execution_count": null, "id": "06263dc6", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "b96962b2", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "d6e71b13", "metadata": {}, "source": [ "# Using a join operation" ] }, { "cell_type": "code", "execution_count": 4, "id": "d9835d7c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
orientationdurationsurroundstimulus_type
A100.1FULLLINES
A200.01NONEDOTS
B1450.1NONEPLAID
B2450.01FULLPLAID
C1900.2FULLWIGGLES
\n", "
" ], "text/plain": [ " orientation duration surround stimulus_type\n", "A1 0 0.1 FULL LINES\n", "A2 0 0.01 NONE DOTS\n", "B1 45 0.1 NONE PLAID\n", "B2 45 0.01 FULL PLAID\n", "C1 90 0.2 FULL WIGGLES" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Often, this is done using a spreadsheet\n", "condition_properties = pd.DataFrame(\n", " [condition_to_orientation, condition_to_duration, condition_to_surround, condition_to_stimulus_type],\n", " index=['orientation', 'duration', 'surround', 'stimulus_type'],\n", ").T\n", "condition_properties" ] }, { "cell_type": "code", "execution_count": null, "id": "c27ea9f3", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "5e563cd0", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "cba9534f", "metadata": {}, "source": [ "# Anti-join: filter out unwanted data" ] }, { "cell_type": "code", "execution_count": 5, "id": "1cb2bbdb", "metadata": {}, "outputs": [], "source": [ "# We are given a list of subjects that are outliers and should be disregarded in the analysis\n", "outliers = pd.DataFrame([['711'], ['712'], ['713'], ['714'], ['888']], columns=['subject_id'])" ] }, { "cell_type": "code", "execution_count": null, "id": "e0e2c3c5", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "90d92640", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.3" } }, "nbformat": 4, "nbformat_minor": 5 }