adds exercises for tabular data part
This commit is contained in:
parent
2e60b94c52
commit
26eb146a5c
16 changed files with 60195 additions and 0 deletions
6246
exercises/tabular_split_apply_combine/processed_data_predimed.csv
Normal file
6246
exercises/tabular_split_apply_combine/processed_data_predimed.csv
Normal file
File diff suppressed because it is too large
Load diff
420
exercises/tabular_split_apply_combine/split_apply_combine.ipynb
Normal file
420
exercises/tabular_split_apply_combine/split_apply_combine.ipynb
Normal file
|
@ -0,0 +1,420 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6f6aa857",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Exercise: Compute summary statistics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "8f9bc8b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import pandas as pd"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1be11d54",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Load the patient data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "d2dfebd3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.read_csv('processed_data_predimed.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "09554c84",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(6245, 18)"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "df95a10b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>patient-id</th>\n",
|
||||
" <th>location-id</th>\n",
|
||||
" <th>sex</th>\n",
|
||||
" <th>age</th>\n",
|
||||
" <th>smoke</th>\n",
|
||||
" <th>bmi</th>\n",
|
||||
" <th>waist</th>\n",
|
||||
" <th>wth</th>\n",
|
||||
" <th>htn</th>\n",
|
||||
" <th>diab</th>\n",
|
||||
" <th>hyperchol</th>\n",
|
||||
" <th>famhist</th>\n",
|
||||
" <th>hormo</th>\n",
|
||||
" <th>p14</th>\n",
|
||||
" <th>toevent</th>\n",
|
||||
" <th>event</th>\n",
|
||||
" <th>group</th>\n",
|
||||
" <th>City</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>77</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>25.92</td>\n",
|
||||
" <td>94</td>\n",
|
||||
" <td>0.657343</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>5.538672</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>MedDiet + VOO</td>\n",
|
||||
" <td>Madrid</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>68</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>34.85</td>\n",
|
||||
" <td>150</td>\n",
|
||||
" <td>0.949367</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>3.063655</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>MedDiet + Nuts</td>\n",
|
||||
" <td>Madrid</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>66</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>37.50</td>\n",
|
||||
" <td>120</td>\n",
|
||||
" <td>0.750000</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>5.590691</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>MedDiet + Nuts</td>\n",
|
||||
" <td>Madrid</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>77</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>29.26</td>\n",
|
||||
" <td>93</td>\n",
|
||||
" <td>0.628378</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>5.456537</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>MedDiet + VOO</td>\n",
|
||||
" <td>Madrid</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>60</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>30.02</td>\n",
|
||||
" <td>104</td>\n",
|
||||
" <td>0.662420</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>2.746064</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Control</td>\n",
|
||||
" <td>Madrid</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" patient-id location-id sex age smoke bmi waist wth htn \\\n",
|
||||
"0 1 1 Female 77 Never 25.92 94 0.657343 Yes \n",
|
||||
"1 2 1 Female 68 Never 34.85 150 0.949367 Yes \n",
|
||||
"2 3 1 Female 66 Never 37.50 120 0.750000 Yes \n",
|
||||
"3 4 1 Female 77 Never 29.26 93 0.628378 Yes \n",
|
||||
"4 5 1 Female 60 Never 30.02 104 0.662420 Yes \n",
|
||||
"\n",
|
||||
" diab hyperchol famhist hormo p14 toevent event group City \n",
|
||||
"0 No Yes Yes No 9 5.538672 No MedDiet + VOO Madrid \n",
|
||||
"1 No Yes Yes NaN 10 3.063655 No MedDiet + Nuts Madrid \n",
|
||||
"2 Yes No No No 6 5.590691 No MedDiet + Nuts Madrid \n",
|
||||
"3 Yes No No No 6 5.456537 No MedDiet + VOO Madrid \n",
|
||||
"4 No Yes No No 9 2.746064 No Control Madrid "
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b4f6091",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 1. Did the Mediterranean diet help prevent cardiovascular events?\n",
|
||||
"\n",
|
||||
"To answer this question, we need to compute how many cardiovascular \"events\" occurred in each group of participants, separated by the diet they followed.\n",
|
||||
"In the data, the column `event` contains `Yes` or `No`, indicating whether that patient had a cardiovascular event. The column `group` indicates which diet they followed.\n",
|
||||
"\n",
|
||||
"We first convert the column `event` to a binary value (1 for `Yes`, 0 for `No`)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "99b21627-1b48-44ee-bda2-312b0718bd59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df['event'] = df['event'].map({'Yes': 1, 'No': 0})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a63e59c5-fe50-433f-a529-f601c795db67",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Now compute the total number of events by diet group. Compare the numbers and see if you can answer the question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "00bb9eb1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# your code here:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b052a40a-ae68-4376-8557-541eafb3face",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Check how many patients each group had."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "db946d0f-8204-43a3-853c-41981a9811f4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# your code here:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "64339449-d766-4a2a-85d3-8aafac5533b7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The groups did not have an equal number of patients, so to be precise we need to put the event counts in relation to the total number of patients in each group. For that:\n",
|
||||
"* Calculate how many events occurred relative to the number of patients in each group (in percentage). \n",
|
||||
"Do this separated by diet group."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "13ad4130-2094-4e7a-a416-f0fd6e810413",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# your code here:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9655c0ac-18e9-4297-9f6e-557bfe95ed5e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It seems that the control group had a higher percentage of events than the other two groups."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1940d3fe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 2. Smoking\n",
|
||||
"\n",
|
||||
"Did smoking make a difference in the outcome of the study?\n",
|
||||
"Calculate how many events occurred by diet group *and* smoking status. The idea is that you arrive at a table like this:\n",
|
||||
"\n",
|
||||
"| group | Current | Former | Never |\n",
|
||||
"|:---------------|----------:|---------:|--------:|\n",
|
||||
"| Control | ... | ... | ... |\n",
|
||||
"| MedDiet + Nuts | ... | ... | ... |\n",
|
||||
"| MedDiet + VOO | ... | ... | ... |\n",
|
||||
"\n",
|
||||
"where each entry in the table is the percentage of patients in that diet group and smoking category who had an event. \n",
|
||||
"\n",
|
||||
"Hint: use `pivot_table`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "5ab4e70e-6261-4a26-8ad9-14eae15be09c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# your code here\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "511c640e-8f0f-449f-af33-85061d89cfd3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 3. Age differences?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a04ea5c-8a27-4e67-aafa-ba34580a8d7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, check that there were no big differences in the age between the groups.\n",
|
||||
"* Calculate the mean and standard deviation of the patients' age, separated by diet group and sex.\n",
|
||||
"\n",
|
||||
"You should get a table with the diet groups in the rows and sex in the columns. For the mean age, it looks like this:\n",
|
||||
"\n",
|
||||
"| group | Female | Male |\n",
|
||||
"|:---------------|---------:|-------:|\n",
|
||||
"| Control | 68 | 66.4 |\n",
|
||||
"| MedDiet + Nuts | 67.4 | 65.8 |\n",
|
||||
"| MedDiet + VOO | 67.7 | 66.1 |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "196fd111-72bc-4b87-b8fb-293547a8c83d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# your code here:\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@ -0,0 +1,927 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "6f6aa857",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Exercise: Compute summary statistics"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "8f9bc8b1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import pandas as pd\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1be11d54",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Load the patient data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 25,
|
||||
"id": "d2dfebd3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pd.read_csv('processed_data_predimed.csv')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"id": "09554c84",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"(6245, 17)"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"id": "df95a10b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>patient-id</th>\n",
|
||||
" <th>location-id</th>\n",
|
||||
" <th>sex</th>\n",
|
||||
" <th>age</th>\n",
|
||||
" <th>smoke</th>\n",
|
||||
" <th>bmi</th>\n",
|
||||
" <th>waist</th>\n",
|
||||
" <th>wth</th>\n",
|
||||
" <th>htn</th>\n",
|
||||
" <th>diab</th>\n",
|
||||
" <th>hyperchol</th>\n",
|
||||
" <th>famhist</th>\n",
|
||||
" <th>hormo</th>\n",
|
||||
" <th>p14</th>\n",
|
||||
" <th>toevent</th>\n",
|
||||
" <th>event</th>\n",
|
||||
" <th>group</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>77</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>25.92</td>\n",
|
||||
" <td>94</td>\n",
|
||||
" <td>0.657343</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>5.538672</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>MedDiet + VOO</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>68</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>34.85</td>\n",
|
||||
" <td>150</td>\n",
|
||||
" <td>0.949367</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>10</td>\n",
|
||||
" <td>3.063655</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>MedDiet + Nuts</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>3</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>66</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>37.50</td>\n",
|
||||
" <td>120</td>\n",
|
||||
" <td>0.750000</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>5.590691</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>MedDiet + Nuts</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>4</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>77</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>29.26</td>\n",
|
||||
" <td>93</td>\n",
|
||||
" <td>0.628378</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>6</td>\n",
|
||||
" <td>5.456537</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>MedDiet + VOO</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>5</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>Female</td>\n",
|
||||
" <td>60</td>\n",
|
||||
" <td>Never</td>\n",
|
||||
" <td>30.02</td>\n",
|
||||
" <td>104</td>\n",
|
||||
" <td>0.662420</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Yes</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>9</td>\n",
|
||||
" <td>2.746064</td>\n",
|
||||
" <td>No</td>\n",
|
||||
" <td>Control</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" patient-id location-id sex age smoke bmi waist wth htn \\\n",
|
||||
"0 1 1 Female 77 Never 25.92 94 0.657343 Yes \n",
|
||||
"1 2 1 Female 68 Never 34.85 150 0.949367 Yes \n",
|
||||
"2 3 1 Female 66 Never 37.50 120 0.750000 Yes \n",
|
||||
"3 4 1 Female 77 Never 29.26 93 0.628378 Yes \n",
|
||||
"4 5 1 Female 60 Never 30.02 104 0.662420 Yes \n",
|
||||
"\n",
|
||||
" diab hyperchol famhist hormo p14 toevent event group \n",
|
||||
"0 No Yes Yes No 9 5.538672 No MedDiet + VOO \n",
|
||||
"1 No Yes Yes NaN 10 3.063655 No MedDiet + Nuts \n",
|
||||
"2 Yes No No No 6 5.590691 No MedDiet + Nuts \n",
|
||||
"3 Yes No No No 6 5.456537 No MedDiet + VOO \n",
|
||||
"4 No Yes No No 9 2.746064 No Control "
|
||||
]
|
||||
},
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0b4f6091",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 1. Did the Mediterranean diet help prevent cardiovascular events?\n",
|
||||
"\n",
|
||||
"To answer this question, we need to compute how many cardiovascular \"events\" occurred in each group of participants, separated by the diet they followed.\n",
|
||||
"In the data, the column `event` contains `Yes` or `No`, indicating whether that patient had a cardiovascular event. The column `group` indicates which diet they followed.\n",
|
||||
"\n",
|
||||
"* Convert the column `event` from string to binary (1 for Yes, 0 for No) (this will ease the calculations that follow later).\n",
|
||||
" Hint: use the method `.map()`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"id": "99b21627-1b48-44ee-bda2-312b0718bd59",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df['event'] = df['event'].map({'Yes': 1, 'No': 0})"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a63e59c5-fe50-433f-a529-f601c795db67",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Now compute the total number of events by diet group. Compare the numbers and see if you can answer the question."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"id": "00bb9eb1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"group\n",
|
||||
"Control 96\n",
|
||||
"MedDiet + Nuts 69\n",
|
||||
"MedDiet + VOO 83\n",
|
||||
"Name: event, dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.groupby('group')['event'].sum()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b052a40a-ae68-4376-8557-541eafb3face",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"* Check how many patients each group had."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"id": "db946d0f-8204-43a3-853c-41981a9811f4",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"group\n",
|
||||
"Control 2016\n",
|
||||
"MedDiet + Nuts 2077\n",
|
||||
"MedDiet + VOO 2152\n",
|
||||
"Name: event, dtype: int64"
|
||||
]
|
||||
},
|
||||
"execution_count": 30,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.groupby('group')['event'].count()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "64339449-d766-4a2a-85d3-8aafac5533b7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The groups did not have an equal number of patients, so to be precise we need to put the event counts in relation to the total number of patients in each group. For that:\n",
|
||||
"* Calculate how many events occurred relative to the number of patients in each group (in percentage). \n",
|
||||
"Do this separated by diet group."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "13ad4130-2094-4e7a-a416-f0fd6e810413",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"group\n",
|
||||
"Control 4.761905\n",
|
||||
"MedDiet + Nuts 3.322099\n",
|
||||
"MedDiet + VOO 3.856877\n",
|
||||
"Name: event, dtype: float64"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.groupby('group')['event'].sum()*100 / df.groupby('group')['event'].count()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9655c0ac-18e9-4297-9f6e-557bfe95ed5e",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"It seems that the control group had a higher percentage of events than the other two groups."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1940d3fe",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 2. Smoking\n",
|
||||
"\n",
|
||||
"Did smoking make a difference in the outcome of the study?\n",
|
||||
"Calculate how many events occurred by diet group *and* smoking status. The idea is that you arrive at a table like this:\n",
|
||||
"\n",
|
||||
"| group | Current | Former | Never |\n",
|
||||
"|:---------------|----------:|---------:|--------:|\n",
|
||||
"| Control | ... | ... | ... |\n",
|
||||
"| MedDiet + Nuts | ... | ... | ... |\n",
|
||||
"| MedDiet + VOO | ... | ... | ... |\n",
|
||||
"\n",
|
||||
"where each entry in the table is the percentage of patients in that diet group and smoking category who had an event.\n\n",
|
||||
"Hint: use `pivot_table`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 32,
|
||||
"id": "5ab4e70e-6261-4a26-8ad9-14eae15be09c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th>smoke</th>\n",
|
||||
" <th>Current</th>\n",
|
||||
" <th>Former</th>\n",
|
||||
" <th>Never</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>group</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>Control</th>\n",
|
||||
" <td>13</td>\n",
|
||||
" <td>39</td>\n",
|
||||
" <td>44</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + Nuts</th>\n",
|
||||
" <td>15</td>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>34</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + VOO</th>\n",
|
||||
" <td>20</td>\n",
|
||||
" <td>29</td>\n",
|
||||
" <td>34</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"smoke Current Former Never\n",
|
||||
"group \n",
|
||||
"Control 13 39 44\n",
|
||||
"MedDiet + Nuts 15 20 34\n",
|
||||
"MedDiet + VOO 20 29 34"
|
||||
]
|
||||
},
|
||||
"execution_count": 32,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"counts = df.pivot_table(index='group', columns='smoke', values='event', aggfunc='sum')\n",
|
||||
"counts"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "f854f24d-9108-42bc-a23b-6b5503f5deba",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th>smoke</th>\n",
|
||||
" <th>Current</th>\n",
|
||||
" <th>Former</th>\n",
|
||||
" <th>Never</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>group</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>Control</th>\n",
|
||||
" <td>264</td>\n",
|
||||
" <td>485</td>\n",
|
||||
" <td>1267</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + Nuts</th>\n",
|
||||
" <td>291</td>\n",
|
||||
" <td>539</td>\n",
|
||||
" <td>1247</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + VOO</th>\n",
|
||||
" <td>290</td>\n",
|
||||
" <td>531</td>\n",
|
||||
" <td>1331</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"smoke Current Former Never\n",
|
||||
"group \n",
|
||||
"Control 264 485 1267\n",
|
||||
"MedDiet + Nuts 291 539 1247\n",
|
||||
"MedDiet + VOO 290 531 1331"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"N = df.pivot_table(index='group', columns='smoke', values='event', aggfunc='count')\n",
|
||||
"N"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "1358f6b8-60a5-44db-ba23-4ef7c7af8455",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th>smoke</th>\n",
|
||||
" <th>Current</th>\n",
|
||||
" <th>Former</th>\n",
|
||||
" <th>Never</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>group</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>Control</th>\n",
|
||||
" <td>4.924242</td>\n",
|
||||
" <td>8.041237</td>\n",
|
||||
" <td>3.472770</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + Nuts</th>\n",
|
||||
" <td>5.154639</td>\n",
|
||||
" <td>3.710575</td>\n",
|
||||
" <td>2.726544</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + VOO</th>\n",
|
||||
" <td>6.896552</td>\n",
|
||||
" <td>5.461394</td>\n",
|
||||
" <td>2.554470</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"smoke Current Former Never\n",
|
||||
"group \n",
|
||||
"Control 4.924242 8.041237 3.472770\n",
|
||||
"MedDiet + Nuts 5.154639 3.710575 2.726544\n",
|
||||
"MedDiet + VOO 6.896552 5.461394 2.554470"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"counts*100/N"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "511c640e-8f0f-449f-af33-85061d89cfd3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# 3. Age differences?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1a04ea5c-8a27-4e67-aafa-ba34580a8d7f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Finally, check that there were no big differences in the age between the groups.\n",
|
||||
"* Calculate the mean and standard deviation of the patients' age, separated by diet group and sex.\n",
|
||||
"\n",
|
||||
"You should get a table with the diet groups in the rows and sex in the columns. For the mean age, it looks like this:\n",
|
||||
"\n",
|
||||
"| group | Female | Male |\n",
|
||||
"|:---------------|---------:|-------:|\n",
|
||||
"| Control | 68 | 66.4 |\n",
|
||||
"| MedDiet + Nuts | 67.4 | 65.8 |\n",
|
||||
"| MedDiet + VOO | 67.7 | 66.1 |\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 33,
|
||||
"id": "40d8f79e-5595-4a35-822c-042206bde7db",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th>sex</th>\n",
|
||||
" <th>Female</th>\n",
|
||||
" <th>Male</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>group</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>Control</th>\n",
|
||||
" <td>68.0</td>\n",
|
||||
" <td>66.4</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + Nuts</th>\n",
|
||||
" <td>67.4</td>\n",
|
||||
" <td>65.8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + VOO</th>\n",
|
||||
" <td>67.7</td>\n",
|
||||
" <td>66.1</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"sex Female Male\n",
|
||||
"group \n",
|
||||
"Control 68.0 66.4\n",
|
||||
"MedDiet + Nuts 67.4 65.8\n",
|
||||
"MedDiet + VOO 67.7 66.1"
|
||||
]
|
||||
},
|
||||
"execution_count": 33,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# this works but it is longer than necessary\n",
|
||||
"df.groupby(['group', 'sex'])['age'].mean().reset_index().pivot_table(index='group', columns='sex', values='age').round(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"id": "196fd111-72bc-4b87-b8fb-293547a8c83d",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th>sex</th>\n",
|
||||
" <th>Female</th>\n",
|
||||
" <th>Male</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>group</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>Control</th>\n",
|
||||
" <td>68.0</td>\n",
|
||||
" <td>66.4</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + Nuts</th>\n",
|
||||
" <td>67.4</td>\n",
|
||||
" <td>65.8</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + VOO</th>\n",
|
||||
" <td>67.7</td>\n",
|
||||
" <td>66.1</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"sex Female Male\n",
|
||||
"group \n",
|
||||
"Control 68.0 66.4\n",
|
||||
"MedDiet + Nuts 67.4 65.8\n",
|
||||
"MedDiet + VOO 67.7 66.1"
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# instead of grouping first, do the pivot first and pass the aggregation function as an argument\n",
|
||||
"df.pivot_table(index='group', columns='sex', values='age', aggfunc='mean').round(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 37,
|
||||
"id": "dd5023cd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead tr th {\n",
|
||||
" text-align: left;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead tr:last-of-type th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr>\n",
|
||||
" <th></th>\n",
|
||||
" <th colspan=\"2\" halign=\"left\">mean</th>\n",
|
||||
" <th colspan=\"2\" halign=\"left\">std</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>sex</th>\n",
|
||||
" <th>Female</th>\n",
|
||||
" <th>Male</th>\n",
|
||||
" <th>Female</th>\n",
|
||||
" <th>Male</th>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>group</th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" <th></th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>Control</th>\n",
|
||||
" <td>68.0</td>\n",
|
||||
" <td>66.4</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>6.6</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + Nuts</th>\n",
|
||||
" <td>67.4</td>\n",
|
||||
" <td>65.8</td>\n",
|
||||
" <td>5.6</td>\n",
|
||||
" <td>6.4</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>MedDiet + VOO</th>\n",
|
||||
" <td>67.7</td>\n",
|
||||
" <td>66.1</td>\n",
|
||||
" <td>5.8</td>\n",
|
||||
" <td>6.6</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" mean std \n",
|
||||
"sex Female Male Female Male\n",
|
||||
"group \n",
|
||||
"Control 68.0 66.4 6.0 6.6\n",
|
||||
"MedDiet + Nuts 67.4 65.8 5.6 6.4\n",
|
||||
"MedDiet + VOO 67.7 66.1 5.8 6.6"
|
||||
]
|
||||
},
|
||||
"execution_count": 37,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# to get the standard deviation you could do the same as last time but pass aggfunc='std'. This will return another dataframe.\n",
|
||||
"\n",
|
||||
"# Alternatively, you can calculate both the mean and the S.D. in one step by passing a list of aggregation functions to aggfunc\n",
|
||||
"df.pivot_table(index='group', columns='sex', values='age', aggfunc=['mean', 'std']).round(1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0af53833-f872-4bd1-9c67-90a370dfe6c5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue