tabsplit #19

Open
julioro wants to merge 2 commits from julioro/2025-plovdiv-data:tabsplit into main
Showing only changes of commit 9868fe5fc8 - Show all commits

View file

@ -326,12 +326,24 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"id": "861ac334-14ce-490a-b3c4-877b32789f3e",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"(6324, 16)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"## your code here\n"
"## your code here\n",
"df.shape"
]
},
{
@ -344,12 +356,24 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"id": "14f57842-5722-4953-88d6-d7cf3070400c",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"(6287, 3)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"## your code here\n"
"## your code here\n",
"info.shape"
]
},
{
@ -364,14 +388,13 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"id": "35e19a53",
"metadata": {},
"outputs": [],
"source": [
"## your code here\n",
"\n",
"\n"
"data_diet = pd.merge(df, info, how='inner', left_on=['location-id', 'patient-id'], right_on=['location-id', 'patient-id'])"
]
},
{
@ -388,7 +411,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "36ce0688-d421-4a07-b00e-0e9b3201f0e0",
"metadata": {},
"outputs": [
@ -456,7 +479,7 @@
"4 5 Malaga"
]
},
"execution_count": 8,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -469,12 +492,195 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 11,
"id": "b636dde4-129a-4dd1-8cbf-c539c9c8a5f2",
"metadata": {},
"outputs": [],
"source": [
"## your code here:\n"
"## your code here:\n",
"data_diet_loc = pd.merge(data_diet, locations, how='inner', left_on='location-id', right_on='location-id')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "e4579355-bdde-40a3-8969-1c27c7bb0eb7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>patient-id</th>\n",
" <th>location-id</th>\n",
" <th>sex</th>\n",
" <th>age</th>\n",
" <th>smoke</th>\n",
" <th>bmi</th>\n",
" <th>waist</th>\n",
" <th>wth</th>\n",
" <th>htn</th>\n",
" <th>diab</th>\n",
" <th>hyperchol</th>\n",
" <th>famhist</th>\n",
" <th>hormo</th>\n",
" <th>p14</th>\n",
" <th>toevent</th>\n",
" <th>event</th>\n",
" <th>group</th>\n",
" <th>City</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>436</td>\n",
" <td>4</td>\n",
" <td>Male</td>\n",
" <td>58</td>\n",
" <td>Former</td>\n",
" <td>33.53</td>\n",
" <td>122</td>\n",
" <td>0.753086</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>10</td>\n",
" <td>5.374401</td>\n",
" <td>Yes</td>\n",
" <td>Control</td>\n",
" <td>Bilbao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1130</td>\n",
" <td>4</td>\n",
" <td>Male</td>\n",
" <td>77</td>\n",
" <td>Current</td>\n",
" <td>31.05</td>\n",
" <td>119</td>\n",
" <td>0.730061</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>10</td>\n",
" <td>6.097194</td>\n",
" <td>No</td>\n",
" <td>Control</td>\n",
" <td>Bilbao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1131</td>\n",
" <td>4</td>\n",
" <td>Female</td>\n",
" <td>72</td>\n",
" <td>Former</td>\n",
" <td>30.86</td>\n",
" <td>106</td>\n",
" <td>0.654321</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>8</td>\n",
" <td>5.946612</td>\n",
" <td>No</td>\n",
" <td>MedDiet + VOO</td>\n",
" <td>Bilbao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1132</td>\n",
" <td>4</td>\n",
" <td>Male</td>\n",
" <td>71</td>\n",
" <td>Former</td>\n",
" <td>27.68</td>\n",
" <td>118</td>\n",
" <td>0.694118</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>8</td>\n",
" <td>2.907598</td>\n",
" <td>Yes</td>\n",
" <td>MedDiet + Nuts</td>\n",
" <td>Bilbao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1111</td>\n",
" <td>2</td>\n",
" <td>Female</td>\n",
" <td>79</td>\n",
" <td>Never</td>\n",
" <td>35.94</td>\n",
" <td>129</td>\n",
" <td>0.806250</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>9</td>\n",
" <td>4.761123</td>\n",
" <td>No</td>\n",
" <td>MedDiet + VOO</td>\n",
" <td>Valencia</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" patient-id location-id sex age smoke bmi waist wth htn \\\n",
"0 436 4 Male 58 Former 33.53 122 0.753086 No \n",
"1 1130 4 Male 77 Current 31.05 119 0.730061 Yes \n",
"2 1131 4 Female 72 Former 30.86 106 0.654321 No \n",
"3 1132 4 Male 71 Former 27.68 118 0.694118 Yes \n",
"4 1111 2 Female 79 Never 35.94 129 0.806250 Yes \n",
"\n",
" diab hyperchol famhist hormo p14 toevent event group City \n",
"0 No Yes No No 10 5.374401 Yes Control Bilbao \n",
"1 Yes No No No 10 6.097194 No Control Bilbao \n",
"2 Yes No Yes No 8 5.946612 No MedDiet + VOO Bilbao \n",
"3 No Yes No No 8 2.907598 Yes MedDiet + Nuts Bilbao \n",
"4 No Yes No No 9 4.761123 No MedDiet + VOO Valencia "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_diet_loc.head()"
]
},
{
@ -492,7 +698,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 13,
"id": "d1d4cc27",
"metadata": {},
"outputs": [],
@ -502,7 +708,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 14,
"id": "fbebbd97",
"metadata": {},
"outputs": [
@ -512,7 +718,7 @@
"(42, 2)"
]
},
"execution_count": 11,
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@ -523,7 +729,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 15,
"id": "8a3c7943",
"metadata": {},
"outputs": [
@ -591,7 +797,7 @@
"4 4 541"
]
},
"execution_count": 12,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@ -602,12 +808,20 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 21,
"id": "573687e7",
"metadata": {},
"outputs": [],
"source": [
"# your code here\n"
"# your code here\n",
"data_diet_loc_drop = pd.merge(\n",
" data_diet_loc, \n",
" dropped, \n",
" how='left', \n",
" left_on=['location-id', 'patient-id'], \n",
" right_on=['location-id', 'patient-id'], \n",
" indicator=True\n",
").query('_merge != \"both\"').drop(columns='_merge')"
]
},
{
@ -622,15 +836,24 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 22,
"id": "85902eea",
"metadata": {},
"outputs": [],
"source": [
"fname = 'processed_data_predimed.csv'\n",
"\n",
"# your code here\n"
"# your code here\n",
"data_diet_loc_drop.to_csv(fname)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38e88726-287a-4448-a250-f8616362d070",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
@ -649,7 +872,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
"version": "3.13.7"
}
},
"nbformat": 4,