Combining patient, diet and location data

This commit is contained in:
ASPP Student 2025-09-24 13:04:28 +03:00
parent 874f984bd5
commit 9868fe5fc8

View file

@ -326,12 +326,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 6,
"id": "861ac334-14ce-490a-b3c4-877b32789f3e", "id": "861ac334-14ce-490a-b3c4-877b32789f3e",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"(6324, 16)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"## your code here\n" "## your code here\n",
"# Dimensions of `df` as a (n_rows, n_columns) tuple.\n",
"df.shape"
] ]
}, },
{ {
@ -344,12 +356,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 7,
"id": "14f57842-5722-4953-88d6-d7cf3070400c", "id": "14f57842-5722-4953-88d6-d7cf3070400c",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"(6287, 3)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"## your code here\n" "## your code here\n",
"# Dimensions of `info` as a (n_rows, n_columns) tuple.\n",
"info.shape"
] ]
}, },
{ {
@ -364,14 +388,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 10,
"id": "35e19a53", "id": "35e19a53",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"## your code here\n", "## your code here\n",
"\n", "# Inner join: keep only rows whose (location-id, patient-id) pair occurs in both `df` and `info`.\n",
"\n" "data_diet = df.merge(\n",
"    info,\n",
"    how='inner',\n",
"    on=['location-id', 'patient-id'],\n",
")"
] ]
}, },
{ {
@ -388,7 +411,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 9,
"id": "36ce0688-d421-4a07-b00e-0e9b3201f0e0", "id": "36ce0688-d421-4a07-b00e-0e9b3201f0e0",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -456,7 +479,7 @@
"4 5 Malaga" "4 5 Malaga"
] ]
}, },
"execution_count": 8, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -469,12 +492,195 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 11,
"id": "b636dde4-129a-4dd1-8cbf-c539c9c8a5f2", "id": "b636dde4-129a-4dd1-8cbf-c539c9c8a5f2",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"## your code here:\n" "## your code here:\n",
"# Inner join on location-id: adds the columns of `locations` to every matching row of `data_diet`.\n",
"data_diet_loc = data_diet.merge(locations, how='inner', on='location-id')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "e4579355-bdde-40a3-8969-1c27c7bb0eb7",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>patient-id</th>\n",
" <th>location-id</th>\n",
" <th>sex</th>\n",
" <th>age</th>\n",
" <th>smoke</th>\n",
" <th>bmi</th>\n",
" <th>waist</th>\n",
" <th>wth</th>\n",
" <th>htn</th>\n",
" <th>diab</th>\n",
" <th>hyperchol</th>\n",
" <th>famhist</th>\n",
" <th>hormo</th>\n",
" <th>p14</th>\n",
" <th>toevent</th>\n",
" <th>event</th>\n",
" <th>group</th>\n",
" <th>City</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>436</td>\n",
" <td>4</td>\n",
" <td>Male</td>\n",
" <td>58</td>\n",
" <td>Former</td>\n",
" <td>33.53</td>\n",
" <td>122</td>\n",
" <td>0.753086</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>10</td>\n",
" <td>5.374401</td>\n",
" <td>Yes</td>\n",
" <td>Control</td>\n",
" <td>Bilbao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1130</td>\n",
" <td>4</td>\n",
" <td>Male</td>\n",
" <td>77</td>\n",
" <td>Current</td>\n",
" <td>31.05</td>\n",
" <td>119</td>\n",
" <td>0.730061</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>10</td>\n",
" <td>6.097194</td>\n",
" <td>No</td>\n",
" <td>Control</td>\n",
" <td>Bilbao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1131</td>\n",
" <td>4</td>\n",
" <td>Female</td>\n",
" <td>72</td>\n",
" <td>Former</td>\n",
" <td>30.86</td>\n",
" <td>106</td>\n",
" <td>0.654321</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>8</td>\n",
" <td>5.946612</td>\n",
" <td>No</td>\n",
" <td>MedDiet + VOO</td>\n",
" <td>Bilbao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1132</td>\n",
" <td>4</td>\n",
" <td>Male</td>\n",
" <td>71</td>\n",
" <td>Former</td>\n",
" <td>27.68</td>\n",
" <td>118</td>\n",
" <td>0.694118</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>8</td>\n",
" <td>2.907598</td>\n",
" <td>Yes</td>\n",
" <td>MedDiet + Nuts</td>\n",
" <td>Bilbao</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1111</td>\n",
" <td>2</td>\n",
" <td>Female</td>\n",
" <td>79</td>\n",
" <td>Never</td>\n",
" <td>35.94</td>\n",
" <td>129</td>\n",
" <td>0.806250</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>9</td>\n",
" <td>4.761123</td>\n",
" <td>No</td>\n",
" <td>MedDiet + VOO</td>\n",
" <td>Valencia</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" patient-id location-id sex age smoke bmi waist wth htn \\\n",
"0 436 4 Male 58 Former 33.53 122 0.753086 No \n",
"1 1130 4 Male 77 Current 31.05 119 0.730061 Yes \n",
"2 1131 4 Female 72 Former 30.86 106 0.654321 No \n",
"3 1132 4 Male 71 Former 27.68 118 0.694118 Yes \n",
"4 1111 2 Female 79 Never 35.94 129 0.806250 Yes \n",
"\n",
" diab hyperchol famhist hormo p14 toevent event group City \n",
"0 No Yes No No 10 5.374401 Yes Control Bilbao \n",
"1 Yes No No No 10 6.097194 No Control Bilbao \n",
"2 Yes No Yes No 8 5.946612 No MedDiet + VOO Bilbao \n",
"3 No Yes No No 8 2.907598 Yes MedDiet + Nuts Bilbao \n",
"4 No Yes No No 9 4.761123 No MedDiet + VOO Valencia "
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first rows of the merged table to check the result of the joins.\n",
"data_diet_loc.head()"
] ]
}, },
{ {
@ -492,7 +698,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 13,
"id": "d1d4cc27", "id": "d1d4cc27",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -502,7 +708,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 14,
"id": "fbebbd97", "id": "fbebbd97",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -512,7 +718,7 @@
"(42, 2)" "(42, 2)"
] ]
}, },
"execution_count": 11, "execution_count": 14,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -523,7 +729,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 15,
"id": "8a3c7943", "id": "8a3c7943",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -591,7 +797,7 @@
"4 4 541" "4 4 541"
] ]
}, },
"execution_count": 12, "execution_count": 15,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -602,12 +808,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 13, "execution_count": 21,
"id": "573687e7", "id": "573687e7",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# your code here\n" "# your code here\n",
"# Anti-join: the left merge with indicator=True labels every row in `_merge`;\n",
"# rows labelled \"both\" have a matching (location-id, patient-id) in `dropped` and are removed.\n",
"data_diet_loc_drop = (\n",
"    data_diet_loc\n",
"    .merge(dropped, how='left', on=['location-id', 'patient-id'], indicator=True)\n",
"    .query('_merge != \"both\"')\n",
"    .drop(columns='_merge')  # the helper column is no longer needed\n",
")"
] ]
}, },
{ {
@ -622,15 +836,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 22,
"id": "85902eea", "id": "85902eea",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"fname = 'processed_data_predimed.csv'\n", "fname = 'processed_data_predimed.csv'\n",
"\n", "\n",
"# your code here\n" "# your code here\n",
"# Save the processed table. index=False leaves out the row index, which is only a\n",
"# non-contiguous leftover of the merge/filter steps and would otherwise be written\n",
"# as an unnamed first column (read back as 'Unnamed: 0').\n",
"data_diet_loc_drop.to_csv(fname, index=False)"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"id": "38e88726-287a-4448-a250-f8616362d070",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
@ -649,7 +872,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.11.5" "version": "3.13.7"
} }
}, },
"nbformat": 4, "nbformat": 4,