diff --git a/exercises/tabular_join/tabular_join.ipynb b/exercises/tabular_join/tabular_join.ipynb index 6cf3ed9..9e9640a 100644 --- a/exercises/tabular_join/tabular_join.ipynb +++ b/exercises/tabular_join/tabular_join.ipynb @@ -326,12 +326,24 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "861ac334-14ce-490a-b3c4-877b32789f3e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(6324, 16)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## your code here\n" + "## your code here\n", + "df.shape" ] }, { @@ -344,12 +356,24 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "id": "14f57842-5722-4953-88d6-d7cf3070400c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(6287, 3)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## your code here\n" + "## your code here\n", + "info.shape" ] }, { @@ -364,14 +388,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "id": "35e19a53", "metadata": {}, "outputs": [], "source": [ "## your code here\n", - "\n", - "\n" + "data_diet = pd.merge(df, info, how='inner', left_on=['location-id', 'patient-id'], right_on=['location-id', 'patient-id'])" ] }, { @@ -388,7 +411,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "id": "36ce0688-d421-4a07-b00e-0e9b3201f0e0", "metadata": {}, "outputs": [ @@ -456,7 +479,7 @@ "4 5 Malaga" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -469,12 +492,195 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "b636dde4-129a-4dd1-8cbf-c539c9c8a5f2", "metadata": {}, "outputs": [], "source": [ - "## your code here:\n" + "## your code here:\n", + "data_diet_loc = pd.merge(data_diet, locations, how='inner', left_on='location-id', right_on='location-id')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e4579355-bdde-40a3-8969-1c27c7bb0eb7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
patient-idlocation-idsexagesmokebmiwaistwthhtndiabhypercholfamhisthormop14toeventeventgroupCity
04364Male58Former33.531220.753086NoNoYesNoNo105.374401YesControlBilbao
111304Male77Current31.051190.730061YesYesNoNoNo106.097194NoControlBilbao
211314Female72Former30.861060.654321NoYesNoYesNo85.946612NoMedDiet + VOOBilbao
311324Male71Former27.681180.694118YesNoYesNoNo82.907598YesMedDiet + NutsBilbao
411112Female79Never35.941290.806250YesNoYesNoNo94.761123NoMedDiet + VOOValencia
\n", + "
" + ], + "text/plain": [ + " patient-id location-id sex age smoke bmi waist wth htn \\\n", + "0 436 4 Male 58 Former 33.53 122 0.753086 No \n", + "1 1130 4 Male 77 Current 31.05 119 0.730061 Yes \n", + "2 1131 4 Female 72 Former 30.86 106 0.654321 No \n", + "3 1132 4 Male 71 Former 27.68 118 0.694118 Yes \n", + "4 1111 2 Female 79 Never 35.94 129 0.806250 Yes \n", + "\n", + " diab hyperchol famhist hormo p14 toevent event group City \n", + "0 No Yes No No 10 5.374401 Yes Control Bilbao \n", + "1 Yes No No No 10 6.097194 No Control Bilbao \n", + "2 Yes No Yes No 8 5.946612 No MedDiet + VOO Bilbao \n", + "3 No Yes No No 8 2.907598 Yes MedDiet + Nuts Bilbao \n", + "4 No Yes No No 9 4.761123 No MedDiet + VOO Valencia " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data_diet_loc.head()" ] }, { @@ -492,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "id": "d1d4cc27", "metadata": {}, "outputs": [], @@ -502,7 +708,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "id": "fbebbd97", "metadata": {}, "outputs": [ @@ -512,7 +718,7 @@ "(42, 2)" ] }, - "execution_count": 11, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -523,7 +729,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "id": "8a3c7943", "metadata": {}, "outputs": [ @@ -591,7 +797,7 @@ "4 4 541" ] }, - "execution_count": 12, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -602,12 +808,20 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 21, "id": "573687e7", "metadata": {}, "outputs": [], "source": [ - "# your code here\n" + "# your code here\n", + "data_diet_loc_drop = pd.merge(\n", + " data_diet_loc, \n", + " dropped, \n", + " how='left', \n", + " left_on=['location-id', 'patient-id'], \n", + " right_on=['location-id', 'patient-id'], \n", + " indicator=True\n", + ").query('_merge != \"both\"').drop(columns='_merge')" ] }, { @@ -622,15 +836,24 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 22, "id": "85902eea", "metadata": {}, "outputs": [], "source": [ "fname = 'processed_data_predimed.csv'\n", "\n", - "# your code here\n" + "# your code here\n", + "data_diet_loc_drop.to_csv(fname)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38e88726-287a-4448-a250-f8616362d070", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -649,7 +872,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.13.7" } }, "nbformat": 4, diff --git a/exercises/tabular_split_apply_combine/split_apply_combine.ipynb b/exercises/tabular_split_apply_combine/split_apply_combine.ipynb index 190b37c..ccd1996 100644 --- a/exercises/tabular_split_apply_combine/split_apply_combine.ipynb +++ b/exercises/tabular_split_apply_combine/split_apply_combine.ipynb @@ -275,12 +275,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "id": "00bb9eb1", "metadata": {}, "outputs": [], "source": [ - "# your code here:\n" + "# your code here:\n", + "event_by_group = df.groupby('group')['event'].sum()" ] }, { @@ -293,12 +294,13 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "id": "db946d0f-8204-43a3-853c-41981a9811f4", "metadata": {}, "outputs": [], "source": [ - "# your code here:\n" + "# your code here:\n", + "group_size = df.groupby('group').size()" ] }, { @@ -313,12 +315,28 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 15, "id": "13ad4130-2094-4e7a-a416-f0fd6e810413", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "group\n", + "Control 4.761905\n", + "MedDiet + Nuts 3.322099\n", + "MedDiet + VOO 3.856877\n", + "dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here:\n" + "# your code here:\n", + "event_by_group / group_size * 100" ] }, { @@ -352,12 +370,81 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 20, "id": "5ab4e70e-6261-4a26-8ad9-14eae15be09c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
smokeCurrentFormerNever
group
Control133944
MedDiet + Nuts152034
MedDiet + VOO202934
\n", + "
" + ], + "text/plain": [ + "smoke Current Former Never\n", + "group \n", + "Control 13 39 44\n", + "MedDiet + Nuts 15 20 34\n", + "MedDiet + VOO 20 29 34" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here\n" + "# your code here\n", + "df.pivot_table(index='group', columns='smoke', values='event',aggfunc='sum')" ] }, { @@ -387,12 +474,96 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 22, "id": "196fd111-72bc-4b87-b8fb-293547a8c83d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
meanstd
sexFemaleMaleFemaleMale
group
Control68.00904666.4000005.9793136.605266
MedDiet + Nuts67.41459165.8226655.5800506.403373
MedDiet + VOO67.66877566.0800455.8167036.621440
\n", + "
" + ], + "text/plain": [ + " mean std \n", + "sex Female Male Female Male\n", + "group \n", + "Control 68.009046 66.400000 5.979313 6.605266\n", + "MedDiet + Nuts 67.414591 65.822665 5.580050 6.403373\n", + "MedDiet + VOO 67.668775 66.080045 5.816703 6.621440" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# your code here:\n" + "# your code here:\n", + "df.pivot_table(index='group', columns='sex', values='age', aggfunc=['mean','std'])\n" ] } ], @@ -412,7 +583,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.5" + "version": "3.13.7" } }, "nbformat": 4,