788 lines
25 KiB
Plaintext
788 lines
25 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "e951a26e",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Exercise: Analysis of tubercolosis cases by country and year period\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "6b181870",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"\n",
|
|
"pd.set_option('display.max_rows', 1000)\n",
|
|
"pd.set_option('display.max_columns', 100)\n",
|
|
"pd.set_option(\"display.max_colwidth\", None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "9adcc036",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Load the TB data from the World Health Organization"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "5d9e9162",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"tb_raw = pd.read_csv('who2.csv', index_col='rownames')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "cf7691e5",
|
|
"metadata": {},
|
|
"source": [
|
|
"Only keep data between 2000 and 2012"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "a953d230",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"cols = ['country', 'year'] + [c for c in tb_raw.columns if c.startswith('sp')]\n",
|
|
"tb_raw = tb_raw.loc[tb_raw['year'].between(2000, 2012), cols]"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "ba962fb7",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"(2783, 16)"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"tb_raw.shape"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "c79a5b8d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>country</th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>sp_m_014</th>\n",
|
|
" <th>sp_m_1524</th>\n",
|
|
" <th>sp_m_2534</th>\n",
|
|
" <th>sp_m_3544</th>\n",
|
|
" <th>sp_m_4554</th>\n",
|
|
" <th>sp_m_5564</th>\n",
|
|
" <th>sp_m_65</th>\n",
|
|
" <th>sp_f_014</th>\n",
|
|
" <th>sp_f_1524</th>\n",
|
|
" <th>sp_f_2534</th>\n",
|
|
" <th>sp_f_3544</th>\n",
|
|
" <th>sp_f_4554</th>\n",
|
|
" <th>sp_f_5564</th>\n",
|
|
" <th>sp_f_65</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>rownames</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>5551</th>\n",
|
|
" <td>San Marino</td>\n",
|
|
" <td>2009</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>642</th>\n",
|
|
" <td>Belarus</td>\n",
|
|
" <td>2009</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>66.0</td>\n",
|
|
" <td>173.0</td>\n",
|
|
" <td>208.0</td>\n",
|
|
" <td>287.0</td>\n",
|
|
" <td>134.0</td>\n",
|
|
" <td>54.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>41.0</td>\n",
|
|
" <td>52.0</td>\n",
|
|
" <td>52.0</td>\n",
|
|
" <td>41.0</td>\n",
|
|
" <td>25.0</td>\n",
|
|
" <td>68.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>7234</th>\n",
|
|
" <td>Zimbabwe</td>\n",
|
|
" <td>2007</td>\n",
|
|
" <td>138.0</td>\n",
|
|
" <td>500.0</td>\n",
|
|
" <td>3693.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>716.0</td>\n",
|
|
" <td>292.0</td>\n",
|
|
" <td>153.0</td>\n",
|
|
" <td>185.0</td>\n",
|
|
" <td>739.0</td>\n",
|
|
" <td>3311.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>553.0</td>\n",
|
|
" <td>213.0</td>\n",
|
|
" <td>90.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3471</th>\n",
|
|
" <td>Kuwait</td>\n",
|
|
" <td>2008</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>18.0</td>\n",
|
|
" <td>90.0</td>\n",
|
|
" <td>56.0</td>\n",
|
|
" <td>34.0</td>\n",
|
|
" <td>11.0</td>\n",
|
|
" <td>9.0</td>\n",
|
|
" <td>2.0</td>\n",
|
|
" <td>33.0</td>\n",
|
|
" <td>47.0</td>\n",
|
|
" <td>27.0</td>\n",
|
|
" <td>7.0</td>\n",
|
|
" <td>5.0</td>\n",
|
|
" <td>6.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>3336</th>\n",
|
|
" <td>Jordan</td>\n",
|
|
" <td>2009</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>5.0</td>\n",
|
|
" <td>15.0</td>\n",
|
|
" <td>14.0</td>\n",
|
|
" <td>10.0</td>\n",
|
|
" <td>7.0</td>\n",
|
|
" <td>6.0</td>\n",
|
|
" <td>0.0</td>\n",
|
|
" <td>7.0</td>\n",
|
|
" <td>14.0</td>\n",
|
|
" <td>8.0</td>\n",
|
|
" <td>3.0</td>\n",
|
|
" <td>7.0</td>\n",
|
|
" <td>12.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2689</th>\n",
|
|
" <td>Grenada</td>\n",
|
|
" <td>2008</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>2.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>1.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>634</th>\n",
|
|
" <td>Belarus</td>\n",
|
|
" <td>2001</td>\n",
|
|
" <td>2.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>4.0</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" country year sp_m_014 sp_m_1524 sp_m_2534 sp_m_3544 \\\n",
|
|
"rownames \n",
|
|
"5551 San Marino 2009 NaN NaN NaN NaN \n",
|
|
"642 Belarus 2009 0.0 66.0 173.0 208.0 \n",
|
|
"7234 Zimbabwe 2007 138.0 500.0 3693.0 0.0 \n",
|
|
"3471 Kuwait 2008 0.0 18.0 90.0 56.0 \n",
|
|
"3336 Jordan 2009 1.0 5.0 15.0 14.0 \n",
|
|
"2689 Grenada 2008 NaN 1.0 NaN 1.0 \n",
|
|
"634 Belarus 2001 2.0 NaN NaN NaN \n",
|
|
"\n",
|
|
" sp_m_4554 sp_m_5564 sp_m_65 sp_f_014 sp_f_1524 sp_f_2534 \\\n",
|
|
"rownames \n",
|
|
"5551 NaN NaN NaN NaN NaN NaN \n",
|
|
"642 287.0 134.0 54.0 0.0 41.0 52.0 \n",
|
|
"7234 716.0 292.0 153.0 185.0 739.0 3311.0 \n",
|
|
"3471 34.0 11.0 9.0 2.0 33.0 47.0 \n",
|
|
"3336 10.0 7.0 6.0 0.0 7.0 14.0 \n",
|
|
"2689 2.0 NaN 1.0 NaN NaN NaN \n",
|
|
"634 NaN NaN NaN 4.0 NaN NaN \n",
|
|
"\n",
|
|
" sp_f_3544 sp_f_4554 sp_f_5564 sp_f_65 \n",
|
|
"rownames \n",
|
|
"5551 NaN NaN NaN NaN \n",
|
|
"642 52.0 41.0 25.0 68.0 \n",
|
|
"7234 0.0 553.0 213.0 90.0 \n",
|
|
"3471 27.0 7.0 5.0 6.0 \n",
|
|
"3336 8.0 3.0 7.0 12.0 \n",
|
|
"2689 NaN NaN NaN NaN \n",
|
|
"634 NaN NaN NaN NaN "
|
|
]
|
|
},
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"tb_raw.sample(7, random_state=727)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "6e8b1d89",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>country</th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>sp_m_014</th>\n",
|
|
" <th>sp_m_1524</th>\n",
|
|
" <th>sp_m_2534</th>\n",
|
|
" <th>sp_m_3544</th>\n",
|
|
" <th>sp_m_4554</th>\n",
|
|
" <th>sp_m_5564</th>\n",
|
|
" <th>sp_m_65</th>\n",
|
|
" <th>sp_f_014</th>\n",
|
|
" <th>sp_f_1524</th>\n",
|
|
" <th>sp_f_2534</th>\n",
|
|
" <th>sp_f_3544</th>\n",
|
|
" <th>sp_f_4554</th>\n",
|
|
" <th>sp_f_5564</th>\n",
|
|
" <th>sp_f_65</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>rownames</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>191</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2000</td>\n",
|
|
" <td>186.0</td>\n",
|
|
" <td>999.0</td>\n",
|
|
" <td>1003.0</td>\n",
|
|
" <td>912.0</td>\n",
|
|
" <td>482.0</td>\n",
|
|
" <td>312.0</td>\n",
|
|
" <td>194.0</td>\n",
|
|
" <td>247.0</td>\n",
|
|
" <td>1142.0</td>\n",
|
|
" <td>1091.0</td>\n",
|
|
" <td>844.0</td>\n",
|
|
" <td>417.0</td>\n",
|
|
" <td>200.0</td>\n",
|
|
" <td>120.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>192</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2001</td>\n",
|
|
" <td>230.0</td>\n",
|
|
" <td>892.0</td>\n",
|
|
" <td>752.0</td>\n",
|
|
" <td>648.0</td>\n",
|
|
" <td>420.0</td>\n",
|
|
" <td>197.0</td>\n",
|
|
" <td>173.0</td>\n",
|
|
" <td>279.0</td>\n",
|
|
" <td>993.0</td>\n",
|
|
" <td>869.0</td>\n",
|
|
" <td>647.0</td>\n",
|
|
" <td>323.0</td>\n",
|
|
" <td>200.0</td>\n",
|
|
" <td>182.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>193</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2002</td>\n",
|
|
" <td>435.0</td>\n",
|
|
" <td>2223.0</td>\n",
|
|
" <td>2292.0</td>\n",
|
|
" <td>1915.0</td>\n",
|
|
" <td>1187.0</td>\n",
|
|
" <td>624.0</td>\n",
|
|
" <td>444.0</td>\n",
|
|
" <td>640.0</td>\n",
|
|
" <td>2610.0</td>\n",
|
|
" <td>2208.0</td>\n",
|
|
" <td>1600.0</td>\n",
|
|
" <td>972.0</td>\n",
|
|
" <td>533.0</td>\n",
|
|
" <td>305.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>194</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2003</td>\n",
|
|
" <td>409.0</td>\n",
|
|
" <td>2355.0</td>\n",
|
|
" <td>2598.0</td>\n",
|
|
" <td>1908.0</td>\n",
|
|
" <td>1090.0</td>\n",
|
|
" <td>512.0</td>\n",
|
|
" <td>361.0</td>\n",
|
|
" <td>591.0</td>\n",
|
|
" <td>3078.0</td>\n",
|
|
" <td>2641.0</td>\n",
|
|
" <td>1747.0</td>\n",
|
|
" <td>1157.0</td>\n",
|
|
" <td>395.0</td>\n",
|
|
" <td>129.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>195</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2004</td>\n",
|
|
" <td>554.0</td>\n",
|
|
" <td>2684.0</td>\n",
|
|
" <td>2659.0</td>\n",
|
|
" <td>1998.0</td>\n",
|
|
" <td>1196.0</td>\n",
|
|
" <td>561.0</td>\n",
|
|
" <td>321.0</td>\n",
|
|
" <td>733.0</td>\n",
|
|
" <td>3198.0</td>\n",
|
|
" <td>2772.0</td>\n",
|
|
" <td>1854.0</td>\n",
|
|
" <td>1029.0</td>\n",
|
|
" <td>505.0</td>\n",
|
|
" <td>269.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>196</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2005</td>\n",
|
|
" <td>520.0</td>\n",
|
|
" <td>2549.0</td>\n",
|
|
" <td>2797.0</td>\n",
|
|
" <td>1918.0</td>\n",
|
|
" <td>1255.0</td>\n",
|
|
" <td>665.0</td>\n",
|
|
" <td>461.0</td>\n",
|
|
" <td>704.0</td>\n",
|
|
" <td>2926.0</td>\n",
|
|
" <td>2682.0</td>\n",
|
|
" <td>1797.0</td>\n",
|
|
" <td>1138.0</td>\n",
|
|
" <td>581.0</td>\n",
|
|
" <td>417.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>197</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2006</td>\n",
|
|
" <td>540.0</td>\n",
|
|
" <td>2632.0</td>\n",
|
|
" <td>3049.0</td>\n",
|
|
" <td>2182.0</td>\n",
|
|
" <td>1397.0</td>\n",
|
|
" <td>729.0</td>\n",
|
|
" <td>428.0</td>\n",
|
|
" <td>689.0</td>\n",
|
|
" <td>2851.0</td>\n",
|
|
" <td>2892.0</td>\n",
|
|
" <td>1990.0</td>\n",
|
|
" <td>1223.0</td>\n",
|
|
" <td>583.0</td>\n",
|
|
" <td>314.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>198</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2007</td>\n",
|
|
" <td>484.0</td>\n",
|
|
" <td>2824.0</td>\n",
|
|
" <td>3197.0</td>\n",
|
|
" <td>2255.0</td>\n",
|
|
" <td>1357.0</td>\n",
|
|
" <td>699.0</td>\n",
|
|
" <td>465.0</td>\n",
|
|
" <td>703.0</td>\n",
|
|
" <td>2943.0</td>\n",
|
|
" <td>2721.0</td>\n",
|
|
" <td>1812.0</td>\n",
|
|
" <td>1041.0</td>\n",
|
|
" <td>554.0</td>\n",
|
|
" <td>367.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>199</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2008</td>\n",
|
|
" <td>367.0</td>\n",
|
|
" <td>2970.0</td>\n",
|
|
" <td>3493.0</td>\n",
|
|
" <td>2418.0</td>\n",
|
|
" <td>1480.0</td>\n",
|
|
" <td>733.0</td>\n",
|
|
" <td>420.0</td>\n",
|
|
" <td>512.0</td>\n",
|
|
" <td>3199.0</td>\n",
|
|
" <td>2786.0</td>\n",
|
|
" <td>2082.0</td>\n",
|
|
" <td>1209.0</td>\n",
|
|
" <td>556.0</td>\n",
|
|
" <td>337.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>200</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2009</td>\n",
|
|
" <td>392.0</td>\n",
|
|
" <td>3054.0</td>\n",
|
|
" <td>3600.0</td>\n",
|
|
" <td>2420.0</td>\n",
|
|
" <td>1590.0</td>\n",
|
|
" <td>748.0</td>\n",
|
|
" <td>463.0</td>\n",
|
|
" <td>568.0</td>\n",
|
|
" <td>3152.0</td>\n",
|
|
" <td>2798.0</td>\n",
|
|
" <td>1790.0</td>\n",
|
|
" <td>1069.0</td>\n",
|
|
" <td>572.0</td>\n",
|
|
" <td>272.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>201</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2010</td>\n",
|
|
" <td>448.0</td>\n",
|
|
" <td>2900.0</td>\n",
|
|
" <td>3584.0</td>\n",
|
|
" <td>2415.0</td>\n",
|
|
" <td>1424.0</td>\n",
|
|
" <td>691.0</td>\n",
|
|
" <td>355.0</td>\n",
|
|
" <td>558.0</td>\n",
|
|
" <td>2763.0</td>\n",
|
|
" <td>2594.0</td>\n",
|
|
" <td>1688.0</td>\n",
|
|
" <td>958.0</td>\n",
|
|
" <td>482.0</td>\n",
|
|
" <td>286.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>202</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2011</td>\n",
|
|
" <td>501.0</td>\n",
|
|
" <td>3000.0</td>\n",
|
|
" <td>3792.0</td>\n",
|
|
" <td>2386.0</td>\n",
|
|
" <td>1395.0</td>\n",
|
|
" <td>680.0</td>\n",
|
|
" <td>455.0</td>\n",
|
|
" <td>708.0</td>\n",
|
|
" <td>2731.0</td>\n",
|
|
" <td>2563.0</td>\n",
|
|
" <td>1683.0</td>\n",
|
|
" <td>1006.0</td>\n",
|
|
" <td>457.0</td>\n",
|
|
" <td>346.0</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>203</th>\n",
|
|
" <td>Angola</td>\n",
|
|
" <td>2012</td>\n",
|
|
" <td>390.0</td>\n",
|
|
" <td>2804.0</td>\n",
|
|
" <td>3627.0</td>\n",
|
|
" <td>2529.0</td>\n",
|
|
" <td>1427.0</td>\n",
|
|
" <td>732.0</td>\n",
|
|
" <td>424.0</td>\n",
|
|
" <td>592.0</td>\n",
|
|
" <td>2501.0</td>\n",
|
|
" <td>2540.0</td>\n",
|
|
" <td>1617.0</td>\n",
|
|
" <td>1028.0</td>\n",
|
|
" <td>529.0</td>\n",
|
|
" <td>384.0</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" country year sp_m_014 sp_m_1524 sp_m_2534 sp_m_3544 sp_m_4554 \\\n",
|
|
"rownames \n",
|
|
"191 Angola 2000 186.0 999.0 1003.0 912.0 482.0 \n",
|
|
"192 Angola 2001 230.0 892.0 752.0 648.0 420.0 \n",
|
|
"193 Angola 2002 435.0 2223.0 2292.0 1915.0 1187.0 \n",
|
|
"194 Angola 2003 409.0 2355.0 2598.0 1908.0 1090.0 \n",
|
|
"195 Angola 2004 554.0 2684.0 2659.0 1998.0 1196.0 \n",
|
|
"196 Angola 2005 520.0 2549.0 2797.0 1918.0 1255.0 \n",
|
|
"197 Angola 2006 540.0 2632.0 3049.0 2182.0 1397.0 \n",
|
|
"198 Angola 2007 484.0 2824.0 3197.0 2255.0 1357.0 \n",
|
|
"199 Angola 2008 367.0 2970.0 3493.0 2418.0 1480.0 \n",
|
|
"200 Angola 2009 392.0 3054.0 3600.0 2420.0 1590.0 \n",
|
|
"201 Angola 2010 448.0 2900.0 3584.0 2415.0 1424.0 \n",
|
|
"202 Angola 2011 501.0 3000.0 3792.0 2386.0 1395.0 \n",
|
|
"203 Angola 2012 390.0 2804.0 3627.0 2529.0 1427.0 \n",
|
|
"\n",
|
|
" sp_m_5564 sp_m_65 sp_f_014 sp_f_1524 sp_f_2534 sp_f_3544 \\\n",
|
|
"rownames \n",
|
|
"191 312.0 194.0 247.0 1142.0 1091.0 844.0 \n",
|
|
"192 197.0 173.0 279.0 993.0 869.0 647.0 \n",
|
|
"193 624.0 444.0 640.0 2610.0 2208.0 1600.0 \n",
|
|
"194 512.0 361.0 591.0 3078.0 2641.0 1747.0 \n",
|
|
"195 561.0 321.0 733.0 3198.0 2772.0 1854.0 \n",
|
|
"196 665.0 461.0 704.0 2926.0 2682.0 1797.0 \n",
|
|
"197 729.0 428.0 689.0 2851.0 2892.0 1990.0 \n",
|
|
"198 699.0 465.0 703.0 2943.0 2721.0 1812.0 \n",
|
|
"199 733.0 420.0 512.0 3199.0 2786.0 2082.0 \n",
|
|
"200 748.0 463.0 568.0 3152.0 2798.0 1790.0 \n",
|
|
"201 691.0 355.0 558.0 2763.0 2594.0 1688.0 \n",
|
|
"202 680.0 455.0 708.0 2731.0 2563.0 1683.0 \n",
|
|
"203 732.0 424.0 592.0 2501.0 2540.0 1617.0 \n",
|
|
"\n",
|
|
" sp_f_4554 sp_f_5564 sp_f_65 \n",
|
|
"rownames \n",
|
|
"191 417.0 200.0 120.0 \n",
|
|
"192 323.0 200.0 182.0 \n",
|
|
"193 972.0 533.0 305.0 \n",
|
|
"194 1157.0 395.0 129.0 \n",
|
|
"195 1029.0 505.0 269.0 \n",
|
|
"196 1138.0 581.0 417.0 \n",
|
|
"197 1223.0 583.0 314.0 \n",
|
|
"198 1041.0 554.0 367.0 \n",
|
|
"199 1209.0 556.0 337.0 \n",
|
|
"200 1069.0 572.0 272.0 \n",
|
|
"201 958.0 482.0 286.0 \n",
|
|
"202 1006.0 457.0 346.0 \n",
|
|
"203 1028.0 529.0 384.0 "
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"tb_raw[tb_raw['country'] == 'Angola']"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "116c47ad",
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"Index(['country', 'year', 'sp_m_014', 'sp_m_1524', 'sp_m_2534', 'sp_m_3544',\n",
|
|
" 'sp_m_4554', 'sp_m_5564', 'sp_m_65', 'sp_f_014', 'sp_f_1524',\n",
|
|
" 'sp_f_2534', 'sp_f_3544', 'sp_f_4554', 'sp_f_5564', 'sp_f_65'],\n",
|
|
" dtype='object')"
|
|
]
|
|
},
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"tb_raw.columns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "062ed46a",
|
|
"metadata": {},
|
|
"source": [
|
|
"# 1. Make data tidy\n",
|
|
"\n",
|
|
"The final table should have these columns: `country`, `year`, `gender`, `age_range`, `cases`"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "568c8440",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "9d1f036e",
|
|
"metadata": {},
|
|
"source": [
|
|
"# 2. Compute summary tables\n",
|
|
"\n",
|
|
"1. Compute the number of cases per country and gender, for data between 2000 and 2006 (included)\n",
|
|
"2. Compute the number of cases per country and year range (2000-2006, 2007-2012) on rows, and gender on columns"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "c8e9b0e4",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.11.3"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|