{
"cells": [
{
"cell_type": "markdown",
"id": "8cc1c960",
"metadata": {},
"source": [
"# Pandas, quick introduction"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "0f55dab1",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"id": "4b377c42",
"metadata": {},
"source": [
"# Pandas introduces a tabular data structure, the DataFrame\n",
"\n",
"* Each column has its own data type: any C-native type (e.g. `int64`, `float64`, `bool`) or generic Python objects such as strings\n",
"* Columns and rows have indices, i.e. labels that identify each column or row"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "ec75edbe",
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(\n",
" data = [\n",
" ['Anthony', 28, 1.53], \n",
" ['Maria', 31, 1.76], \n",
" ['Emma', 26, 1.83], \n",
" ['Philip', 41, 1.81], \n",
" ['Bill', 27, None],\n",
" ],\n",
" columns = ['name', 'age', 'height'],\n",
" index=['A484', 'C012', 'A123', 'B663', 'A377'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "37318480",
"metadata": {},
"outputs": [],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "fe1c5739",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "dedad6f3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "e31f21c6",
"metadata": {},
"source": [
"## DataFrame attributes"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4109f1eb",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "708f9bb5",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "cb2f33b9",
"metadata": {},
"source": [
"## Indexing rows and columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "19ef2738",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f354ffc",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "94563f03",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "43ab5233",
"metadata": {},
"source": [
"## Examining a column"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f2cb544c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "86388f86",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "fc081b90",
"metadata": {},
"source": [
"# Filtering"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "263ae06c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "318da062",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "a570023a",
"metadata": {},
"source": [
"# Basic operations are applied column by column (unlike NumPy)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7260d212",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "49b7057a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f5a0f053",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e1ffe32",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "7cf9b5d7",
"metadata": {},
"source": [
"# Operations on strings"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b78bc237",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "0236069f",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "5761725b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce3d54ad",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "8c5584db",
"metadata": {},
"source": [
"# Adding new columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6e09176",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f9a552f0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2e354ace",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}