ASPP 2024 material

This commit is contained in:
Pietro Berkes 2024-08-27 15:27:53 +03:00
commit 1f6bc07c51
90 changed files with 91689 additions and 0 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

6
LICENSE.txt Normal file
View file

@ -0,0 +1,6 @@
The material in this repository is released under the
CC Attribution-Share Alike 4.0 International
license.
Full license text available at
https://creativecommons.org/licenses/by-sa/4.0/

2
README.md Normal file
View file

@ -0,0 +1,2 @@
# data_class
Data structures, numpy arrays, tidy data, and more

BIN
data_complete.pdf Normal file

Binary file not shown.

BIN
exercises/.DS_Store vendored Normal file

Binary file not shown.

View file

@ -0,0 +1,168 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "373ed1db",
"metadata": {},
"source": [
"Exercise: given 2 decks of tarot cards, `deck1` and `deck2`, find all the matching pairs. The output should be a set of tuples `(idx1, idx2)` for every matching pair in `deck1`, `deck2`.\n",
"\n",
"For example:\n",
"```\n",
"deck1 = ['C', 'B', 'A']\n",
"deck2 = ['A', 'C', 'B']\n",
"```\n",
"\n",
"should return (in no particular order):\n",
"\n",
"```\n",
"{(0, 1), (1, 2), (2, 0)}\n",
"```\n",
"\n",
"Compute the Big-O complexity of your algorithm.\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cf05b9c4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Deck 1: ['The Fool', 'Death', 'The Lovers', 'Judgement', 'Temperance', 'The Sun', 'The Hermit', 'The Chariot', 'The Tower', 'Wheel of Fortune', 'The Devil', 'The Emperor', 'The Empress', 'The World', 'The Hierophant', 'The High Priestess', 'The Star', 'The Hanged Man', 'Strength', 'Justice', 'The Moon', 'The Magician']\n",
"Deck 2: ['The Hermit', 'The Sun', 'The Tower', 'The Empress', 'The Star', 'The Emperor', 'The Magician', 'The Chariot', 'The Devil', 'The Moon', 'Judgement', 'Death', 'The Fool', 'Strength', 'Temperance', 'The Hierophant', 'The Lovers', 'Justice', 'Wheel of Fortune', 'The High Priestess', 'The Hanged Man', 'The World']\n"
]
}
],
"source": [
"import random\n",
"\n",
"# List of tarot card names (Major Arcana)\n",
"tarot_cards = [\n",
" \"The Fool\", \"The Magician\", \"The High Priestess\", \"The Empress\", \"The Emperor\",\n",
" \"The Hierophant\", \"The Lovers\", \"The Chariot\", \"Strength\", \"The Hermit\",\n",
" \"Wheel of Fortune\", \"Justice\", \"The Hanged Man\", \"Death\", \"Temperance\",\n",
" \"The Devil\", \"The Tower\", \"The Star\", \"The Moon\", \"The Sun\", \"Judgement\",\n",
" \"The World\"\n",
"]\n",
"\n",
"# Copy the list to create two separate decks\n",
"deck1 = tarot_cards.copy()\n",
"deck2 = tarot_cards.copy()\n",
"\n",
"# Shuffle both decks\n",
"random.shuffle(deck1)\n",
"random.shuffle(deck2)\n",
"\n",
"# Print the shuffled decks\n",
"print(\"Deck 1:\", deck1)\n",
"print(\"Deck 2:\", deck2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7685740c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[0, 12],\n",
" [1, 11],\n",
" [2, 16],\n",
" [3, 10],\n",
" [4, 14],\n",
" [5, 1],\n",
" [6, 0],\n",
" [7, 7],\n",
" [8, 2],\n",
" [9, 18],\n",
" [10, 8],\n",
" [11, 5],\n",
" [12, 3],\n",
" [13, 21],\n",
" [14, 15],\n",
" [15, 19],\n",
" [16, 4],\n",
" [17, 20],\n",
" [18, 13],\n",
" [19, 17],\n",
" [20, 9],\n",
" [21, 6]]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"card_to_matches = {}\n",
"for card in deck1: # O(N)\n",
" card_to_matches[card] = [None, None] # O(1)\n",
"\n",
"for idx1, card in enumerate(deck1): # O(N)\n",
" card_to_matches[card][0] = idx1 # O(1)\n",
" \n",
"for idx2, card in enumerate(deck2): # O(N)\n",
" card_to_matches[card][1] = idx2 # O(1)\n",
" \n",
"list(card_to_matches.values()) # O(N)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2b33252c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'The World'"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"card"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "509dda71",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,152 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "af52b0cd",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "198afa0e",
"metadata": {},
"outputs": [],
"source": [
"def get_slice_and_perform_something(x, axis):\n",
" idx = 75\n",
" if axis == 0:\n",
" slice_ = x[idx, :]\n",
" else:\n",
" slice_ = x[:, idx]\n",
" # Here I divide by two but any other operation will do, \n",
" # we just want to simulate the fact that we actually need to read the memory\n",
" return slice_ // 2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "15900a3c",
"metadata": {},
"outputs": [],
"source": [
"page_size = 4096\n",
"n = 100\n",
"x = np.empty((page_size * n, page_size * n), dtype='int8')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "c182d3d3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(409600, 409600)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x.shape"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "86a3e63f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"28.2 µs ± 1.22 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)\n"
]
}
],
"source": [
"%timeit get_slice_and_perform_something(x, axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "8b2a96c2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The slowest run took 5.29 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
"886 ms ± 337 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
]
}
],
"source": [
"%timeit get_slice_and_perform_something(x, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "a8f67b13",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"32218.18181818182"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"886000 / 27.5"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17a65d38",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,462 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 107,
"id": "4ccf18f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0 1 2 3]\n",
" [ 4 5 6 7]\n",
" [ 8 9 10 11]]\n",
"\n",
"Is it a view? False\n",
"\n",
"dtype\tint64\n",
"ndim\t2\n",
"shape\t(3, 4)\n",
"strides\t(32, 8)\n",
" \n"
]
}
],
"source": [
"import numpy as np\n",
"\n",
"\n",
"def is_view(a):\n",
" return a.base is not None\n",
"\n",
"\n",
"def print_info(a):\n",
" txt = f\"\"\"\n",
"Is it a view? {is_view(a)}\n",
"\n",
"dtype\\t{a.dtype}\n",
"ndim\\t{a.ndim}\n",
"shape\\t{a.shape}\n",
"strides\\t{a.strides}\n",
" \"\"\"\n",
" print(a)\n",
" print(txt)\n",
"\n",
"\n",
"x = np.arange(12).reshape(3, 4).copy()\n",
"print_info(x)"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "b68308e8",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0 1 2 3]\n",
" [ 8 9 10 11]]\n",
"\n",
"Is it a view? True\n",
"\n",
"dtype\tint64\n",
"ndim\t2\n",
"shape\t(2, 4)\n",
"strides\t(64, 8)\n",
" \n"
]
}
],
"source": [
"y = x[::2, :]\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 109,
"id": "85feedb0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[4 5 6 7]\n",
"\n",
"Is it a view? True\n",
"\n",
"dtype\tint64\n",
"ndim\t1\n",
"shape\t(4,)\n",
"strides\t(8,)\n",
" \n"
]
}
],
"source": [
"y = x[1, :]\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "dbbb9a7f",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[4 5 6 7]\n",
"\n",
"Is it a view? True\n",
"\n",
"dtype\tint64\n",
"ndim\t1\n",
"shape\t(4,)\n",
"strides\t(8,)\n",
" \n"
]
}
],
"source": [
"y = x[1]\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 111,
"id": "fc63ad8c",
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 4 11]\n",
"\n",
"Is it a view? False\n",
"\n",
"dtype\tint64\n",
"ndim\t1\n",
"shape\t(2,)\n",
"strides\t(8,)\n",
" \n"
]
}
],
"source": [
"y = x[[1, 2], [0, 3]]\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 112,
"id": "aa8effeb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0 1 2 3]\n",
" [ 8 9 10 11]]\n",
"\n",
"Is it a view? False\n",
"\n",
"dtype\tint64\n",
"ndim\t2\n",
"shape\t(2, 4)\n",
"strides\t(32, 8)\n",
" \n"
]
}
],
"source": [
"# Get the first and third row\n",
"y = x[[0, 2], :]\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "4474f8cf",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 2 3 4 5]\n",
" [ 6 7 8 9]\n",
" [10 11 12 13]]\n",
"\n",
"Is it a view? False\n",
"\n",
"dtype\tint64\n",
"ndim\t2\n",
"shape\t(3, 4)\n",
"strides\t(32, 8)\n",
" \n"
]
}
],
"source": [
"y = x + 2\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"id": "4957469c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[5 9 2]\n",
"\n",
"Is it a view? False\n",
"\n",
"dtype\tint64\n",
"ndim\t1\n",
"shape\t(3,)\n",
"strides\t(8,)\n",
" \n"
]
}
],
"source": [
"y = x[[1, 2, 0], [1, 1, 2]]\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "d1649515",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0 1]\n",
" [ 2 3]\n",
" [ 4 5]\n",
" [ 6 7]\n",
" [ 8 9]\n",
" [10 11]]\n",
"\n",
"Is it a view? True\n",
"\n",
"dtype\tint64\n",
"ndim\t2\n",
"shape\t(6, 2)\n",
"strides\t(16, 8)\n",
" \n"
]
}
],
"source": [
"y = x.reshape((6, 2))\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 116,
"id": "c5caeb74",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0 4]\n",
" [ 8 1]\n",
" [ 5 9]\n",
" [ 2 6]\n",
" [10 3]\n",
" [ 7 11]]\n",
"\n",
"Is it a view? True\n",
"\n",
"dtype\tint64\n",
"ndim\t2\n",
"shape\t(6, 2)\n",
"strides\t(16, 8)\n",
" \n"
]
}
],
"source": [
"y = x.T.reshape((6, 2))\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 117,
"id": "15b0d7ad",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0 1 2 3 4 5 6 7 8 9 10 11]\n",
"\n",
"Is it a view? True\n",
"\n",
"dtype\tint64\n",
"ndim\t1\n",
"shape\t(12,)\n",
"strides\t(8,)\n",
" \n"
]
}
],
"source": [
"y = x.ravel()\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 118,
"id": "38cc1ef3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0 4 8 1 5 9 2 6 10 3 7 11]\n",
"\n",
"Is it a view? False\n",
"\n",
"dtype\tint64\n",
"ndim\t1\n",
"shape\t(12,)\n",
"strides\t(8,)\n",
" \n"
]
}
],
"source": [
"y = x.T.ravel()\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 119,
"id": "b7d0cc63",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0 2 4 6 8 10]\n",
"\n",
"Is it a view? False\n",
"\n",
"dtype\tint64\n",
"ndim\t1\n",
"shape\t(6,)\n",
"strides\t(8,)\n",
" \n"
]
}
],
"source": [
"y = x[(x % 2) == 0]\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"id": "866e842a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 0 1 2 3]\n",
" [ 4 5 6 7]\n",
" [ 8 9 10 11]]\n",
"\n",
"Is it a view? False\n",
"\n",
"dtype\tint64\n",
"ndim\t2\n",
"shape\t(3, 4)\n",
"strides\t(32, 8)\n",
" \n"
]
}
],
"source": [
"y = np.sort(x, axis=1)\n",
"print_info(y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a074c89b",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "320cfb50",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,210 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "245d99ab",
"metadata": {},
"source": [
"# Exercise: Find the anagrams for all words in a list"
]
},
{
"cell_type": "markdown",
"id": "d1e46c4a",
"metadata": {},
"source": [
"* You are given an English dictionary containing M words (“the dictionary”), and a separate list of N words (“the input”, saved in the file `words_to_search.txt`)\n",
"* For each word in the input, find all the anagrams in the dictionary (e.g., for input 'acme' the anagrams are `['acme', 'came', 'mace']`)\n",
"\n",
"How to proceed?\n",
"1. Write an algorithm to find all anagrams for one input word first\n",
"2. What is the Big-O class of this algorithm when executed on the full N-word input?\n",
"3. Is there a way to pre-process the dictionary to improve the Big-O performance?"
]
},
{
"cell_type": "markdown",
"id": "b9b9cecd",
"metadata": {},
"source": [
"# 1. Load the system dictionary and the input words"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "24070a26",
"metadata": {},
"outputs": [],
"source": [
"# Load the system dictionary\n",
"with open('/usr/share/dict/words', 'r') as f:\n",
" dict_words = [w.strip() for w in f.readlines()]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4002fcdd",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"['A',\n",
" 'a',\n",
" 'aa',\n",
" 'aal',\n",
" 'aalii',\n",
" '...',\n",
" 'zythem',\n",
" 'Zythia',\n",
" 'zythum',\n",
" 'Zyzomys',\n",
" 'Zyzzogeton']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Print the start and end of the dictionary\n",
"dict_words[:5] + ['...'] + dict_words[-5:]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "823537ef",
"metadata": {},
"outputs": [],
"source": [
"# Load the input words\n",
"with open('words_to_search.txt', 'r') as f:\n",
" words = [w.strip() for w in f.readlines()]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4ccec6a3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['acer',\n",
" 'acers',\n",
" 'aces',\n",
" 'aches',\n",
" 'acme',\n",
" '...',\n",
" 'yap',\n",
" 'yaw',\n",
" 'yea',\n",
" 'zendo',\n",
" 'zoned']"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Print the start and end of the input list\n",
"words[:5] + ['...'] + words[-5:]"
]
},
{
"cell_type": "markdown",
"id": "14d91685",
"metadata": {},
"source": [
"# 2. Look for the anagrams of one input word, e.g. \"organ\"\n",
"\n",
"* There are several anagrams, including \"groan\" and \"argon\".\n",
"\n",
"* What is the Big-O performance of your algorithm, in terms of M, the number of words in the dictionary, and K, the number of letters in a word?"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "badf44c1",
"metadata": {},
"outputs": [],
"source": [
"word = 'organ'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef938d95",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "7cd1196c",
"metadata": {},
"source": [
"The performance of this implementation is ... ."
]
},
{
"cell_type": "markdown",
"id": "115c3219",
"metadata": {},
"source": [
"# 3. Look for the anagrams of the words in the input list\n",
"\n",
"* How does the Big-O performance of your one-word implementation scale to an input list of N words?\n",
"* Is there a way to pre-process the dictionary words in a data structure that is better suited for this task?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "03ce3e28",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2fc5ec4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,210 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "245d99ab",
"metadata": {},
"source": [
"# Exercise: Find the anagrams for all words in a list"
]
},
{
"cell_type": "markdown",
"id": "d1e46c4a",
"metadata": {},
"source": [
"* You are given an English dictionary containing M words (“the dictionary”), and a separate list of N words (“the input”, saved in the file `words_to_search.txt`)\n",
"* For each word in the input, find all the anagrams in the dictionary (e.g., for input 'acme' the anagrams are `['acme', 'came', 'mace']`)\n",
"\n",
"How to proceed?\n",
"1. Write an algorithm to find all anagrams for one input word first\n",
"2. What is the Big-O class of this algorithm when executed on the full N-word input?\n",
"3. Is there a way to pre-process the dictionary to improve the Big-O performance?"
]
},
{
"cell_type": "markdown",
"id": "b9b9cecd",
"metadata": {},
"source": [
"# 1. Load the system dictionary and the input words"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "24070a26",
"metadata": {},
"outputs": [],
"source": [
"# Load the system dictionary\n",
"with open('/usr/share/dict/words', 'r') as f:\n",
" dict_words = [w.strip() for w in f.readlines()]"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "4002fcdd",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"['A',\n",
" 'a',\n",
" 'aa',\n",
" 'aal',\n",
" 'aalii',\n",
" '...',\n",
" 'zythem',\n",
" 'Zythia',\n",
" 'zythum',\n",
" 'Zyzomys',\n",
" 'Zyzzogeton']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Print the start and end of the dictionary\n",
"dict_words[:5] + ['...'] + dict_words[-5:]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "823537ef",
"metadata": {},
"outputs": [],
"source": [
"# Load the input words\n",
"with open('words_to_search.txt', 'r') as f:\n",
" words = [w.strip() for w in f.readlines()]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "4ccec6a3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['acer',\n",
" 'acers',\n",
" 'aces',\n",
" 'aches',\n",
" 'acme',\n",
" '...',\n",
" 'yap',\n",
" 'yaw',\n",
" 'yea',\n",
" 'zendo',\n",
" 'zoned']"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Print the start and end of the input list\n",
"words[:5] + ['...'] + words[-5:]"
]
},
{
"cell_type": "markdown",
"id": "14d91685",
"metadata": {},
"source": [
"# 2. Look for the anagrams of one input word, e.g. \"organ\"\n",
"\n",
"* There are several anagrams, including \"groan\" and \"argon\".\n",
"\n",
"* What is the Big-O performance of your algorithm, in terms of M, the number of words in the dictionary, and K, the number of letters in a word?"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "badf44c1",
"metadata": {},
"outputs": [],
"source": [
"word = 'organ'"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef938d95",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "7cd1196c",
"metadata": {},
"source": [
"The performance of this implementation is ... ."
]
},
{
"cell_type": "markdown",
"id": "115c3219",
"metadata": {},
"source": [
"# 3. Look for the anagrams of the words in the input list\n",
"\n",
"* How does the Big-O performance of your one-word implementation scale to an input list of N words?\n",
"* Is there a way to pre-process the dictionary words in a data structure that is better suited for this task?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "03ce3e28",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a2fc5ec4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,108 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "373ed1db",
"metadata": {},
"source": [
"# Exercise: Match the tarot cards!\n",
"\n",
"Given 2 decks of tarot cards, `deck1` and `deck2`, find all the matching pairs. The output should be a set of tuples `(idx1, idx2)` for every matching pair in `deck1`, `deck2`.\n",
"\n",
"For example:\n",
"```\n",
"deck1 = ['C', 'B', 'A']\n",
"deck2 = ['A', 'C', 'B']\n",
"```\n",
"\n",
"should return (in no particular order):\n",
"\n",
"```\n",
"{(0, 1), (1, 2), (2, 0)}\n",
"```\n",
"\n",
"1. Write an algorithm to match the tarot cards\n",
"2. Compute the Big-O complexity of your algorithm\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cf05b9c4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-- Deck 1: --\n",
" ['The Lovers', 'Temperance', 'The Emperor', 'The Sun', 'The Fool', 'The Chariot', 'Death', 'Strength', 'Justice', 'The Star', 'Judgement', 'The World', 'The Tower', 'The Hanged Man', 'The Empress', 'The Hermit', 'The Devil', 'The High Priestess', 'The Moon', 'The Hierophant', 'Wheel of Fortune', 'The Magician']\n",
"-- Deck 2: --\n",
" ['The Fool', 'Death', 'The Hermit', 'Strength', 'The Moon', 'Wheel of Fortune', 'Judgement', 'The Lovers', 'The Star', 'The Hanged Man', 'The Empress', 'The Emperor', 'The Magician', 'The Tower', 'The Hierophant', 'The Chariot', 'The High Priestess', 'Temperance', 'The World', 'The Devil', 'The Sun', 'Justice']\n"
]
}
],
"source": [
"import random\n",
"\n",
"# List of tarot card names (Major Arcana)\n",
"tarot_cards = [\n",
" \"The Fool\", \"The Magician\", \"The High Priestess\", \"The Empress\", \"The Emperor\",\n",
" \"The Hierophant\", \"The Lovers\", \"The Chariot\", \"Strength\", \"The Hermit\",\n",
" \"Wheel of Fortune\", \"Justice\", \"The Hanged Man\", \"Death\", \"Temperance\",\n",
" \"The Devil\", \"The Tower\", \"The Star\", \"The Moon\", \"The Sun\", \"Judgement\",\n",
" \"The World\"\n",
"]\n",
"\n",
"# Copy the list to create two separate decks\n",
"deck1 = tarot_cards.copy()\n",
"deck2 = tarot_cards.copy()\n",
"\n",
"# Shuffle both decks\n",
"random.shuffle(deck1)\n",
"random.shuffle(deck2)\n",
"\n",
"# Print the shuffled decks\n",
"print(\"-- Deck 1: --\\n\", deck1)\n",
"print(\"-- Deck 2: --\\n\", deck2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48eb31e2",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "509dda71",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,281 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "373ed1db",
"metadata": {},
"source": [
"# Exercise: Match the tarot cards!\n",
"\n",
"Given 2 decks of tarot cards, `deck1` and `deck2`, find all the matching pairs. The output should be a set of tuples `(idx1, idx2)` for every matching pair in `deck1`, `deck2`.\n",
"\n",
"For example:\n",
"```\n",
"deck1 = ['C', 'B', 'A']\n",
"deck2 = ['A', 'C', 'B']\n",
"```\n",
"\n",
"should return (in no particular order):\n",
"\n",
"```\n",
"{(0, 1), (1, 2), (2, 0)}\n",
"```\n",
"\n",
"1. Write an algorithm to match the tarot cards\n",
"2. Compute the Big-O complexity of your algorithm\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "cf05b9c4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-- Deck 1: --\n",
" ['The Tower', 'The Star', 'Strength', 'The Devil', 'Judgement', 'The World', 'The High Priestess', 'The Hanged Man', 'The Sun', 'The Lovers', 'The Chariot', 'The Emperor', 'The Fool', 'The Empress', 'Death', 'Temperance', 'Justice', 'The Magician', 'Wheel of Fortune', 'The Hermit', 'The Hierophant', 'The Moon']\n",
"-- Deck 2: --\n",
" ['Temperance', 'The Sun', 'The Lovers', 'Strength', 'The High Priestess', 'The Magician', 'Justice', 'Judgement', 'The Empress', 'The Star', 'The Fool', 'The Hierophant', 'The Hanged Man', 'The Tower', 'The Moon', 'The Chariot', 'Death', 'The World', 'The Hermit', 'The Devil', 'Wheel of Fortune', 'The Emperor']\n"
]
}
],
"source": [
"import random\n",
"\n",
"# List of tarot card names (Major Arcana)\n",
"tarot_cards = [\n",
" \"The Fool\", \"The Magician\", \"The High Priestess\", \"The Empress\", \"The Emperor\",\n",
" \"The Hierophant\", \"The Lovers\", \"The Chariot\", \"Strength\", \"The Hermit\",\n",
" \"Wheel of Fortune\", \"Justice\", \"The Hanged Man\", \"Death\", \"Temperance\",\n",
" \"The Devil\", \"The Tower\", \"The Star\", \"The Moon\", \"The Sun\", \"Judgement\",\n",
" \"The World\"\n",
"]\n",
"\n",
"# Copy the list to create two separate decks\n",
"deck1 = tarot_cards.copy()\n",
"deck2 = tarot_cards.copy()\n",
"\n",
"# Shuffle both decks\n",
"random.shuffle(deck1)\n",
"random.shuffle(deck2)\n",
"\n",
"# Print the shuffled decks\n",
"print(\"-- Deck 1: --\\n\", deck1)\n",
"print(\"-- Deck 2: --\\n\", deck2)"
]
},
{
"cell_type": "markdown",
"id": "3db3f337",
"metadata": {},
"source": [
"# Simplest implementation: O(N^2)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "319ef6a9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{(0, 13),\n",
" (1, 9),\n",
" (2, 3),\n",
" (3, 19),\n",
" (4, 7),\n",
" (5, 17),\n",
" (6, 4),\n",
" (7, 12),\n",
" (8, 1),\n",
" (9, 2),\n",
" (10, 15),\n",
" (11, 21),\n",
" (12, 10),\n",
" (13, 8),\n",
" (14, 16),\n",
" (15, 0),\n",
" (16, 6),\n",
" (17, 5),\n",
" (18, 20),\n",
" (19, 18),\n",
" (20, 11),\n",
" (21, 14)}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"matches = set()\n",
"for idx1, card in enumerate(deck1): # O(N)\n",
" match = (idx1, deck2.index(card)) # O(N)\n",
" matches.add(match)\n",
" \n",
"matches"
]
},
{
"cell_type": "markdown",
"id": "3264eb67",
"metadata": {},
"source": [
"# Faster solution: O(N log N)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c768a7c2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{(0, 13),\n",
" (1, 9),\n",
" (2, 3),\n",
" (3, 19),\n",
" (4, 7),\n",
" (5, 17),\n",
" (6, 4),\n",
" (7, 12),\n",
" (8, 1),\n",
" (9, 2),\n",
" (10, 15),\n",
" (11, 21),\n",
" (12, 10),\n",
" (13, 8),\n",
" (14, 16),\n",
" (15, 0),\n",
" (16, 6),\n",
" (17, 5),\n",
" (18, 20),\n",
" (19, 18),\n",
" (20, 11),\n",
" (21, 14)}"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a list of (tarot_card, idx), and sort it. This is kind of equivalent to np.argsort\n",
"n_cards = len(deck1)\n",
"sorted_deck1 = sorted((deck1[idx], idx) for idx in range(n_cards)) # O(N log N)\n",
"sorted_deck2 = sorted((deck2[idx], idx) for idx in range(n_cards)) # O(N log N)\n",
"\n",
"matches = set()\n",
"for idx in range(n_cards): # O(N)\n",
" matches.add((sorted_deck1[idx][1], sorted_deck2[idx][1])) # O(1)\n",
" \n",
"matches"
]
},
{
"cell_type": "markdown",
"id": "5099970a",
"metadata": {},
"source": [
"# Fastest solution: O(N)"
]
},
{
"cell_type": "markdown",
"id": "83c53b82",
"metadata": {},
"source": []
},
{
"cell_type": "code",
"execution_count": 7,
"id": "509dda71",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{(0, 13),\n",
" (1, 9),\n",
" (2, 3),\n",
" (3, 19),\n",
" (4, 7),\n",
" (5, 17),\n",
" (6, 4),\n",
" (7, 12),\n",
" (8, 1),\n",
" (9, 2),\n",
" (10, 15),\n",
" (11, 21),\n",
" (12, 10),\n",
" (13, 8),\n",
" (14, 16),\n",
" (15, 0),\n",
" (16, 6),\n",
" (17, 5),\n",
" (18, 20),\n",
" (19, 18),\n",
" (20, 11),\n",
" (21, 14)}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Create a dictionary, mapping cards to the index in deck2\n",
"deck2_card_to_idx = {}\n",
"for idx2, card in enumerate(deck2): # O(N)\n",
" deck2_card_to_idx[card] = idx2\n",
"\n",
"# For each card and index in deck1, look up the index in deck2, and store the match\n",
"matches = set()\n",
"for idx1, card in enumerate(deck1): # O(N)\n",
" idx2 = deck2_card_to_idx[card] # O(1)\n",
" matches.add((idx1, idx2)) # O(1)\n",
" \n",
"matches"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f403a4ce",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,108 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "373ed1db",
"metadata": {},
"source": [
"# Exercise: Match the tarot cards!\n",
"\n",
"Given 2 decks of tarot cards, `deck1` and `deck2`, find all the matching pairs. The output should be a set of tuples `(idx1, idx2)` for every matching pair in `deck1`, `deck2`.\n",
"\n",
"For example:\n",
"```\n",
"deck1 = ['C', 'B', 'A']\n",
"deck2 = ['A', 'C', 'B']\n",
"```\n",
"\n",
"should return (in no particular order):\n",
"\n",
"```\n",
"{(0, 1), (1, 2), (2, 0)}\n",
"```\n",
"\n",
"1. Write an algorithm to match the tarot cards\n",
"2. Compute the Big-O complexity of your algorithm\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "cf05b9c4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"-- Deck 1: --\n",
" ['The Lovers', 'Temperance', 'The Emperor', 'The Sun', 'The Fool', 'The Chariot', 'Death', 'Strength', 'Justice', 'The Star', 'Judgement', 'The World', 'The Tower', 'The Hanged Man', 'The Empress', 'The Hermit', 'The Devil', 'The High Priestess', 'The Moon', 'The Hierophant', 'Wheel of Fortune', 'The Magician']\n",
"-- Deck 2: --\n",
" ['The Fool', 'Death', 'The Hermit', 'Strength', 'The Moon', 'Wheel of Fortune', 'Judgement', 'The Lovers', 'The Star', 'The Hanged Man', 'The Empress', 'The Emperor', 'The Magician', 'The Tower', 'The Hierophant', 'The Chariot', 'The High Priestess', 'Temperance', 'The World', 'The Devil', 'The Sun', 'Justice']\n"
]
}
],
"source": [
"import random\n",
"\n",
"# List of tarot card names (Major Arcana)\n",
"tarot_cards = [\n",
" \"The Fool\", \"The Magician\", \"The High Priestess\", \"The Empress\", \"The Emperor\",\n",
" \"The Hierophant\", \"The Lovers\", \"The Chariot\", \"Strength\", \"The Hermit\",\n",
" \"Wheel of Fortune\", \"Justice\", \"The Hanged Man\", \"Death\", \"Temperance\",\n",
" \"The Devil\", \"The Tower\", \"The Star\", \"The Moon\", \"The Sun\", \"Judgement\",\n",
" \"The World\"\n",
"]\n",
"\n",
"# Copy the list to create two separate decks\n",
"deck1 = tarot_cards.copy()\n",
"deck2 = tarot_cards.copy()\n",
"\n",
"# Shuffle both decks\n",
"random.shuffle(deck1)\n",
"random.shuffle(deck2)\n",
"\n",
"# Print the shuffled decks\n",
"print(\"-- Deck 1: --\\n\", deck1)\n",
"print(\"-- Deck 2: --\\n\", deck2)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "48eb31e2",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "509dda71",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -0,0 +1,281 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "373ed1db",
"metadata": {},
"source": [
"# Exercise: Match the tarot cards!\n",
"\n",
"Given 2 decks of tarot cards, `deck1` and `deck2`, find all the matching pairs. The output should be a set of tuples `(idx1, idx2)` for every matching pair in `deck1`, `deck2`.\n",
"\n",
"For example:\n",
"```\n",
"deck1 = ['C', 'B', 'A']\n",
"deck2 = ['A', 'C', 'B']\n",
"```\n",
"\n",
"should return (in no particular order):\n",
"\n",
"```\n",
"{(0, 1), (1, 2), (2, 0)}\n",
"```\n",
"\n",
"1. Write an algorithm to match the tarot cards\n",
"2. Compute the Big-O complexity of your algorithm\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "cf05b9c4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--