387 lines
9.2 KiB
Plaintext
387 lines
9.2 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "8685ea3a",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
|
|||
|
"import timeit\n",
|
|||
|
"import matplotlib.pyplot as plt"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "048881d0",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Example: Find common words"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "2464a282",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"Problem: given two lists of words, extract all the words that are in common"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "71740eab",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Implementation with 2x for-loops"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "f175c775",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"%%timeit\n",
|
|||
|
"\n",
|
|||
|
"scaling_factor = 1 #10, 100\n",
|
|||
|
"\n",
|
|||
|
"words1 = ['apple', 'orange', 'banana', 'melon', 'peach'] * scaling_factor\n",
|
|||
|
"words2 = ['orange', 'kiwi', 'avocado', 'apple', 'banana'] * scaling_factor\n",
|
|||
|
"\n",
|
|||
|
"common_for = []\n",
|
|||
|
"for w in words1:\n",
|
|||
|
" if w in words2:\n",
|
|||
|
" common_for.append(w) # 612 ns, 12.3 us, 928 us "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "affab857",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"input_size = [1, 10, 100]\n",
|
|||
|
"results_for_loop = [(513/10**9)/(513/10**9), (12.4/10**6)/(513/10**9), (928/10**6)/(513/10**9)]  # unitless ratios to the 513 ns baseline (same baseline as the y-tick labels)\n",
|
|||
|
"\n",
|
|||
|
"x = np.linspace(0,100,100)\n",
|
|||
|
"fit1 = np.polyfit(input_size,results_for_loop,2)\n",
|
|||
|
"eval1 = np.polyval(fit1, x)\n",
|
|||
|
"\n",
|
|||
|
"plt.plot(x,eval1,c = 'orange')\n",
|
|||
|
"plt.scatter(input_size, results_for_loop, c = 'orange', s = 100, label = '2 for loops')\n",
|
|||
|
"\n",
|
|||
|
"plt.xlabel('input size')\n",
|
|||
|
"plt.ylabel('processing time')\n",
|
|||
|
"plt.yticks(results_for_loop, ['T', str(int((12.4 /10**6)/(513/10**9)))+ 'x T', str(int((928/10**6)/(513/10**9))) + 'x T'])\n",
|
|||
|
"plt.legend()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "2a61bf38",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "skip"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"print('Data increase 1x, 10x, 100x')\n",
|
|||
|
"print('Time increase 513 ns, 12.4 µs, 928 µs')\n",
|
|||
|
"print('time1, ~ 24x time1, ~ 1800x time1')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "38e47397",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"What is the big-O complexity of this implementation? "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "4118b38d",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "skip"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"n * n ~ O(n<sup>2</sup>)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "31cd0e74",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Implementation with sorted lists"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "c13a24f4",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"%%timeit\n",
|
|||
|
"scaling_factor = 100 # also run with 1, 10\n",
|
|||
|
"words1 = ['apple', 'orange', 'banana', 'melon', 'peach'] * scaling_factor\n",
|
|||
|
"words2 = ['orange', 'kiwi', 'avocado', 'apple', 'banana'] *scaling_factor\n",
|
|||
|
"words1 = sorted(words1)\n",
|
|||
|
"words2 = sorted(words2)\n",
|
|||
|
"\n",
|
|||
|
"common_sort_list = []\n",
|
|||
|
"idx2 = 0\n",
|
|||
|
"for w in words1:\n",
|
|||
|
" while idx2 < len(words2) and words2[idx2] < w:\n",
|
|||
|
" idx2 += 1\n",
|
|||
|
" if idx2 >= len(words2):\n",
|
|||
|
" break\n",
|
|||
|
" if words2[idx2] == w:\n",
|
|||
|
" common_sort_list.append(w) # 1.94 us, 17.3 us, 204 us"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "f1e8fed2",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# 1.9 * 10**6\n",
|
|||
|
"# 17.9 * 10**6\n",
|
|||
|
"# 205 * 10**6"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "8ce798ab",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"input_size = [1, 10, 100]\n",
|
|||
|
"results_sorted_lists = [(1.9 * 10**6)/(1.9 * 10**6), (17.9 * 10**6)/(1.9 * 10**6), (205 * 10**6)/(1.9 * 10**6)]\n",
|
|||
|
"fit2 = np.polyfit(input_size, results_sorted_lists, 2)\n",
|
|||
|
"eval2 = np.polyval(fit2, x)\n",
|
|||
|
"plt.plot(x,eval1,c = 'orange')\n",
|
|||
|
"plt.plot(x,eval2,c = 'pink')\n",
|
|||
|
"plt.scatter(input_size, results_for_loop, c = 'orange', s = 100, label = '2 for loops')\n",
|
|||
|
"plt.scatter(input_size, results_sorted_lists, c = 'pink', s = 100, label = 'sorted lists')\n",
|
|||
|
"plt.xlabel('input size')\n",
|
|||
|
"plt.ylabel('processing time')\n",
|
|||
|
"plt.yticks(results_for_loop + results_sorted_lists[1:], ['T', str(int((12.4 /10**6)/(513/10**9)))+ 'x T', str(int((928/10**6)/(513/10**9))) + 'x T',\n",
|
|||
|
" str(int((17.9 * 10**6)/(1.9 * 10**6)))+ 'x T', str(int((205 * 10**6)/(1.9 * 10**6))) + 'x T',])\n",
|
|||
|
"plt.legend()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "1da4c22f",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"What is the big-O complexity of this implementation? "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "4b068a1b",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"2 * sorting + traversing two lists = 2 * n log<sub>2</sub>(n) + 2 * n ~ O(n log n)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "13c96239",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "slide"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Implementation with sets"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "61edb9f3",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "fragment"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"%%timeit\n",
|
|||
|
"\n",
|
|||
|
"scaling_factor = 1\n",
|
|||
|
"\n",
|
|||
|
"words1 = ['apple', 'orange', 'banana', 'melon', 'peach'] * scaling_factor\n",
|
|||
|
"words2 = ['orange', 'kiwi', 'avocado', 'apple', 'banana'] *scaling_factor\n",
|
|||
|
"\n",
|
|||
|
"words2 = set(words2)\n",
|
|||
|
"\n",
|
|||
|
"common_sets = []\n",
|
|||
|
"for w in words1:\n",
|
|||
|
" if w in words2:\n",
|
|||
|
" common_sets.append(w) # 630 ns, 3.13 us, 28.6 us"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "c90d8e68",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "notes"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# 630 * 10**-9\n",
|
|||
|
"# 3.13 * 10**-6\n",
|
|||
|
"# 28.6 * 10**-6"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"id": "236c132d",
|
|||
|
"metadata": {
|
|||
|
"scrolled": true,
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "subslide"
|
|||
|
}
|
|||
|
},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"results_sets = [(630/10**9)/(630/10**9), (3.13/10**6)/(630/10**9), (28.6/10**6)/(630/10**9)]  # 630 ns, 3.13 us, 28.6 us as unitless ratios to the 630 ns baseline\n",
|
|||
|
"fit3 = np.polyfit(input_size, results_sets, 2)\n",
|
|||
|
"eval3 = np.polyval(fit3, x)\n",
|
|||
|
"plt.plot(x,eval1,c = 'orange')\n",
|
|||
|
"plt.plot(x,eval2,c = 'pink')\n",
|
|||
|
"plt.plot(x, eval3, c = 'blue')\n",
|
|||
|
"plt.scatter(input_size, results_for_loop, c = 'orange', s = 100, label = '2 for loops')\n",
|
|||
|
"plt.scatter(input_size, results_sorted_lists, c = 'pink', s = 100, label = 'sorted lists')\n",
|
|||
|
"plt.scatter(input_size, results_sets, c = 'blue', s = 100, label = 'sets')\n",
|
|||
|
"plt.xlabel('input size')\n",
|
|||
|
"plt.ylabel('processing time')\n",
|
|||
|
"plt.yticks(results_for_loop + results_sorted_lists[1:], ['T', str(int((12.4 /10**6)/(513/10**9)))+ 'x T', str(int((928/10**6)/(513/10**9))) + 'x T', str(int((17.9 * 10**6)/(1.9 * 10**6)))+ 'x T', str(int((205 * 10**6)/(1.9 * 10**6))) + 'x T'])\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "c9780532",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"What is the big-O complexity of this implementation? "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"id": "297bcd7d",
|
|||
|
"metadata": {
|
|||
|
"slideshow": {
|
|||
|
"slide_type": "-"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"transforming one list to set + 1 for loop = 2 * n ~ O(n)\n",
|
|||
|
"\n",
|
|||
|
"It’s the exact same code as for lists, but now looking up an element in sets ",
|
|||
|
"(if w in words2) takes constant time!\n",
|
|||
|
"How could you have known that set lookup is fast? Learning about data structures!"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"celltoolbar": "Slideshow",
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.11.3"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 5
|
|||
|
}
|