diff --git a/.gitattributes b/.gitattributes index bdb0cabc..cee6d0d8 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,7 +1,7 @@ -# Auto detect text files and perform LF normalization +# Auto detect the text files and perform LF normalization easily! * text=auto -# Custom for Visual Studio +# Custom for Visual Studio (Any Version) *.cs diff=csharp # Standard to msysgit diff --git a/BinaryToDecimal.py b/BinaryToDecimal.py index ac41308d..1c3096c8 100644 --- a/BinaryToDecimal.py +++ b/BinaryToDecimal.py @@ -1,25 +1,25 @@ # Python: Binary to Decimal Conversion # binToDec and decToBin functions are rendered obsolete by the universal convert function -def binToDec(binNum): +def binToDec(binNum): #function created to convert binary to decimal with parametere binNum decNum = 0 power = 0 - while binNum > 0: - decNum += 2 ** power * (binNum % 10) - binNum //= 10 - power += 1 + while binNum > 0: #loop will run till binNum is greater than 0 + decNum += 2 ** power * (binNum % 10) + binNum //= 10 # reducing binNum everytime by 1 digit + power += 1 # increasing power by 1 each loop return decNum -def decToBin(decNum): +def decToBin(decNum): #function created to convert decimal to binary with parametere decNum binNum = 0 power = 0 - while decNum > 0: + while decNum > 0:#loop will run till decNum is greater than 0 binNum += 10 ** power * (decNum % 2) - decNum //= 2 - power += 1 + decNum //= 2 # reducing decNum everytime by 1 digit + power += 1 # increasing power by 1 each loop return binNum -def convert(fromNum, fromBase, toBase): +def convert(fromNum, fromBase, toBase): #function for converting from any base to any other base toNum = 0 power = 0 while fromNum > 0: @@ -31,4 +31,4 @@ def convert(fromNum, fromBase, toBase): # print (str(binToDec(101011))) # print (str(decToBin(128))) print (str(convert(127, 10, 8))) # converts 127 in base 10 to base 8 -print (str(convert(101001, 2, 2))) \ No newline at end of file +print (str(convert(101001, 2, 2))) diff --git 
a/Contributing.txt b/Contributing.txt new file mode 100644 index 00000000..52d1fd31 --- /dev/null +++ b/Contributing.txt @@ -0,0 +1,21 @@ +Contributions are always welcome!!!! +If you want to contribute to this repository follow the below procedure - +1. Fork this repository +2. Clone the code to your local system and go through readme.md +3. You can create another branch to add further commits + +GIT COMMANDS FOR CONTRIBUTING - +1. To clone this repository +`git clone [code link]` +2. To create new branch +`git checkout -b [branch name] ` +3. To stage files +`git add .` +4. To commit changes +`git commit -m "commit message"` +5. To push changes +`git push [remote branch] [new branch]` + +HAPPY CONTRIBUTION!!!!!!!! + + \ No newline at end of file diff --git a/Datasets.txt b/Datasets.txt new file mode 100644 index 00000000..ce44cedb --- /dev/null +++ b/Datasets.txt @@ -0,0 +1,75 @@ +Data.gov +NOAA - https://www.ncdc.noaa.gov/cdo-web/ + atmospheric, ocean +Bureau of Labor Statistics - https://www.bls.gov/data/ + employment, inflation +US Census Data - https://www.census.gov/data.html + demographics, income, geo, time series +Bureau of Economic Analysis - http://www.bea.gov/data/gdp/gross-domestic-product + GDP, corporate profits, savings rates +Federal Reserve - https://fred.stlouisfed.org/ + currency, interest rates, payroll +Quandl - https://www.quandl.com/ + financial and economic + +Data.gov.uk +UK Dataservice - https://www.ukdataservice.ac.uk + Census data and much more +WorldBank - https://datacatalog.worldbank.org + census, demographics, geographic, health, income, GDP +IMF - https://www.imf.org/en/Data + economic, currency, finance, commodities, time series +OpenData.go.ke + Kenya govt data on agriculture, education, water, health, finance, … +https://data.world/ +Open Data for Africa - http://dataportal.opendataforafrica.org/ + agriculture, energy, environment, industry, … +Kaggle - https://www.kaggle.com/datasets + A huge variety of different datasets +Amazon 
Reviews - https://snap.stanford.edu/data/web-Amazon.html + 35M product reviews from 6.6M users +GroupLens - https://grouplens.org/datasets/movielens/ + 20M movie ratings +Yelp Reviews - https://www.yelp.com/dataset + 6.7M reviews, pictures, businesses +IMDB Reviews - http://ai.stanford.edu/~amaas/data/sentiment/ + 25k Movie reviews +Twitter Sentiment 140 - http://help.sentiment140.com/for-students/ + 160k Tweets +Airbnb - http://insideairbnb.com/get-the-data.html + A TON of data by geo +UCI ML Datasets - http://mlr.cs.umass.edu/ml/ + iris, wine, abalone, heart disease, poker hands, …. +Enron Email dataset - http://www.cs.cmu.edu/~enron/ + 500k emails from 150 people + From 2001 energy scandal. See the movie: The Smartest Guys in the Room. +Spambase - https://archive.ics.uci.edu/ml/datasets/Spambase + Emails +Jeopardy Questions - https://www.reddit.com/r/datasets/comments/1uyd0t/200000_jeopardy_questions_in_a_json_file/ + 200k Questions and answers in json +Gutenberg Ebooks - http://www.gutenberg.org/wiki/Gutenberg:Offline_Catalogs + Large collection of books + +IMAGES +ImageNet - http://image-net.org + 14M images of objects +Google - https://ai.googleblog.com/2016/09/introducing-open-images-dataset.html + 9M image URLs with labels +Microsoft Coco - http://cocodataset.org + 330k images, most labeled +Labelled Faces in the Wild - http://vis-www.cs.umass.edu/lfw/ + 13k face images with names +Stanford Dogs - http://vision.stanford.edu/aditya86/ImageNetDogs/ + 120 dog breeds, 20k images + +AUTONOMOUS CARS +Berkeley DeepDrive - https://bdd-data.berkeley.edu/ + Massive dataset including 100k videos with 1100 hours of hd driving +Belgian Traffic Signs - http://www.vision.ee.ethz.ch/~timofter/traffic_signs/ + 10k images +Bosch Small Traffic Signals - https://hci.iwr.uni-heidelberg.de/node/6132 + 5k training and 8k test images +WPI Traffic Light, Pedestrian, Lane-Keeping - http://computing.wpi.edu/dataset.html + 30GB of training and test data from Worcester, Mass +UCSD Lisa 
- http://cvrr.ucsd.edu/LISA/datasets.html + Vehicle detection, traffic signals diff --git a/Date Time Timestamp/Date_Time.ipynb b/Date Time Timestamp/Date_Time.ipynb new file mode 100644 index 00000000..795b0b61 --- /dev/null +++ b/Date Time Timestamp/Date_Time.ipynb @@ -0,0 +1,424 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using Times, Dates & Timestamps in Python" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Summary of Time and Date classes:\n", + "**datetime** # superclass for most of the date and time libraries \n", + "    **date** # general purpose date library \n", + "    **time** # general purpose time library \n", + "    **datetime** # for date and time in one object \n", + "    **timedelta** # for a duration or elapsed time \n", + "**time** # for Unix timestamp and process_time \n", + "**calendar** # for calendars \n", + "**dateutil** # extended datetime functionality, esp string parsing and delta calculation" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Today's Date\n", + "- Use datetime.date.today()\n", + "- datetime.date class has the following integer attributes, date(year, month, day)\n", + "- get day of the week using date.weekday() # Monday is 0" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2019-02-03\n", + "2 3 2019\n", + "6\n" + ] + } + ], + "source": [ + "from datetime import date\n", + "d1 = date.today()\n", + "print(d1)\n", + "print(d1.month, d1.day, d1.year)\n", + "print(d1.weekday())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### ISO format is a string format, yyyy-mm-dd\n", + "- date_object.isoformat() does the same thing as str(date_object)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + 
"text": [ + "2011-11-23\n", + "2011-11-23\n", + "2011-11-23\n" + ] + }, + { + "data": { + "text/plain": [ + "datetime.date(2011, 11, 23)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1 = date.fromisoformat('2011-11-23')\n", + "print(d1)\n", + "print(str(d1))\n", + "print(d1.isoformat())\n", + "d1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Comparison, addition and subtraction of dates\n", + "- Comparison gives boolean result. Later date is greater than earlier date.\n", + "- Date addition & subtraction give result as a datetime.timedelta object (explained more below).\n", + "- The same comparison and add/subtract operations can be used with time objects." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "1359 days, 0:00:00\n" + ] + } + ], + "source": [ + "d1 = date.today()\n", + "d2 = date(2015, 5, 14)\n", + "print(d1 > d2)\n", + "print(d1 - d2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Time\n", + "- time objects have the following attributes, time(hour, minute, second, microsecond, tzinfo)\n", + "- use datetime.time to compare time objects: t1 < t2 if t1 occurs before t2\n", + " - attributes are all optional, so you can just work with hours and minutes if you want\n", + "- daylight savings is handled by the time.dst() function (if tzinfo is set)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "14:25:36.018625\n", + "02:19:00\n", + "False\n" + ] + } + ], + "source": [ + "from datetime import time\n", + "t1 = time(14, 25, 36, 18625)\n", + "print(t1)\n", + "\n", + "t2 = time(2, 19)\n", + "print(t2)\n", + "print(t1 < t2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 
datetime.datetime combines date and time attributes into a datetime object\n", + "- datetime.datetime(year, month, day, hour, minute, second, microsecond, tzinfo)\n", + "- datetime.datetime objects can be used as dictionary keys\n", + "- includes functions: date(), time()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1941-12-07 07:53:00\n", + "1941-12-07\n", + "07:53:00\n" + ] + } + ], + "source": [ + "from datetime import datetime\n", + "dt1 = datetime(1941, 12, 7, 7, 53)\n", + "print(dt1)\n", + "print(dt1.date())\n", + "print(dt1.time())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use datetime.datetime.now() to get the current date and time" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10:02:21.354040\n", + "2019-02-03\n", + "10 2\n", + "2-3-2019\n" + ] + } + ], + "source": [ + "from datetime import datetime\n", + "t3 = datetime.now()\n", + "\n", + "print(t3.time())\n", + "print(t3.date())\n", + "print(t3.hour, t3.minute)\n", + "print(str(t3.month) + '-' + str(t3.day) + '-' + str(t3.year))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use datetime.strftime() to get names of months and weekdays." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sunday\n", + "Sun, Sunday, Feb, February, 02/03/19\n" + ] + } + ], + "source": [ + "from datetime import datetime\n", + "t3 = datetime.now()\n", + "print(t3.strftime('%A'))\n", + "print(t3.strftime('%a, %A, %b, %B, %x'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A timedelta is used for a duration, or the time difference between two dates or times\n", + "- datetime.timedelta(days, seconds, microseconds)\n", + "- A timedelta can also be multiplied or divided by an integer or float" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "461 days, 0:00:00 \n", + "39830400.0\n", + "1383 days, 0:00:00\n" + ] + } + ], + "source": [ + "from datetime import timedelta, date, time\n", + "d1 = date(2011, 6, 15)\n", + "d2 = date(2012, 9, 18)\n", + "td = d2 - d1\n", + "print(td, type(td))\n", + "print(td.total_seconds())\n", + "print(td * 3)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1008 days to event.\n", + "1008 days, 0:00:00 days to event.\n" + ] + } + ], + "source": [ + "from datetime import datetime\n", + "today = datetime.today().date()\n", + "my_event = date(2021, 11, 6)\n", + "days_to_event = abs(my_event - today)\n", + "print(days_to_event.days, 'days to event.')\n", + "print(days_to_event, 'days to event.')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get a UNIX timestamp (time since the epoch)\n", + "- A timestamp is the time since Jan 1, 1970 in seconds\n", + "- Use time.time() to get timestamp\n", + "- Use datetime.fromtimestamp(ts) and datetime.timestamp(datetime_obj) to convert between timestamp and datetime\n", + "- 
Use time.process_time() to get runtime of an operation on your computer" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1549156155.9644115\n", + "Sat Feb 2 17:09:15 2019\n" + ] + } + ], + "source": [ + "import time\n", + "ts = time.time()\n", + "print(ts)\n", + "print(time.ctime(ts))" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2019-02-02 17:09:15.964411\n", + "1549156155.964411\n" + ] + } + ], + "source": [ + "from datetime import datetime\n", + "now = datetime.fromtimestamp(ts)\n", + "print(now)\n", + "print(datetime.timestamp(now))" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "operation executed in 0.0\n" + ] + } + ], + "source": [ + "start_time = time.process_time()\n", + "# do some stuff\n", + "end_time = time.process_time()\n", + "print('operation executed in ', end_time - start_time)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Date Time Timestamp/Date_Time_Timestamp.py b/Date Time Timestamp/Date_Time_Timestamp.py new file mode 100644 index 00000000..cc533dcf --- /dev/null +++ b/Date Time Timestamp/Date_Time_Timestamp.py @@ -0,0 +1,133 @@ 
+"""############################################################################## +Using Times, Dates & Timestamps in Python + +Summary of Time and Date classes: +datetime # superclass for most of the date and time libraries + date # general purpose date library + time # general purpose time library + datetime # for date and time in one object + timedelta # for a duration or elapsed time +time # for Unix timestamp and process_time +calendar # for calendars +dateutil # extended datetime functionality, esp string parsing and delta calculation +##############################################################################""" + +# Today's Date +# --------------------------- +# Use datetime.date.today() +# datetime.date class has the following integer attributes, date(year, month, day) +# get day of the week using date.weekday() # Monday is 0 + +from datetime import date +todays_date = date.today() +print(todays_date) +print(todays_date.month, todays_date.day, todays_date.year) +print(todays_date.weekday()) + +# ISO format is a string format, yyyy-mm-dd +# --------------------------- +# date_object.isoformat() does the same thing as str(date_object) + +from datetime import date +todays_date = date.fromisoformat('2011-11-23') +print(todays_date) +print(str(todays_date)) +print(todays_date.isoformat()) +todays_date + +# Comparison, addition and subtraction of dates +# --------------------------- +# Comparison gives boolean result. Later date is greater than earlier date. +# Date addition & subtraction give result as a datetime.timedelta object (explained more below). +# The same comparison and add/subtract operations can be used with time objects. 
+ +from datetime import date +todays_date = date.today() +d2 = date(2015, 5, 14) +print(todays_date > d2) +print(todays_date - d2) + +# Time +# --------------------------- +# time objects have the following attributes, time(hour, minute, second, microsecond, tzinfo) +# use datetime.time to compare time objects: t1 < t2 if t1 occurs before t2 +# attributes are all optional, so you can just work with hours and minutes if you want +# daylight savings is handled by the time.dst() function (if tzinfo is set) + +from datetime import time +t1 = time(14, 25, 36, 18625) +print(t1) + +t2 = time(2, 19) +print(t2) +print(t1 < t2) + +# datetime.datetime combines date and time attributes into a datetime object +# --------------------------- +# datetime.datetime(year, month, day, hour, minute, second, microsecond, tzinfo) +# datetime.datetime objects can be used as dictionary keys +# includes functions: date(), time() + +from datetime import datetime +dt1 = datetime(1941, 12, 7, 7, 53) +print(dt1) +print(dt1.date()) +print(dt1.time()) + +# Use datetime.datetime.now() to get the current date and time + +t3 = datetime.now() + +print(t3.time()) +print(t3.date()) +print(t3.hour, t3.minute) +print(str(t3.month) + '-' + str(t3.day) + '-' + str(t3.year)) + +# Use datetime.strftime() to get names of months and weekdays. 
+ +t3 = datetime.now() +print(t3.strftime('%A')) +print(t3.strftime('%a, %A, %b, %B, %x')) + +# A timedelta is used for a duration, or the time difference between two dates or times +# --------------------------- +# datetime.timedelta(days, seconds, microseconds) +# A timedelta can also be multiplied or divided by an integer or float + +from datetime import timedelta, date, time +todays_date = date(2011, 6, 15) +d2 = date(2012, 9, 18) +td = d2 - todays_date +print(td, type(td)) +print(td.total_seconds()) +print(td * 3) + +from datetime import datetime +today = datetime.today().date() +my_event = date(2021, 11, 6) +days_to_event = abs(my_event - today) +print(days_to_event.days, 'days to event.') +print(days_to_event, 'days to event.') + +# Get a UNIX timestamp (time since the epoch) +# --------------------------- +# A timestamp is the time since Jan 1, 1970 in seconds +# Use time.time() to get timestamp +# Use datetime.fromtimestamp(ts) and datetime.timestamp(datetime_obj) to convert between timestamp and datetime +# Use time.process_time() to get runtime of an operation on your computer + +import time +ts = time.time() +print(ts) +print(time.ctime(ts)) + + +from datetime import datetime +now = datetime.fromtimestamp(ts) +print(now) +print(datetime.timestamp(now)) + +start_time = time.process_time() +# do some stuff +end_time = time.process_time() +print('operation executed in ', end_time - start_time) diff --git a/Happy Pi Day.ipynb b/Happy Pi Day.ipynb new file mode 100644 index 00000000..fd0c8e51 --- /dev/null +++ b/Happy Pi Day.ipynb @@ -0,0 +1,105 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Happy Pi Day!\n", + "\n", + "### Two Ways to Calculate Pi: \n", + "Real pi = 3.14159265359\n", + "\n", + "### 1. percentage of unit square random points that lie in unit circle\n", + "This method is only as good as our random number generator. And with number of iterations the accuracy improves, up to about 1 million." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.1419916\n" + ] + } + ], + "source": [ + "import random\n", + "in_square = in_circle = pi = 0\n", + "\n", + "for i in range(10000000):\n", + " x = random.random()\n", + " y = random.random()\n", + " dist = (x*x + y*y) ** 0.5\n", + "\n", + " in_square += 1\n", + " if dist <= 1.0:\n", + " in_circle += 1\n", + " \n", + "pi = 4 * in_circle / in_square\n", + "print(pi)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. using series addition\n", + "pi = 4/1 - 4/3 + 4/5 - 4/7 + 4/9 - 4/11 + 4/13 - 4/15 ... \n", + "This method is the more accurate of the two, and is faster. Its accuracy only depends on the size of n." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3.1415924535897797\n" + ] + } + ], + "source": [ + "pi = 0.0\n", + "for i in range(1, 10000000, 4):\n", + " pi += 4/i\n", + " pi -= 4/(i+2)\n", + "print(pi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/HashMap.py b/HashMap.py index 445509c5..c5808e00 100644 --- a/HashMap.py +++ b/HashMap.py @@ -1,55 +1,62 @@ # Hash Map class HashMap: - def __init__(self): - self.size = 6 - self.map = [None] * self.size + def __init__(self): + self.size = 6 + self.map = [None] * self.size - def _get_hash(self, key): - hash = 0 - for 
char in str(key): - hash += ord(char) - return hash % self.size + def _get_hash(self, key): + hash = 0 + for char in str(key): + hash += ord(char) + return hash % self.size - def add(self, key, value): - key_hash = self._get_hash(key) - key_value = [key, value] + def add(self, key, value): + key_hash = self._get_hash(key) + key_value = [key, value] - if self.map[key_hash] is None: - self.map[key_hash] = list([key_value]) - return True - else: - for pair in self.map[key_hash]: - if pair[0] == key: - pair[1] = value - return True - self.map[key_hash].append(key_value) - return True + if self.map[key_hash] is None: + self.map[key_hash] = list([key_value]) + return True + else: + for pair in self.map[key_hash]: + if pair[0] == key: + pair[1] = value + return True + self.map[key_hash].append(key_value) + return True - def get(self, key): - key_hash = self._get_hash(key) - if self.map[key_hash] is not None: - for pair in self.map[key_hash]: - if pair[0] == key: - return pair[1] - return None + def get(self, key): + key_hash = self._get_hash(key) + if self.map[key_hash] is not None: + for pair in self.map[key_hash]: + if pair[0] == key: + return pair[1] + return None - def delete(self, key): - key_hash = self._get_hash(key) + def delete(self, key): + key_hash = self._get_hash(key) - if self.map[key_hash] is None: - return False - for i in range (0, len(self.map[key_hash])): - if self.map[key_hash][i][0] == key: - self.map[key_hash].pop(i) - return True - return False + if self.map[key_hash] is None: + return False + for i in range (0, len(self.map[key_hash])): + if self.map[key_hash][i][0] == key: + self.map[key_hash].pop(i) + return True + return False + + def keys(self): + arr = [] + for i in range(0, len(self.map)): + if self.map[i]: + arr.append(self.map[i][0]) + return arr - def print(self): - print('---PHONEBOOK----') - for item in self.map: - if item is not None: - print(str(item)) + def print(self): + print('---PHONEBOOK----') + for item in self.map: + if item is 
not None: + print(str(item)) h = HashMap() h.add('Bob', '567-8888') @@ -64,4 +71,4 @@ def print(self): h.delete('Bob') h.print() print('Ming: ' + h.get('Ming')) - \ No newline at end of file +print(h.keys()) diff --git a/Intro to Python Data Structures/Intro Python Data Structures.pptx b/Intro to Python Data Structures/Intro Python Data Structures.pptx new file mode 100644 index 00000000..0619f324 Binary files /dev/null and b/Intro to Python Data Structures/Intro Python Data Structures.pptx differ diff --git a/Intro to Python Data Structures/Python Data Structures.ipynb b/Intro to Python Data Structures/Python Data Structures.ipynb new file mode 100644 index 00000000..3b42851f --- /dev/null +++ b/Intro to Python Data Structures/Python Data Structures.ipynb @@ -0,0 +1,1096 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Intro to Python Data Structures\n", + "Lists, Tuples, Sets, Dicts \n", + "(c) 2019 Joe James \n", + "## Sequences: String, List, Tuple\n", + "****\n", + "**indexing** - access any item in the sequence using its index. \n", + "Indexing starts with 0 for the first element." + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "g\n", + "cow\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'frog'\n", + "print (x[3])\n", + "\n", + "# list\n", + "x = ['pig', 'cow', 'horse']\n", + "print (x[1])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**slicing** - slice out substrings, sublists, subtuples using indexes. 
\n", + "[start : end+1 : step]" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "omp\n", + "opt\n", + "puter\n", + "compu\n", + "r\n", + "ter\n", + "comput\n" + ] + } + ], + "source": [ + "x = 'computer'\n", + "print(x[1:4])\n", + "print(x[1:6:2])\n", + "print(x[3:])\n", + "print(x[:5])\n", + "print(x[-1])\n", + "print(x[-3:])\n", + "print(x[:-2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**adding / concatenating** - combine 2 sequences of the same type by using +" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "horseshoe\n", + "['pig', 'cow', 'horse']\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'horse' + 'shoe'\n", + "print(x)\n", + "\n", + "# list\n", + "y = ['pig', 'cow'] + ['horse']\n", + "print(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**multiplying** - multiply a sequence using *" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "bugbugbug\n", + "[8, 5, 8, 5, 8, 5]\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'bug' * 3\n", + "print(x)\n", + "\n", + "# list\n", + "y = [8, 5] * 3\n", + "print(y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**checking membership** - test whether an item is or is not in a sequence." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "False\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'bug'\n", + "print('u' in x)\n", + "\n", + "# list\n", + "y = ['pig', 'cow', 'horse']\n", + "print('cow' not in y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**iterating** - iterating through the items in a sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7\n", + "8\n", + "3\n", + "0 7\n", + "1 8\n", + "2 3\n" + ] + } + ], + "source": [ + "# item\n", + "x = [7, 8, 3]\n", + "for item in x:\n", + " print(item)\n", + " \n", + "# index & item\n", + "y = [7, 8, 3]\n", + "for index, item in enumerate(y):\n", + " print(index, item)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**number of items** - count the number of items in a sequence" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n", + "3\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'bug'\n", + "print(len(x))\n", + "\n", + "# list\n", + "y = ['pig', 'cow', 'horse']\n", + "print(len(y))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**minimum** - find the minimum item in a sequence lexicographically. \n", + "Alpha or numeric types, but cannot mix types." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "b\n", + "cow\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'bug'\n", + "print(min(x))\n", + "\n", + "# list\n", + "y = ['pig', 'cow', 'horse']\n", + "print(min(y))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**maximum** - find the maximum item in a sequence lexicographically. \n", + "Alpha or numeric types, but cannot mix types." + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "u\n", + "pig\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'bug'\n", + "print(max(x))\n", + "\n", + "# list\n", + "y = ['pig', 'cow', 'horse']\n", + "print(max(y))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**sum** - find the sum of items in a sequence. \n", + "Entire sequence must be numeric." + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "27\n", + "20\n" + ] + } + ], + "source": [ + "# string -> error\n", + "# x = [5, 7, 'bug']\n", + "# print(sum(x)) # generates an error\n", + "\n", + "# list\n", + "y = [2, 5, 8, 12]\n", + "print(sum(y))\n", + "print(sum(y[-2:]))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**sorting** - returns a new list of items in sorted order. \n", + "Does not change the original list." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['b', 'g', 'u']\n", + "['cow', 'horse', 'pig']\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'bug'\n", + "print(sorted(x))\n", + "\n", + "# list\n", + "y = ['pig', 'cow', 'horse']\n", + "print(sorted(y))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**count(item)** - returns count of an item" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "2\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'hippo'\n", + "print(x.count('p'))\n", + "\n", + "# list\n", + "y = ['pig', 'cow', 'horse', 'cow']\n", + "print(y.count('cow'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**index(item)** - returns the index of the first occurence of an item." + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "1\n" + ] + } + ], + "source": [ + "# string\n", + "x = 'hippo'\n", + "print(x.index('p'))\n", + "\n", + "# list\n", + "y = ['pig', 'cow', 'horse', 'cow']\n", + "print(y.index('cow'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**unpacking** - unpack the n items of a sequence into n variables" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pig cow horse\n" + ] + } + ], + "source": [ + "x = ['pig', 'cow', 'horse']\n", + "a, b, c = x\n", + "print(a, b, c)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lists \n", + "****\n", + "- General purpose\n", + "- Most widely used data structure \n", + "- Grow and shrink size as needed\n", + "- Sequence type\n", + 
"- Sortable \n", + "\n", + "**constructors** - creating a new list" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 1, 2, 3, 4, 5, 6, 7]\n", + "[25, 36, 49, 64, 81]\n" + ] + } + ], + "source": [ + "x = list()\n", + "y = ['a', 25, 'dog', 8.43]\n", + "tuple1 = (10, 20)\n", + "z = list(tuple1)\n", + "\n", + "# list comprehension\n", + "a = [m for m in range(8)]\n", + "print(a)\n", + "b = [i**2 for i in range(10) if i>4]\n", + "print(b)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**delete** - delete a list or an item in a list" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5, 8, 6]\n" + ] + } + ], + "source": [ + "x = [5, 3, 8, 6]\n", + "del(x[1])\n", + "print(x)\n", + "del(x) # list x no longer exists" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**append** - append an item to a list" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5, 3, 8, 6, 7]\n" + ] + } + ], + "source": [ + "x = [5, 3, 8, 6]\n", + "x.append(7)\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**extend** - append a sequence to a list" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5, 3, 8, 6, 12, 13]\n" + ] + } + ], + "source": [ + "x = [5, 3, 8, 6]\n", + "y = [12, 13]\n", + "x.extend(y)\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**insert** - insert an item at a given index" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + 
"output_type": "stream", + "text": [ + "[5, 7, 3, 8, 6]\n", + "[5, ['a', 'm'], 7, 3, 8, 6]\n" + ] + } + ], + "source": [ + "x = [5, 3, 8, 6]\n", + "x.insert(1, 7)\n", + "print(x)\n", + "x.insert(1, ['a', 'm'])\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**pop** - pops last item off list and returns item" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5, 3, 8]\n", + "8\n" + ] + } + ], + "source": [ + "x = [5, 3, 8, 6]\n", + "x.pop() # pop off the 6\n", + "print(x)\n", + "print(x.pop())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**remove** - remove first instance of an item" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5, 8, 6, 3]\n" + ] + } + ], + "source": [ + "x = [5, 3, 8, 6, 3]\n", + "x.remove(3)\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**reverse** - reverse the order of the list. It is an in-place sort, meaning it changes the original list." + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[6, 8, 3, 5]\n" + ] + } + ], + "source": [ + "x = [5, 3, 8, 6]\n", + "x.reverse()\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**sort** - sort the list in place. \n", + "Note: \n", + "sorted(x) returns a new sorted list without changing the original list x. \n", + "x.sort() puts the items of x in sorted order (sorts in place)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[3, 5, 6, 8]\n" + ] + } + ], + "source": [ + "x = [5, 3, 8, 6]\n", + "x.sort()\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tuples\n", + "****\n", + "- Immutable (can’t add/change)\n", + "- Useful for fixed data\n", + "- Faster than Lists\n", + "- Sequence type \n", + " \n", + "**constructors** - creating new tuples." + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(2,) \n", + "(2, 4, 6) \n" + ] + } + ], + "source": [ + "x = ()\n", + "x = (1, 2, 3)\n", + "x = 1, 2, 3\n", + "x = 2, # the comma tells Python it's a tuple\n", + "print(x, type(x))\n", + "\n", + "list1 = [2, 4, 6]\n", + "x = tuple(list1)\n", + "print(x, type(x))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**tuples are immutable**, but member objects may be mutable." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1, 2, 3)\n", + "([1], 3)\n" + ] + } + ], + "source": [ + "x = (1, 2, 3)\n", + "# del(x[1]) # fails\n", + "# x[1] = 8 # fails\n", + "print(x)\n", + "\n", + "y = ([1, 2], 3) # a tuple where the first item is a list\n", + "del(y[0][1]) # delete the 2\n", + "print(y) # the list within the tuple is mutable" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sets\n", + "****\n", + "- Store non-duplicate items \n", + "- Very fast access vs Lists \n", + "- Math Set ops (union, intersect) \n", + "- Sets are Unordered \n", + " \n", + "**constructors** - creating new sets" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{3, 5}\n", + "set()\n", + "{2, 3, 4}\n" + ] + } + ], + "source": [ + "x = {3, 5, 3, 5}\n", + "print(x)\n", + "\n", + "y = set()\n", + "print(y)\n", + "\n", + "list1 = [2, 3, 4]\n", + "z = set(list1)\n", + "print(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**set operations**" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{8, 3, 5}\n", + "{8, 3, 5, 7}\n", + "{8, 5, 7}\n", + "3\n", + "True\n", + "8 {5, 7}\n", + "set()\n" + ] + } + ], + "source": [ + "x = {3, 8, 5}\n", + "print(x)\n", + "x.add(7)\n", + "print(x)\n", + "\n", + "x.remove(3)\n", + "print(x)\n", + "\n", + "# get length of set x\n", + "print(len(x))\n", + "\n", + "# check membership in x\n", + "print(5 in x)\n", + "\n", + "# pop random item from set x\n", + "print(x.pop(), x)\n", + "\n", + "# delete all items from set x\n", + "x.clear()\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Mathematical set operations** 
\n", + "intersection (AND): set1 & set2 \n", + "union (OR): set1 | set2 \n", + "symmetric difference (XOR): set1 ^ set2\n", + "difference (in set1 but not set2): set1 - set2 \n", + "subset (set2 contains set1): set1 <= set2 \n", + "superset (set1 contains set2): set1 >= set2" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{3}\n", + "{1, 2, 3, 4, 5}\n", + "{1, 2, 4, 5}\n", + "{1, 2}\n", + "False\n", + "False\n" + ] + } + ], + "source": [ + "s1 = {1, 2, 3}\n", + "s2 = {3, 4, 5}\n", + "print(s1 & s2)\n", + "print(s1 | s2)\n", + "print(s1 ^ s2)\n", + "print(s1 - s2)\n", + "print(s1 <= s2)\n", + "print(s1 >= s2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dictionaries (dict)\n", + "****\n", + "- Key/Value pairs\n", + "- Associative array, like Java HashMap\n", + "- Dicts preserve insertion order (Python 3.7+)" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7}\n", + "{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7}\n", + "{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7}\n" + ] + } + ], + "source": [ + "x = {'pork':25.3, 'beef':33.8, 'chicken':22.7}\n", + "print(x)\n", + "x = dict([('pork', 25.3),('beef', 33.8),('chicken', 22.7)])\n", + "print(x)\n", + "x = dict(pork=25.3, beef=33.8, chicken=22.7)\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**dict operations**" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7, 'shrimp': 38.2}\n", + "{'pork': 25.3, 'beef': 33.8, 'chicken': 22.7}\n", + "3\n", + "{}\n" + ] + } + ], + "source": [ + "x['shrimp'] = 38.2 # add or update\n", + "print(x)\n", + "\n", + "#
delete an item\n", + "del(x['shrimp'])\n", + "print(x)\n", + "\n", + "# get length of dict x\n", + "print(len(x))\n", + "\n", + "# delete all items from dict x\n", + "x.clear()\n", + "print(x)\n", + "\n", + "# delete dict x\n", + "del(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**accessing keys and values in a dict**" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "dict_keys(['pork', 'beef', 'chicken'])\n", + "dict_values([25.3, 33.8, 22.7])\n", + "dict_items([('pork', 25.3), ('beef', 33.8), ('chicken', 22.7)])\n", + "True\n", + "False\n" + ] + } + ], + "source": [ + "y = {'pork':25.3, 'beef':33.8, 'chicken':22.7}\n", + "print(y.keys())\n", + "print(y.values())\n", + "print(y.items()) # key-value pairs\n", + "\n", + "# check membership in y_keys (only looks in keys, not values)\n", + "print('beef' in y)\n", + "\n", + "# check membership in y_values\n", + "print('clams' in y.values())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**iterating a dict - note, items are returned in insertion order (Python 3.7+)**" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "pork 25.3\n", + "beef 33.8\n", + "chicken 22.7\n", + "pork 25.3\n", + "beef 33.8\n", + "chicken 22.7\n" + ] + } + ], + "source": [ + "for key in y:\n", + " print(key, y[key])\n", + " \n", + "for k, v in y.items():\n", + " print(k, v)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter":
"python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Intro to Python Data Structures/Some Basic Terminologies in Python Data Structure b/Intro to Python Data Structures/Some Basic Terminologies in Python Data Structure new file mode 100644 index 00000000..754c07a5 --- /dev/null +++ b/Intro to Python Data Structures/Some Basic Terminologies in Python Data Structure @@ -0,0 +1,31 @@ +##Data Structure Overview +Data structures are fundamental concepts of computer science which helps is writing efficient programs in any language. Python is a high-level, interpreted, interactive and object-oriented scripting language using which we can study the fundamentals of data structure in a simpler way as compared to other programming languages. + +In this chapter we are going to study a short overview of some frequently used data structures in general and how they are related to some specific python data types. There are also some data structures specific to python which is listed as another category. + +##General Data Structures +The various data structures in computer science are divided broadly into two categories shown below. We will discuss about each of the below data structures in detail in subsequent chapters. + +#Liner Data Structures +These are the data structures which store the data elements in a sequential manner. + +Array: It is a sequential arrangement of data elements paired with the index of the data element. +Linked List: Each data element contains a link to another element along with the data present in it. +Stack: It is a data structure which follows only to specific order of operation. LIFO(last in First Out) or FILO(First in Last Out). +Queue: It is similar to Stack but the order of operation is only FIFO(First In First Out). +Matrix: It is two dimensional data structure in which the data element is referred by a pair of indices. 
+ +#Non-Linear Data Structures +These are the data structures in which there is no sequential linking of data elements. Any pair or group of data elements can be linked to each other and can be accessed without a strict sequence. + +Binary Tree: It is a data structure where each data element can be connected to a maximum of two other data elements and it starts with a root node. +Heap: It is a special case of Tree data structure where the data in the parent node is either greater than or equal to that of its child nodes, or less than or equal to that of its child nodes. +Hash Table: It is a data structure which is made of arrays associated with each other using a hash function. It retrieves values using keys rather than index from a data element. +Graph: It is an arrangement of vertices (nodes) where some of the nodes are connected to each other through links (edges). + +#Python Specific Data Structures +These data structures are specific to the Python language and they give greater flexibility in storing different types of data and faster processing in the Python environment. + +List: It is similar to an array with the exception that the data elements can be of different data types. You can have both numeric and string data in a Python list. +Tuple: Tuples are similar to lists but they are immutable, which means the values in a tuple cannot be modified; they can only be read. +Dictionary: The dictionary contains key-value pairs as its data elements.
diff --git a/Iris Dataset/Iris_Dataset.ipynb b/Iris Dataset/Iris_Dataset.ipynb new file mode 100644 index 00000000..398f454e --- /dev/null +++ b/Iris Dataset/Iris_Dataset.ipynb @@ -0,0 +1,1174 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Data Science\n", + "### Exploring the Iris Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load Data\n", + "Load the data from CSV file into a Pandas dataframe, and print the top few rows." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsepal_lengthsepal_widthpetal_lengthpetal_widthspecies
005.13.51.40.2Iris-setosa
114.93.01.40.2Iris-setosa
224.73.21.30.2Iris-setosa
334.63.11.50.2Iris-setosa
445.03.61.40.2Iris-setosa
\n", + "
" + ], + "text/plain": [ + " id sepal_length sepal_width petal_length petal_width species\n", + "0 0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 4 5.0 3.6 1.4 0.2 Iris-setosa" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('iris.data')\n", + "data.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Customize columns\n", + "Drop the redundant id column, and rename Attribute columns to integers. Save column names for use later." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123species
05.13.51.40.2Iris-setosa
507.03.24.71.4Iris-versicolor
1006.33.36.02.5Iris-virginica
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 species\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "50 7.0 3.2 4.7 1.4 Iris-versicolor\n", + "100 6.3 3.3 6.0 2.5 Iris-virginica" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = data.drop('id', 1)\n", + "cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']\n", + "data.rename(columns = {cols[0]:0, cols[1]:1, cols[2]:2, cols[3]:3}, inplace=True)\n", + "data.loc[::50]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Statistical Overview\n", + "Show shape of dataframe and statistical overview of attribute columns." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(150, 5)\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123
count150.000000150.000000150.000000150.000000
mean5.8433333.0540003.7586671.198667
std0.8280660.4335941.7644200.763161
min4.3000002.0000001.0000000.100000
25%5.1000002.8000001.6000000.300000
50%5.8000003.0000004.3500001.300000
75%6.4000003.3000005.1000001.800000
max7.9000004.4000006.9000002.500000
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3\n", + "count 150.000000 150.000000 150.000000 150.000000\n", + "mean 5.843333 3.054000 3.758667 1.198667\n", + "std 0.828066 0.433594 1.764420 0.763161\n", + "min 4.300000 2.000000 1.000000 0.100000\n", + "25% 5.100000 2.800000 1.600000 0.300000\n", + "50% 5.800000 3.000000 4.350000 1.300000\n", + "75% 6.400000 3.300000 5.100000 1.800000\n", + "max 7.900000 4.400000 6.900000 2.500000" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(data.shape)\n", + "data.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Iris-virginica 50\n", + "Iris-setosa 50\n", + "Iris-versicolor 50\n", + "Name: species, dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# same as data['species'].value_counts()\n", + "data.species.value_counts()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Histograms\n", + "Histograms are useful for showing how the data is distributed. They're ridiculously easy to use, but can only show two axes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([ 9., 23., 14., 27., 16., 26., 18., 6., 5., 6.]),\n", + " array([4.3 , 4.66, 5.02, 5.38, 5.74, 6.1 , 6.46, 6.82, 7.18, 7.54, 7.9 ]),\n", + " )" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAD4CAYAAADxeG0DAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAANfUlEQVR4nO3cf4xld13G8fdjFxQKgeJOai2FIaQhqYmUuqlFCKkWSGlNC5GYNhFbAtmqVEFJzMofSvyrJvww/gi40EpVqGBppdKCNJWEkGjjtFS6bSXUskDrtjtApKBGXPj4x5zicJmZezv3ztz7Wd6vZDLnnvO99zz7zeSZM2fPOakqJEl9/dC8A0iSpmORS1JzFrkkNWeRS1JzFrkkNbdnN3e2d+/eWl5e3s1dSlJ7d9xxx1eqammz7bta5MvLy6ysrOzmLiWpvSRf3Gq7p1YkqTmLXJKas8glqTmLXJKas8glqTmLXJKas8glqTmLXJKas8glqbldvbNTPSwfuHlu+z581YVz27fUlUfkktScRS5JzVnkktScRS5JzVnkktScRS5JzXn5oYSXXKo3j8glqTmLXJKas8glqTmLXJKaG1vkSU5L8skk9ya5J8kbh/VvTfJQkruGrwt2Pq4kadQkV60cA95cVXcmeSpwR5Jbh23vrKq37Vw8SdI4Y4u8qo4AR4blbyS5Dzh1p4NJkibzuM6RJ1kGXgDcPqy6Mslnk1yT5KRN3rM/yUqSldXV1anCSpK+38RFnuQpwIeBN1XVo8C7gOcCZ7J2xP72jd5XVQeral9V7VtaWppBZEnSehMVeZInsFbi76+qGwCq6pGq+nZVfQd4D3D2zsWUJG1mkqtWAlwN3FdV71i3/pR1w14FHJp9PEnSOJNctfIi4DXA3UnuGta9Bbg0yZlAAYeBK3YkoSRpS5NctfJpIBtsumX2cSRJj5d3dkpScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtSc5Pcov8Db/nAzXPZ7+GrLpzLfiX14hG5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDU3tsiTnJbkk0nuTXJPkjcO65+R5NYknx++n7TzcSVJoyY5Ij8GvLmqzgDOAd6Q5AzgAHBbVZ0O3Da8liTtsrFFXlVHqurOYfkbwH3AqcDFwLXDsGuBV+5USEnS5h7XOfIky8ALgNuBk6vqyLDpYeDkTd6zP8lKkpXV1dUpokqSNjJxkSd5CvBh4E1V9ej6bVVVQG30vqo6WFX7qmrf0tLSVGElSd9voiJP8gTWSvz9VXXDsPqRJKcM208Bju5MREnSVia5aiXA1cB9VfWOdZtuAi4bli8DPjL7eJKkcfZMMOZFwGuAu5PcNax7C3AV8KEkrwO+CPzizkSUJG1lbJFX1aeBbLL5vNnGkSQ9Xt7ZKUnNWeSS1JxFLknNWeSS1JxFLknNWeSS1JxFLknNWeSS1JxFLknNWeSS1Nwkz1qRtIOWD9w8l
/0evurCuexXs+cRuSQ1Z5FLUnMWuSQ1Z5FLUnMWuSQ1Z5FLUnMWuSQ1Z5FLUnMWuSQ1Z5FLUnMWuSQ1Z5FLUnMWuSQ1Z5FLUnM+xlYLZV6PdJU684hckpqzyCWpOYtckpqzyCWpubFFnuSaJEeTHFq37q1JHkpy1/B1wc7GlCRtZpIj8vcB52+w/p1VdebwdctsY0mSJjW2yKvqU8DXdiGLJGkbpjlHfmWSzw6nXk7abFCS/UlWkqysrq5OsTtJ0ka2W+TvAp4LnAkcAd6+2cCqOlhV+6pq39LS0jZ3J0nazLaKvKoeqapvV9V3gPcAZ882liRpUtsq8iSnrHv5KuDQZmMlSTtr7LNWklwHnAvsTfIg8HvAuUnOBAo4DFyxgxklSVsYW+RVdekGq6/egSySpG3wzk5Jas7H2C4wH+kqaRIekUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtScxa5JDVnkUtSc2OLPMk1SY4mObRu3TOS3Jrk88P3k3Y2piRpM5Mckb8POH9k3QHgtqo6HbhteC1JmoOxRV5VnwK+NrL6YuDaYfla4JUzziVJmtB2z5GfXFVHhuWHgZM3G5hkf5KVJCurq6vb3J0kaTNT/2dnVRVQW2w/WFX7qmrf0tLStLuTJI3YbpE/kuQUgOH70dlFkiQ9Htst8puAy4bly4CPzCaOJOnxmuTyw+uAfwSel+TBJK8DrgJeluTzwEuH15KkOdgzbkBVXbrJpvNmnEWStA3e2SlJzVnkktTc2FMri2L5wM3zjiBJC8kjcklqziKXpOYscklqziKXpOYscklqziKXpOYscklqziKXpOYscklqziKXpOYscklqziKXpOYscklqziKXpOYscklqziKXpOYscklqziKXpOYscklqziKXpOYscklqziKXpOb2zDuApPlYPnDz3PZ9+KoL57Lf4/Xf7BG5JDVnkUtScxa5JDU31TnyJIeBbwDfBo5V1b5ZhJIkTW4W/9n5s1X1lRl8jiRpGzy1IknNTXtEXsAnkhTwZ1V1cHRAkv3AfoBnPetZU+5O0vFgnpcBHo+mPSJ/cVWdBbwCeEOSl4wOqKqDVbWvqvYtLS1NuTtJ0qipiryqHhq+HwVuBM6eRShJ0uS2XeRJTkzy1MeWgZcDh2YVTJI0mWnOkZ8M3Jjksc/5QFV9fCapJEkT23aRV9UDwPNnmEWStA1efihJzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktScRS5JzVnkktTcVEWe5Pwkn0tyf5IDswolSZrctos8yQnAnwKvAM4ALk1yxqyCSZImM80R+dnA/VX1QFV9C/hr4OLZxJIkTWrPFO89FfjyutcPAj89OijJfmD/8PKbST43xT6ntRf4yhz3P6kuOaFPVnPOVpecsCBZ8wdjh2yV89lbvXGaIp9IVR0EDu70fiaRZKWq9s07xzhdckKfrOacrS45oU/WaXJOc2rlIeC0da+fOayTJO2iaYr8n4HTkzwnyROBS4CbZhNLkjSpbZ9aqapjSa4E/h44Abimqu6ZWbKdsRCneCbQJSf0yWrO2eqSE/pk3XbOVNUsg0iSdpl3dkpScxa5JDV33BZ5khOSfCbJRzfYdnmS1SR3DV+vn1PGw0nuHjKsbLA9Sf5oeATCZ5OctaA5z03y9XXz+bvzyDlkeXqS65P8a5L7krxwZPuizOm4nHOf0yTPW7f/u5I8muRNI2MWZT4nyTr3OR1y/GaSe5IcSnJdkh8Z2f7DST44z
OntSZbHfmhVHZdfwG8BHwA+usG2y4E/WYCMh4G9W2y/APgYEOAc4PYFzXnuRvM8p6zXAq8flp8IPH1B53RczoWZ0yHPCcDDwLMXcT4nzDr3OWXtRsovAE8aXn8IuHxkzK8B7x6WLwE+OO5zj8sj8iTPBC4E3jvvLFO6GPiLWvNPwNOTnDLvUIsqydOAlwBXA1TVt6rqP0aGzX1OJ8y5aM4D/q2qvjiyfu7zuYHNsi6KPcCTkuwBngz8+8j2i1n7RQ9wPXBekmz1gcdlkQN/CPw28J0txvzC8Kfg9UlO22LcTirgE0nuGB5lMGqjxyCcuivJvte4nAAvTPIvST6W5Cd2M9w6zwFWgT8fTqu9N8mJI2MWYU4nyQmLMaePuQS4boP1izCfozbLCnOe06p6CHgb8CXgCPD1qvrEyLDvzmlVHQO+DvzoVp973BV5kp8HjlbVHVsM+ztguap+EriV///tt9teXFVnsfYEyTckecmccowzLuedrP0Z+3zgj4G/3e2Agz3AWcC7quoFwH8Ci/h45UlyLsqcMtzwdxHwN/PKMKkxWec+p0lOYu2I+znAjwMnJvmlaT/3uCty4EXARUkOs/ZExp9L8lfrB1TVV6vqf4aX7wV+ancjfjfHQ8P3o8CNrD1Rcr2FeAzCuJxV9WhVfXNYvgV4QpK9u52TtaPBB6vq9uH19awV5nqLMKdjcy7QnMLaL/A7q+qRDbYtwnyut2nWBZnTlwJfqKrVqvpf4AbgZ0bGfHdOh9MvTwO+utWHHndFXlW/U1XPrKpl1v7E+oeq+p7feCPn8C4C7tvFiI9lODHJUx9bBl4OHBoZdhPwy8OVAeew9mfYkUXLmeTHHjuHl+Rs1n6utvzB2wlV9TDw5STPG1adB9w7MmzuczpJzkWZ08GlbH6qYu7zOWLTrAsyp18Czkny5CHLeXx//9wEXDYsv5q1Dtvyzs0df/rhokjy+8BKVd0E/EaSi4BjwNdYu4plt50M3Dj8XO0BPlBVH0/yKwBV9W7gFtauCrgf+C/gtQua89XAryY5Bvw3cMm4H7wd9OvA+4c/sR8AXruAczpJzoWY0+GX98uAK9atW8T5nCTr3Oe0qm5Pcj1rp3mOAZ8BDo7009XAXya5n7V+umTc53qLviQ1d9ydWpGkHzQWuSQ1Z5FLUnMWuSQ1Z5FLUnMWuSQ1Z5FLUnP/B6ELdCq81O9RAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist(data[0])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we give 4 columns of data to the Histogram maker, and it automatically color codes them." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([[ 0., 0., 0., 0., 0., 11., 48., 49., 31., 11.],\n", + " [ 0., 0., 11., 97., 38., 4., 0., 0., 0., 0.],\n", + " [ 0., 44., 6., 1., 10., 34., 30., 20., 5., 0.],\n", + " [50., 52., 45., 3., 0., 0., 0., 0., 0., 0.]]),\n", + " array([0.1 , 0.88, 1.66, 2.44, 3.22, 4. , 4.78, 5.56, 6.34, 7.12, 7.9 ]),\n", + " )" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD4CAYAAAAXUaZHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAOZElEQVR4nO3db4xldX3H8fdHFoJg5Y9MNusudDaR8CeaFjKhWBpjWG1ACPCAEEhLt4Zm+wAtSBNdfYJ9honxz4PGZMOia0rR7YqBCFEJYqwP3Dq70CAs1i3yZ7cLO0ZBsU2Q+u2DOdRhnGHn3jMz9+6P9yvZzD2/e+65H5adz/72d849k6pCktSWN406gCRp+VnuktQgy12SGmS5S1KDLHdJatCaUQcAOO2002pycnLUMSTpqLJnz56fVdXEQs8dsdyT3AFcDhyuqnd2Y6cCXwUmgaeAa6rqF0kCfB74APDfwF9X1d4jvcfk5CTT09NL+6+RJAGQ5OnFnlvKssyXgEvmjW0FHqyqM4EHu22AS4Ezu19bgC8MGlaS1N8Ry72qvgf8fN7wlcCO7vEO4Ko541+uWT8ATk6ybrnCSpKWZtgTqmur6lD3+Dlgbfd4PfDsnP0OdGO/J8mWJNNJpmdmZoaMIUlaSO+rZWr2/gUD38OgqrZV1VRVTU1MLHg+QJI0pGHL/flXl1u6r4e78YPA6XP229CNSZJW0bDlfi+wuXu8GbhnzvhfZdaFwItzlm8kSatkKZdC3gW8FzgtyQHgVuA2YGeSG4CngWu63e9n9jLI/cxeCvnBFcgsSTqCI5Z7VV23yFObFti3gBv7hpIk9ePtBySpQWNx+wE15JMnLTL+4urmkN7gnLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGtSr3JN8JMljSX6U5K4kxyfZmGR3kv1JvprkuOUKK0lamqHLP
cl64O+Aqap6J3AMcC3wKeCzVfUO4BfADcsRVJK0dH2XZdYAb06yBjgBOARcDOzqnt8BXNXzPSRJAxq63KvqIPBp4BlmS/1FYA/wQlW90u12AFi/0OuTbEkynWR6ZmZm2BiSpAX0WZY5BbgS2Ai8HTgRuGSpr6+qbVU1VVVTExMTw8aQJC2gz7LM+4CfVtVMVf0GuBu4CDi5W6YB2AAc7JlRkjSgPuX+DHBhkhOSBNgEPA48BFzd7bMZuKdfREnSoPqsue9m9sTpXuDR7ljbgI8BtyTZD7wN2L4MOSVJA1hz5F0WV1W3ArfOG34SuKDPcSVJ/fgJVUlqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KDLHdJapDlLkkN6lXuSU5OsivJE0n2JXl3klOTPJDkJ93XU5YrrCRpafrO3D8PfLOqzgb+CNgHbAUerKozgQe7bUnSKhq63JOcBLwH2A5QVS9X1QvAlcCObrcdwFV9Q0qSBtNn5r4RmAG+mOThJLcnORFYW1WHun2eA9Yu9OIkW5JMJ5memZnpEUOSNF+fcl8DnA98oarOA37NvCWYqiqgFnpxVW2rqqmqmpqYmOgRQ5I0X59yPwAcqKrd3fYuZsv++STrALqvh/tFlCQNauhyr6rngGeTnNUNbQIeB+4FNndjm4F7eiWUJA1sTc/Xfxi4M8lxwJPAB5n9C2NnkhuAp4Frer6HJGlAvcq9qh4BphZ4alOf40qS+vETqpLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoP63jhMy2Tf2ecsOH7OE/tWOYmkFjhzl6QGWe6S1CDLXZIa5Jr7HK57S2qFM3dJapAzd0lHNLn1vgXHn7rtslVOoqVy5i5JDXLmLh0lnD1rEM7cJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoO8FFLedkFqkDN3SWqQM/dV9q4d71pwfOcq55DUNmfuktQgy12SGmS5S1KDLHdJalDvck9yTJKHk3yj296YZHeS/Um+muS4/jElSYNYjpn7TcDcC6I/BXy2qt4B/AK4YRneQ5I0gF7lnmQDcBlwe7cd4GJgV7fLDuCqPu8hSRpc35n754CPAr/ttt8GvFBVr3TbB4D1C70wyZYk00mmZ2ZmesaQJM01dLknuRw4XFV7hnl9VW2rqqmqmpqYmBg2hiRpAX0+oXoRcEWSDwDHA28FPg+cnGRNN3vfABzsH1OSNIihZ+5V9fGq2lBVk8C1wHeq6i+Ah4Cru902A/f0TilJGshKXOf+MeCWJPuZXYPfvgLvIUl6Hcty47Cq+i7w3e7xk8AFy3FcaSCfPGmR8RdXN4c0BvyEqiQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDfIHZEsraLEfiP7o5kdXOYneaJy5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZ5KaSksTa59b4Fx5+67bJVTnJ0ceYuSQ1y5i41yg9QvbE5c5ekBlnuktQgy12SGmS5S1KDLHdJapDlLkkNstwlqUGWuyQ1yHKXpAZZ7pLUIMtdkhpkuUtSgyx3SWqQ5S5JDbLcJalBlrskNchyl6QGDV3uSU5P8lCSx5M8luSmbvzUJA8k+Un39ZTliytJWoo+M/dXgL+vqnOBC4Ebk5wLbAUerKozgQe7bUnSKhq63KvqUFXt7R7/CtgHrAeuBHZ0u+0AruobUpI0mGX5AdlJJoHzgN3A2qo61D31HLB2kddsAbYAnHHGGcsRQ9KY8Idzj17vE6pJ3gJ8Dbi5qn4597mqKqAWel1VbauqqaqampiY6BtDkjRHr
3JPciyzxX5nVd3dDT+fZF33/DrgcL+IkqRB9blaJsB2YF9VfWbOU/cCm7vHm4F7ho8nSRpGnzX3i4DrgUeTPNKNfQK4DdiZ5AbgaeCafhElSYMautyr6vtAFnl607DHlST15ydUJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoMsd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGrQsP2ZPY+aTJy0y/uLq5mjQ5Nb7Fhx/6rbLVjmJVsPR/P/bmbskNciZu5rnD2vWG5Ezd0lqkOUuSQ2y3CWpQZa7JDXIcpekBlnuktQgy12SGmS5S1KD/BDTG8hiH+bZuco5JK08Z+6S1CBn7pKa4u0mZjlzl6QGOXOXlsNit1neeMbq5pA6ztwlqUHO3DVS+84+Z8Hxc57Yt8pJpLY4c5ekBlnuktQgy12SGrQi5Z7kkiQ/TrI/ydaVeA9J0uKW/YRqkmOAfwTeDxwAfpjk3qp6fLnfS0cPb30gra6VmLlfAOyvqier6mXgK8CVK/A+kqRFpKqW94DJ1cAlVfU33fb1wJ9U1Yfm7bcF2NJtngX8+HUOexrws2UNurzM18845xvnbGC+vo72fH9YVRMLPTGy69yrahuwbSn7JpmuqqkVjjQ08/UzzvnGORuYr6+W863EssxB4PQ52xu6MUnSKlmJcv8hcGaSjUmOA64F7l2B95EkLWLZl2Wq6pUkHwK+BRwD3FFVj/U87JKWb0bIfP2Mc75xzgbm66vZfMt+QlWSNHp+QlWSGmS5S1KDxr7cx/lWBknuSHI4yY9GnWW+JKcneSjJ40keS3LTqDPNleT4JP+W5N+7fP8w6kwLSXJMkoeTfGPUWeZL8lSSR5M8kmR61HnmS3Jykl1JnkiyL8m7R53pVUnO6n7fXv31yyQ3jzrXq5J8pPu++FGSu5IcP/AxxnnNvbuVwX8w51YGwHXjciuDJO8BXgK+XFXvHHWeuZKsA9ZV1d4kfwDsAa4ao9+7ACdW1UtJjgW+D9xUVT8YcbTXSHILMAW8taouH3WeuZI8BUxV1Vh+CCfJDuBfq+r27sq5E6rqhVHnmq/rmYPMftjy6THIs57Z74dzq+p/kuwE7q+qLw1ynHGfuY/1rQyq6nvAz0edYyFVdaiq9naPfwXsA9aPNtXv1KyXus1ju19jNdNIsgG4DLh91FmONklOAt4DbAeoqpfHsdg7m4D/HIdin2MN8OYka4ATgP8a9ADjXu7rgWfnbB9gjArqaJFkEjgP2D3aJK/VLXk8AhwGHqiqscoHfA74KPDbUQdZRAHfTrKnu53HONkIzABf7Ja1bk9y4qhDLeJa4K5Rh3hVVR0EPg08AxwCXqyqbw96nHEvd/WU5C3A14Cbq+qXo84zV1X9b1X9MbOfYr4gydgsbSW5HDhcVXtGneV1/FlVnQ9cCtzYLROOizXA+cAXquo84NfAWJ0zA+iWi64A/mXUWV6V5BRmVyg2Am8HTkzyl4MeZ9zL3VsZ9NCtZX8NuLOq7h51nsV0/1x/CLhk1FnmuAi4olvX/gpwcZJ/Gm2k1+pmeFTVYeDrzC5jjosDwIE5/xrbxWzZj5tLgb1V9fyog8zxPuCnVTVTVb8B7gb+dNCDjHu5eyuDIXUnLLcD+6rqM6POM1+SiSQnd4/fzOxJ8ydGm+p3qurjVbWhqiaZ/XP3naoaePa0UpKc2J0op1vu+HNgbK7aqqrngGeTnNUNbQLG4mT+PNcxRksynWeAC5Oc0H0fb2L2nNlARnZXyKVYoVsZLJskdwHvBU5LcgC4taq2jzbV/7sIuB54tFvXBvhEVd0/wkxzrQN2dFcqvAnYWVVjd7nhGFsLfH32e581wD9X1TdHG+n3fBi4s5uYPQl8cMR5XqP7S/H9wN+OOstcVbU7yS5gL/AK8DBD3IZgrC+FlCQNZ9yXZSRJQ7DcJalBlrskNchyl6QGWe6S1CDLXZIaZLlLUoP+D78BmV6m0W9NAAAAAElFTkSuQ
mCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist([data[0], data[1], data[2], data[3]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To add a Legend we need to add labels to the Histogram builder as a list of column names, and call the legend function." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist([data[0], data[1], data[2], data[3]], label=[cols[0],cols[1],cols[2],cols[3]])\n", + "plt.legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Or we can make 4 separate calls to the Histogram builder and get 4 overlapping plots." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([41., 8., 1., 7., 8., 33., 6., 23., 9., 14.]),\n", + " array([0.1 , 0.34, 0.58, 0.82, 1.06, 1.3 , 1.54, 1.78, 2.02, 2.26, 2.5 ]),\n", + " )" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQl0lEQVR4nO3df6zddX3H8efLggMRAccdqRRXokZKbCzmDnUY40AMClFMjIFspllI6hJdYJo58B8x2WJNVNwfi0kFpMsQ7fgRDDInQQwj2dBbqLRwcSJWba30GuXXsugK7/1xv5VLubf39Nxz7jkf+nwkNz3ne77ne14p7YtPP9/v53xTVUiS2vOSUQeQJPXHApekRlngktQoC1ySGmWBS1KjjljODzvxxBNr9erVy/mRktS8rVu3/qqqJg7cvqwFvnr1aqamppbzIyWpeUl+Ot92p1AkqVEWuCQ1ygKXpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5JjbLAJalRy7oScymmT1vT035rHp4echJJGg+OwCWpUT0XeJIVSe5Pclv3/NQk9yZ5JMnXk7x0eDElSQc6lBH4pcDc+YnPAldV1WuB3wCXDDKYJOngeirwJKuA84Gru+cBzgZu7HbZDFw4jICSpPn1OgL/IvAJ4Nnu+R8Cj1fVvu75LuDk+d6YZEOSqSRTMzMzSworSXrOogWe5AJgb1Vt7ecDqmpTVU1W1eTExAu+j1yS1KdeLiM8C3hvkvcARwGvAP4ROD7JEd0ofBWwe3gxJUkHWnQEXlVXVNWqqloNXAR8p6r+HLgL+EC323rg1qGllCS9wFKuA/874GNJHmF2TvyawUSSJPXikFZiVtV3ge92jx8Fzhx8JElSL1yJKUmNssAlqVEWuCQ1ygKXpEY183WyatyVx/WwzxPDzyG9iDgCl6RGWeCS1CgLXJIaZYFLUqM8idmHtZvXDu3Y29dvH9qxJb24OAKXpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5JjerlpsZHJflekh8keTDJp7vt1yX5SZJt3c+64ceVJO3Xy3XgvwXOrqqnkxwJ3JPk37rX/raqbhxePEnSQhYt8Koq4Onu6ZHdTw0zlCRpcT3NgSdZkWQbsBe4o6ru7V76hyQPJLkqyR8s8N4NSaaSTM3MzAwotiSppwKvqmeqah2wCjgzyRuAK
4DTgD8BXsnsXerne++mqpqsqsmJiYkBxZYkHdJVKFX1OHAXcF5V7alZvwW+gneol6Rl1ctVKBNJju8eHw2cCzycZGW3LcCFwI5hBpUkPV8vV6GsBDYnWcFs4W+pqtuSfCfJBBBgG/BXQ8wpSTpAL1ehPACcMc/2s4eSSJLUE1diSlKjLHBJapQFLkmNssAlqVHeE3MZbPnMvkX3+eAV/qeQdGgcgUtSoyxwSWqUBS5JjbLAJalRFrgkNcoCl6RGWeCS1CgLXJIaZYFLUqNc/qfxceVxPezzxPBzSI1wBC5JjerllmpHJflekh8keTDJp7vtpya5N8kjSb6e5KXDjytJ2q+XEfhvgbOr6o3AOuC8JG8BPgtcVVWvBX4DXDK8mJKkAy1a4N2d55/unh7Z/RRwNnBjt30zszc2liQtk57mwJOsSLIN2AvcAfwYeLyq9n9P6i7g5AXeuyHJVJKpmZmZQWSWJNFjgVfVM1W1DlgFnAmc1usHVNWmqpqsqsmJiYk+Y0qSDnRIV6FU1ePAXcBbgeOT7L8McRWwe8DZJEkH0ctVKBNJju8eHw2cC0wzW+Qf6HZbD9w6rJCSpBfqZSHPSmBzkhXMFv6WqrotyUPA15L8PXA/cM0Qc0qSDrBogVfVA8AZ82x/lNn5cEnzWLt57dCOvX399qEdW+1wJaYkNcoCl6RGWeCS1CgLXJIaZYFLUqMscElqlAUuSY2ywCWpURa4JDXKe2JKL2KrL/9mX+/bufH8ASfRMDgCl6RGWeCS1CgLXJIaZYFLUqM8ianD2jC/8vVw5EnT5eUIXJIa1cst1U5JcleSh5I8mOTSbvuVSXYn2db9vGf4cSVJ+/UyhbIP+HhV3ZfkWGBrkju6166qqs8NL54kaSG93FJtD7Cne/xUkmng5GEHkyQd3CHNgSdZzez9Me/tNn00yQNJrk1ywoCzSZIOoucCT/Jy4Cbgsqp6EvgS8BpgHbMj9M8v8L4NSaaSTM3MzAwgsiQJeizwJEcyW97XV9XNAFX1WFU9U1XPAl9mgTvUV9WmqpqsqsmJiYlB5Zakw14vV6EEuAaYrqovzNm+cs5u7wd2DD6eJGkhvVyFchbwIWB7km3dtk8CFydZBxSwE/jwUBJKkubVy1Uo9wCZ56XbBx9HktQrl9Jraa48btQJpMOWS+klqVEWuCQ1ygKXpEZZ4JLUKE9iNmb6tDWL7rPm4ellSCJp1ByBS1KjLHBJapQFLkmNssAlqVGexNTCXGUpjTVH4JLUKAtckhplgUtSoyxwSWqUBS5JjbLAJalRvdwT85QkdyV5KMmDSS7ttr8yyR1JftT9esLw40qS9utlBL4P+HhVnQ68BfhIktOBy4E7q+p1wJ3dc0nSMlm0wKtqT1Xd1z1+CpgGTgbeB2zudtsMXDiskJKkFzqkOfAkq4EzgHuBk6pqT/fSL4GTFnjPhiRTSaZmZmaWEFWSNFfPBZ7k5cBNwGVV9eTc16qqgJrvfVW1qaomq2pyYmJiSWElSc/pqcCTHMlseV9fVTd3mx9LsrJ7fSWwdzgRJUnz6eUqlADXANNV9YU5L30DWN89Xg/cOvh4kqSF9PJthGcBHwK2J9nWbfsksBHYkuQS4KfAB4cTUZI0n0ULvKruAbLAy+cMNo4kqVeuxJSkRlngktQoC1ySGmWBS1KjvCem1KC1m9f2tN+xaw792E9Nbzz0Ny3R6su/2df7dm48f8BJ2uIIXJIaZYFLUqMscElqlAUuSY2ywCWpURa4JDXKApekRlngktQoC1ySGuVKTEkv0O/KSC0vR+CS1Khebql2bZK9SXbM2XZlkt1JtnU/7xluTEnSgXoZgV8HnDfP9quqal33c/tgY0mSFrNogVfV3cCvlyGLJOkQLGUO/KNJHuimWE5YaKckG5JMJZmamZlZwsdJkubqt8C/BLwGWAfsAT6/0I5VtamqJqtqcmJios+PkyQdqK8Cr6rHquqZqnoW+DJw5mBjSZIW01eBJ1k55
+n7gR0L7StJGo5FF/IkuQF4B3Bikl3Ap4B3JFkHFLAT+PAQM0qS5rFogVfVxfNsvmYIWbSMpk/r5WaJr2LNRb8YehZJ/XElpiQ1ygKXpEZZ4JLUKAtckhrl18mOmbWb1x709S1LOMb29dv7SCRpXDkCl6RGWeCS1CgLXJIaZYFLUqMscElqlAUuSY2ywCWpURa4JDXKApekRrkSUy9OVx7Xwz5PDD9Hg45dc/nQjv3U9MahHftw5Ahckhq1aIF3d53fm2THnG2vTHJHkh91vy54V3pJ0nD0MgK/DjjvgG2XA3dW1euAO7vnkqRltGiBV9XdwK8P2Pw+YHP3eDNw4YBzSZIW0e8c+ElVtad7/EvgpIV2TLIhyVSSqZmZmT4/TpJ0oCWfxKyqYvbu9Au9vqmqJqtqcmJiYqkfJ0nq9FvgjyVZCdD9undwkSRJvei3wL8BrO8erwduHUwcSVKvermM8AbgP4HXJ9mV5BJgI3Bukh8B7+yeS5KW0aIrMavq4gVeOmfAWTQgWz6zb97t059Zs8xJhqCXFZbSYcKVmJLUKAtckhplgUtSoyxwSWrUi+7rZKdPW/xE3ZqHpwf2eQudMNTgrD311cM58Oa1wzmutEwcgUtSoyxwSWqUBS5JjbLAJalRL7qTmINysJOhW5YxhyQtxBG4JDXKApekRlngktQoC1ySGuVJzDHhik7p0K2+/Jt9vW/nxvOX9fOW8pkH4whckhq1pBF4kp3AU8AzwL6qmhxEKEnS4gYxhfJnVfWrARxHknQInEKRpEYttcAL+HaSrUk2DCKQJKk3S51CeVtV7U7yR8AdSR6uqrvn7tAV+waAV796SN/rLKkJx665fKjHf2p6Y0/7LeVqknGypBF4Ve3uft0L3AKcOc8+m6pqsqomJyYmlvJxkqQ5+i7wJMckOXb/Y+BdwI5BBZMkHdxSplBOAm5Jsv84X62qbw0klSRpUX0XeFU9CrxxgFkkSYfAywglqVEWuCQ1ygKXpEZZ4JLUKAtckhrl94FrINae6ipbabk5ApekRlngktQoC1ySGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1ygKXpEYdlisxp09bM+oIkrRkjsAlqVFLKvAk5yX5YZJHkgz3dtOSpOdZyk2NVwD/BLwbOB24OMnpgwomSTq4pYzAzwQeqapHq+p3wNeA9w0mliRpMUs5iXky8PM5z3cBbz5wpyQbgA3d06eT/LDH458I/GoJ+Ybp8Mn26V533NHLTofP79tgma1nF8x9MlbZ8tnnPT3UbH8838ahX4VSVZuATYf6viRTVTU5hEhLZrb+mK0/ZuvP4ZBtKVMou4FT5jxf1W2TJC2DpRT494HXJTk1yUuBi4BvDCaWJGkxfU+hVNW+JB8F/h1YAVxbVQ8OLFkf0y7LyGz9MVt/zNafF322VNUgjiNJWmauxJSkRlngktSosSzwcV2in+TaJHuT9HTR83JKckqSu5I8lOTBJJeOOtN+SY5K8r0kP+iy9Xx1+XJJsiLJ/UluG3WWuZLsTLI9ybYkU6POM1eS45PcmOThJNNJ3jrqTABJXt/9fu3/eTLJZaPOtV+Sv+n+HuxIckOSo/o+1rjNgXdL9P8bOJfZxUHfBy6uqodGGgxI8nbgaeCfq+oNo84zV5KVwMqqui/JscBW4MIx+X0LcExVPZ3kSOAe4NKq+q8RR/u9JB8DJoFXVNUFi+2/XJLsBCaramwWpOyXZDPwH1V1dXcl2suq6vFR55qr65PdwJur6qdjkOdkZv/8n15V/5tkC3B7VV3Xz/HGcQQ+tkv0q+pu4NejzjGfqtpTVfd1j58CppldLTtyNevp7umR3c/YjBySrALOB64edZZWJDkOeDtwDUBV/W7cyrtzDvDjcSjvOY4Ajk5yBPAy4Bf9HmgcC3y+JfpjUUStSLIaOAO4d7RJntNNUWwD9gJ3VNXYZAO+CHwCeHbUQeZRwLeTbO2+lmJcnArMAF/ppp6uTnLMqEPN4
yLghlGH2K+qdgOfA34G7AGeqKpv93u8cSxwLUGSlwM3AZdV1ZOjzrNfVT1TVeuYXbF7ZpKxmIJKcgGwt6q2jjrLAt5WVW9i9ls/P9JN442DI4A3AV+qqjOA/wHG5nwVQDet817gX0edZb8kJzA7o3Aq8CrgmCR/0e/xxrHAXaLfp25++Sbg+qq6edR55tP9M/su4LxRZ+mcBby3m2v+GnB2kn8ZbaTndCM2qmovcAuzU4zjYBewa86/pG5kttDHybuB+6rqsVEHmeOdwE+qaqaq/g+4GfjTfg82jgXuEv0+dCcKrwGmq+oLo84zV5KJJMd3j49m9gT1w6NNNauqrqiqVVW1mtk/a9+pqr5HRIOU5JjuhDTd9MS76PFrH4etqn4J/DzJ67tN5wAjP2F+gIsZo+mTzs+AtyR5Wfd39hxmz1f1ZezuibkMS/T7luQG4B3AiUl2AZ+qqmtGm+r3zgI+BGzv5poBPllVt48w034rgc3dFQEvAbZU1VhdrjemTgJumf17zhHAV6vqW6ON9Dx/DVzfDbQeBf5yxHl+r/sf3rnAh0edZa6qujfJjcB9wD7gfpawrH7sLiOUJPVmHKdQJEk9sMAlqVEWuCQ1ygKXpEZZ4JLUKAtckhplgUtSo/4f/wVTKgh22i0AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist(data[0])\n", + "plt.hist(data[1]) \n", + "plt.hist(data[2])\n", + "plt.hist(data[3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can use alpha to control the opacity of plots. alpha of 1 is opague. alpha of 0 is transparent." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([41., 8., 1., 7., 8., 33., 6., 23., 9., 14.]),\n", + " array([0.1 , 0.34, 0.58, 0.82, 1.06, 1.3 , 1.54, 1.78, 2.02, 2.26, 2.5 ]),\n", + " )" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXAAAAD4CAYAAAD1jb0+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAQ8UlEQVR4nO3df4xldX3G8ffjgoCIgDLFLQtdokYDGhczRSnGWBCDYhSNacTWkIZkbSIEqqlF/4FN2gQTFRtoTFZAtimglB9ikFoJYigJrs7Cyq/Fioi468KOVX41DRb49I85K8Mys3P3zr1z73f3/Upu5p5zzz33ybD78N1zzveeVBWSpPa8bNQBJEn9scAlqVEWuCQ1ygKXpEZZ4JLUqL2W8sMOOeSQWrly5VJ+pCQ1b8OGDb+pqokd1y9pga9cuZKpqaml/EhJal6SX8613kMoktQoC1ySGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1ygKXpEZZ4JLUqCWdibkY0xdd3NN2E2edOeQkkjQeHIFLUqN6LvAky5LcleTGbvnIJOuTPJjkm0lePryYkqQd7coI/Gxg06zlLwAXVtXrgd8BZwwymCRp53oq8CQrgFOAS7rlACcA13SbrANOHUZASdLceh2BfwX4LPB8t/wa4PGqerZb3gwcNtcbk6xOMpVkanp6elFhJUkvWLDAk3wA2FZVG/r5gKpaW1WTVTU5MfGS7yOXJPWpl8sIjwc+mOT9wL7Aq4B/Ag5Kslc3Cl8BbBleTEnSjhYcgVfV56pqRVWtBD4GfL+q/hK4Ffhot9npwA1DSylJeonFXAf+98CnkzzIzDHxSwcTSZLUi12aiVlVPwB+0D1/CDh28JEkSb1wJqYkNcoCl6RGWeCS1CgLXJIa1czXyapx5x/YwzZPDD+HtBtxBC5JjbLAJalRFrgkNcoCl6RGeRKzD2vuWDO0fZ933HlD27ek3YsjcElqlAUuSY2ywCWpURa4JDXKApekRlngktSoXm5qvG+SHyX5SZL7kqzp1l+e5BdJNnaPVcOPK0narpfrwJ8BTqiqp5PsDdye5N+71/6uqq4ZXjxJ0nwWLPCqKuDpbnHv7lHDDCVJWlhPx8CTLEuyEdgG3FxV67uX/jHJ3UkuTLLPPO9dnWQqydT09PSAYkuSeirwqnquqlYBK4Bjk7wZ+Bzw
JuBPgVczc5f6ud67tqomq2pyYmJiQLElSbt0FUpVPQ7cCpxcVVtrxjPA1/EO9ZK0pHq5CmUiyUHd8/2Ak4AHkizv1gU4Fbh3mEElSS/Wy1Uoy4F1SZYxU/hXV9WNSb6fZAIIsBH4myHmlCTtoJerUO4Gjplj/QlDSSRJ6okzMSWpURa4JDXKApekRlngktQo74m5BI7+1j0LbnPfqW9ZgiSSdieOwCWpURa4JDXKApekRlngktQoC1ySGmWBS1KjLHBJapQFLkmNssAlqVHOxNT4OP/AHrZ5Yvg5pEY4ApekRvVyS7V9k/woyU+S3JdkTbf+yCTrkzyY5JtJXj78uJKk7XoZgT8DnFBVbwVWAScneQfwBeDCqno98DvgjOHFlCTtaMEC7+48/3S3uHf3KOAE4Jpu/TpmbmwsSVoiPR0DT7IsyUZgG3Az8HPg8ap6tttkM3DYPO9dnWQqydT09PQgMkuS6LHAq+q5qloFrACOBd7U6wdU1dqqmqyqyYmJiT5jSpJ2tEtXoVTV48CtwHHAQUm2X4a4Atgy4GySpJ3o5SqUiSQHdc/3A04CNjFT5B/tNjsduGFYISVJL9XLRJ7lwLoky5gp/Kur6sYk9wPfSPIPwF3ApUPMKUnawYIFXlV3A8fMsf4hZo6HS5rDmjvWDG3f5x133tD2rXY4E1OSGmWBS1KjLHBJapQFLkmNssAlqVEWuCQ1ygKXpEZZ4JLUKAtckhrlPTGl3djKc7/T1/sevuCUASfRMDgCl6RGWeCS1CgLXJIaZYFLUqM8iak92jC/8nVP5EnTpeUIXJIa1cst1Q5PcmuS+5Pcl+Tsbv35SbYk2dg93j/8uJKk7Xo5hPIs8JmqujPJAcCGJDd3r11YVV8cXjxJ0nx6uaXaVmBr9/ypJJuAw4YdTJK0c7t0DDzJSmbuj7m+W3VmkruTXJbk4AFnkyTtRM8FnuSVwLXAOVX1JPBV4HXAKmZG6F+a532rk0wlmZqenh5AZEkS9FjgSfZmpryvqKrrAKrqsap6rqqeB77GPHeor6q1VTVZVZMTExODyi1Je7xerkIJcCmwqaq+PGv98lmbfRi4d/DxJEnz6eUqlOOBTwD3JNnYrfs8cFqSVUABDwOfHEpCSdKcerkK5XYgc7x00+DjSJJ65VR6Lc75B446gbTHciq9JDXKApekRlngktQoC1ySGuVJzMZMX3TxgttMnHXmEiRRP65c/0hf7/v4248YcBLtDhyBS1KjLHBJapQFLkmNssAlqVGexNT8nGUpjTVH4JLUKAtckhplgUtSoyxwSWqUBS5JjbLAJalRvdwT8/Aktya5P8l9Sc7u1r86yc1Jftb9PHj4cSVJ2/UyAn8W+ExVHQW8A/hUkqOAc4FbquoNwC3dsiRpiSxY4FW1taru7J4/BWwCDgM+BKzrNlsHnDqskJKkl9qlY+BJVgLHAOuBQ6tqa/fSo8Ch87xndZKpJFPT09OLiCpJmq3nAk/ySuBa4JyqenL2a1VVQM31vqpaW1WTVTU5MTGxqLCSpBf0VOBJ9mamvK+oquu61Y8lWd69vhzYNpyIkqS59HIVSoBLgU1V9eVZL30bOL17fjpww+DjSZLm08u3ER4PfAK4J8nGbt3ngQuAq5OcAfwS+IvhRJQkzWXBAq+q24HM8/KJg40jSeqVMzElqVEWuCQ1ygKXpEZZ4JLUKO+JKTVozR1retpun9c+ssv7fubRj+zyexZr5bnf6et9D19wyoCTtMURuCQ1ygKXpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5JjbLAJalRFrgkNcqZmJJeot+ZkVpajsAlqVG93FLtsiTbktw7a935SbYk2dg93j/cmJKkHfUyAr8cOHmO9RdW1arucdNgY0mSFrJggVfVbcBvlyCLJGkXLOYY+JlJ7u4OsRw830ZJVieZSjI1PT29iI+TJM3Wb4F/FXgdsArYCnxpvg2ram1VTVbV5MTERJ8fJ0naUV8FXlWPVdVz
VfU88DXg2MHGkiQtpK8CT7J81uKHgXvn21aSNBwLTuRJchXwbuCQJJuB84B3J1kFFPAw8MkhZpQkzWHBAq+q0+ZYfekQsmgJTV908cIb3XMAE295avhhtKAr1+/6vS21+3MmpiQ1ygKXpEZZ4JLUKAtckhrl18mOmTV3rNnp60dvvmfBfdx3x3/Puf68487rK5Ok8eQIXJIaZYFLUqMscElqlAUuSY2ywCWpURa4JDXKApekRlngktQoC1ySGuVMTO2ezj+wh22eGH6OBu3z2uuGtu9nHv3I0Pa9J3IELkmNWrDAu7vOb0ty76x1r05yc5KfdT/nvSu9JGk4ehmBXw6cvMO6c4FbquoNwC3dsiRpCS1Y4FV1G/DbHVZ/CFjXPV8HnDrgXJKkBfR7DPzQqtraPX8UOHS+DZOsTjKVZGp6errPj5Mk7WjRJzGrqpi5O/18r6+tqsmqmpyYmFjsx0mSOv0W+GNJlgN0P7cNLpIkqRf9Fvi3gdO756cDNwwmjiSpV71cRngVcAfwxiSbk5wBXACclORnwHu6ZUnSElpwJmZVnTbPSycOOIsG5OhvzX3fzOmpi5c4yRD0MsNyCVy5/pFRR5CciSlJrbLAJalRFrgkNcoCl6RG7XZfJzt90cIn6ibOOnNgnzffCUMNzprXDOm70u5YM5z9SkvEEbgkNcoCl6RGWeCS1CgLXJIatdudxByUnZ0MPXrzbnDi8gd++4HUOkfgktQoC1ySGmWBS1KjLHBJapQnMceEMzqlXbfy3O/09b6HLzhlST9vMZ+5M47AJalRixqBJ3kYeAp4Dni2qiYHEUqStLBBHEL586r6zQD2I0naBR5CkaRGLbbAC/hekg1JVg8ikCSpN4s9hPLOqtqS5I+Am5M8UFW3zd6gK/bVAEccccQiP05Sy/Z57XVD3f8zj36kp+0WczXJOFnUCLyqtnQ/twHXA8fOsc3aqpqsqsmJiYnFfJwkaZa+CzzJ/kkO2P4ceC9w76CCSZJ2bjGHUA4Frk+yfT9XVtV3B5JKkrSgvgu8qh4C3jrALJKkXeBU+t3NrzfO/9qDP1y6HJKGzuvAJalRFrgkNcoCl6RGWeCS1ChPYmog1rzm4FFHkPY4jsAlqVEWuCQ1ygKXpEZZ4JLUKE9ias819fW+3/rxZS9evvK5ExcZRtp1jsAlqVEWuCQ1ygKXpEZZ4JLUqD3yJOb0RRePOoIkLZojcElq1KIKPMnJSX6a5MEk5w4qlCRpYYu5qfEy4J+B9wFHAaclOWpQwSRJO7eYEfixwINV9VBV/R74BvChwcSSJC1kMScxDwN+NWt5M/D2HTdKshpY3S0+neSnPe7/EOA3i8g3THtOtut63fDJXjbajX9vlw0syBx249/boL3ov8NYZcsXXrS4q9n+ZK6VQ78KparWAmt39X1JpqpqcgiRFs1s/TFbf8zWnz0h22IOoWwBDp+1vKJbJ0laAosp8B8Db0hyZJKXAx8Dvj2YWJKkhfR9CKWqnk1yJvAfwDLgsqq6b2DJ+jjssoTM1h+z9cds/dnts6WqBrEfSdIScyamJDXKApekRo1lgY/rFP0klyXZluTeUWfZUZLDk9ya5P4k9yU5e9SZtkuyb5IfJflJl23NqDPtKMmyJHcluXHUWWZL8nCSe5JsTDI16jyzJTkoyTVJHkiyKclxo84EkOSN3e9r++PJJOeMOtd2Sf62+3twb5Krkuzb977G7Rh4N0X/v4CTmJkc9GPgtKq6f6TBgCTvAp4G/qWq3jzqPLMlWQ4sr6o7kxwAbABOHZPfW4D9q+rpJHsDtwNnV9UPRxztD5J8GpgEXlVVHxh1nu2SPAxMVtXYTEjZLsk64D+r6pLuSrRXVNXjo841W9cnW4C3V9UvxyDPYcz8+T+qqv43ydXATVV1eT/7G8cR+NhO0a+q24DfjjrHXKpqa1Xd2T1/CtjEzGzZkasZT3eLe3ePsRk5JFkBnAJcMuosrUhyIPAu4FKAqvr9uJV3
50Tg5+NQ3rPsBeyXZC/gFcCv+93ROBb4XFP0x6KIWpFkJXAMsH60SV7QHaLYCGwDbq6qsckGfAX4LPD8qIPMoYDvJdnQfS3FuDgSmAa+3h16uiTJ/qMONYePAVeNOsR2VbUF+CLwCLAVeKKqvtfv/saxwLUISV4JXAucU1U9fUHJUqiq56pqFTMzdo9NMhaHoJJ8ANhWVRtGnWUe76yqtzHzrZ+f6g7jjYO9gLcBX62qY4D/AcbmfBVAd1jng8C/jTrLdkkOZuaIwpHAHwP7J/mrfvc3jgXuFP0+dceXrwWuqKqev4ZqKXX/zL4VOHnUWTrHAx/sjjV/Azghyb+ONtILuhEbVbUNuJ6ZQ4zjYDOweda/pK5hptDHyfuAO6vqsVEHmeU9wC+qarqq/o+Zr4v7s353No4F7hT9PnQnCi8FNlXVl0edZ7YkE0kO6p7vx8wJ6gdGm2pGVX2uqlZU1Upm/qx9v6r6HhENUpL9uxPSdIcn3guMxRVQVfUo8Kskb+xWnQiM/IT5Dk5jjA6fdB4B3pHkFd3f2ROZOV/Vl7G7J+YSTNHvW5KrgHcDhyTZDJxXVZeONtUfHA98ArinO9YM8PmqummEmbZbDqzrrgh4GXB1VY3V5Xpj6lDg+pm/5+wFXFlV3x1tpBc5C7iiG2g9BPz1iPP8Qfc/vJOAT446y2xVtT7JNcCdwLPAXSxiWv3YXUYoSerNOB5CkST1wAKXpEZZ4JLUKAtckhplgUtSoyxwSWqUBS5Jjfp/TKJuJuN9q/cAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.hist(data[0])\n", + "plt.hist(data[1], alpha=1) \n", + "plt.hist(data[2], alpha=0.6)\n", + "plt.hist(data[3], alpha=0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also plot the 4 columns on separate subplots to make it more readable. This is very readable, but beware that each plot automatically scales its axes to the data." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeYAAAD4CAYAAAA5OEWQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAXRUlEQVR4nO3df6xcZ33n8fenjvlRYAlgK/UmMTcrIqp0VZLUCslmhdiErAJBCVIjNqhNkyrIqy6UZItUXP4oatU/HGlFf1GBrIRi2jQhdUJxA7S10lQUaUmxQyA/DJuQDcWsg82PJKRFUNPv/jHHye3lXt+5c2fmPPfO+yVdeeacc2c+c3zPfOc855nnSVUhSZLa8BN9B5AkSc+xMEuS1BALsyRJDbEwS5LUEAuzJEkNOWmaT7Zp06aam5ub5lNKa9KBAwe+VVWb+86xFI9laTijHMtTLcxzc3Ps379/mk8prUlJvtZ3hhPxWJaGM8qxbFO2JEkNsTBLktQQC7MkSQ2Z6jXmWTe345Nje6zHd142tseS1B/fF7SQZ8ySJDXEwixJUkMszJIkNcRrzDPO61uS1BbPmCVJaoiFWZKkhliYJUlqiIVZkqSGWJglSWqIhVmSpIZYmCVJaoiFWZKkhliYJUlqyLod+csRrSRJa5FnzJIkNcTCLElSQ9ZtU7akH5fkBcBngOczOP73VNX7kpwB3Aa8AjgAXF1VP+wvqUYxrkt4Xr7rl2fM0mz5AXBRVb0GOBu4NMn5wI3A71bVq4DvAtf1mFGaaRZmaYbUwDPd3Y3dTwEXAXu65buBt/QQTxIWZmnmJNmQ5H7gCLAP+CrwZFUd6zY5BJy6yO9tT7I/yf6jR49OL7A0Y5YtzElOT3JPkoeTPJTk+m75y5PsS/JI9+/LJh9X0mpV1Y+q6mzgNOA84KeH/L1dVbWtqrZt3rx5ohmlWTbMGfMx4N1VdRZwPvCOJGcBO4C7q+pM4O7uvqQ1oqqeBO4BLgBOTnK8M+hpwDd6CybNuGULc1Udrqr7utvfAw4yaOa6gsG1KPCalLQmJNmc5OTu9guBSxgc0/cAV3abXQN8op+Eklb0dakkc8A5wL3AKVV1uFv1BHDKEr+zHdgOsHXr1lFzShqPLcDuJBsYfDC/varuSvIwcFuS3wG+ANzcZ0hplg1dmJO8GLgDuKGqnk7y7LqqqiS12O9V1S5gF8C2bdsW3UbSdFTVlxh8uF64/DEG15sl9WyoXtlJNjIoyrdU1Z3d4m8m2dKt38Kgh6ckSVqFYXplh0Gz1sGqev+8VXsZXIsCr0lJkjQWwzRlXwhcDTzQffcR4L3ATuD2JNcBXwPeOpmIkiTNjmULc1V9FsgSqy8ebx
xJkmabI39JktQQZ5fS2DizjSStnmfMkiQ1xMIsSVJDLMySJDXEa8ySNIJx9amQFvKMWZKkhliYJUlqiE3Za5TNaJK0PnnGLElSQzxjljQzbGnSWuAZszQjkpye5J4kDyd5KMn13fKXJ9mX5JHu35f1nVWaZRZmaXYcA95dVWcB5wPvSHIWsAO4u6rOBO7u7kvqiYVZmhFVdbiq7utufw84CJwKXAHs7jbbDbyln4SSwMIszaQkc8A5wL3AKVV1uFv1BHBKT7EkYeevodhhROtJkhcDdwA3VNXTyXPTrVdVJaklfm87sB1g69at04gqzSTPmKUZkmQjg6J8S1Xd2S3+ZpIt3fotwJHFfreqdlXVtqratnnz5ukElmaQhVmaERmcGt8MHKyq989btRe4prt9DfCJaWeT9BybsqXZcSFwNfBAkvu7Ze8FdgK3J7kO+Brw1p7yScLCLM2MqvoskCVWXzzNLJKWZlO2JEkNsTBLktQQC7MkSQ2xMEuS1BALsyRJDbEwS5LUkGULc5IPJzmS5MF5y5wmTpKkCRjmjPkjwKULljlNnCRJE7BsYa6qzwDfWbDYaeIkSZqAUa8xDz1NXJLtSfYn2X/06NERn06SpNmw6s5fVVXAotPEdeudkUaSpCGNWpiHmiZOkiStzKiF2WniJEmagGVnl0pyK/B6YFOSQ8D7mOA0cXM7Pjmuh5Ikac1ZtjBX1duWWOU0cZIkjZkjf0mS1BALszRDHMlPat+yTdmS1pWPAB8APjpv2fGR/HYm2dHdf08P2bQOjavf0OM7LxvL46wFnjFLM8SR/KT2WZglDTWSn6P4SdNhYZb0rBON5OcoftJ0WJglOZKf1BA7f0k6PpLfThzJTzjQU98szGrOON8UxtWTs8VMo5j2SH6SVs7CLM0QR/KT2uc1ZkmSGmJhliSpITZla12zE8v64P+jZolnzJIkNcTCLElSQyzMkiQ1xMIsSVJDLMySJDXEwixJUkP8upSkifFrTtLKecYsSVJDLMySJDXEwixJUkMszJIkNcTOX5Kk5rXYkXBSc6t7xixJUkNWVZiTXJrkK0keTbJjXKEkTZ/Hs9SGkQtzkg3AHwFvBM4C3pbkrHEFkzQ9Hs9SO1Zzxnwe8GhVPVZVPwRuA64YTyxJU+bxLDViNZ2/TgW+Pu/+IeC1CzdKsh3Y3t19JslXVvGcfdgEfKvvEGPia2lAblx08cLX88qphHnOssfziMdyq/9P5lq5VrP1lmuJY3m+TYxwLE+8V3ZV7QJ2Tfp5JiXJ/qra1neOcfC1tGstvJ5RjuVWX5e5Vq7VbK3mgmezza3091bTlP0N4PR590/rlklaezyepUaspjB/HjgzyRlJngdcBewdTyxJU+bxLDVi5KbsqjqW5J3AXwMbgA9X1UNjS9aONdsMvwhfS7t6fT0TPJ5b/X8y18q1mq3VXDBitlTVuINIkqQROfKXJEkNsTBLktQQC/MJJHk8yQNJ7k+yv+88q5Hk5CR7knw5ycEkF/SdaRRJXt39fxz/eTrJDX3nGlWS/5nkoSQPJrk1yQv6zrQSSU5Pck+Sh7vXcf0i2yTJH3RDfX4pybkNZXt9kqfm/T395hRyvSDJPyT5YpfrtxbZ5vlJPtbts3uTzE061wqyXZvk6Lx99vZpZOuee0OSLyS5a5F1veyzIXKteH85u9Ty/ktVtfil+pX6feCvqurKrtftT/YdaBRV9RXgbHh2GMlvAB/vNdSIkpwKvAs4q6q+n+R2Br2hP9JrsJU5Bry7qu5L8hLgQJJ9VfXwvG3eCJzZ/bwW+CCLDEbUUzaAv6+qN08hz3E/AC6qqmeSbAQ+m+TTVfW5edtcB3y3ql6V5CrgRuC/NZIN4GNV9c4p5FnoeuAg8O8WWdfXPlsuF6xwf3nGPAOSvBR4HXAzQFX9sKqe7DfVWFwMfLWqvtZ3kFU4CXhhkpMYfFj6fz3nWZGqOlxV93W3v8fgzenUBZtdAXy0Bj4HnJxkSyPZpq
7bD890dzd2Pwt74V4B7O5u7wEuTpJGsvUiyWnAZcBNS2zSyz4bIteKWZhPrIC/SXKgG45wrToDOAr8cdfcclOSF/UdagyuAm7tO8SoquobwP8C/hE4DDxVVX/Tb6rRdU2H5wD3Lli12HCfUy2QJ8gGcEHXdPvpJD8zpTwbktwPHAH2VdWS+6yqjgFPAa9oJBvAz3eXJfYkOX2R9ZPwe8CvA/+6xPq+9tlyuWCF+8vCfGL/uarOZdAU944kr+s70IhOAs4FPlhV5wD/BKzpaf265vjLgT/vO8uokryMwaf8M4B/D7woyS/2m2o0SV4M3AHcUFVP951nvmWy3Qe8sqpeA/wh8BfTyFRVP6qqsxmMsHZekv84jecdxhDZ/hKYq6qfBfbx3FnqxCR5M3Ckqg5M+rlWYshcK95fFuYT6M5oqKojDK5jntdvopEdAg7N++S7h0GhXsveCNxXVd/sO8gqvAH4v1V1tKr+BbgT+E89Z1qx7lrkHcAtVXXnIpv0Ntznctmq6unjTbdV9SlgY5JN08jWPeeTwD3ApQtWPbvPusscLwW+Pa1cJ8pWVd+uqh90d28Cfm4KcS4ELk/yOIOZzy5K8qcLtuljny2ba5T9ZWFeQpIXdR1G6Jp9/yvwYL+pRlNVTwBfT/LqbtHFwMIOMGvN21jDzdidfwTOT/KT3bWwixlcB10zutw3Awer6v1LbLYX+KWud/b5DJrsD7eQLclPHb8OmeQ8Bu+JE30zT7I5ycnd7RcClwBfXrDZXuCa7vaVwN/WFEaDGibbgv4BlzOFv9mq+o2qOq2bEOIqBvtjYevS1PfZMLlG2V/2yl7aKcDHu2P2JODPquqv+o20Kr8K3NI1AT8G/HLPeUbWfVC6BPjvfWdZjaq6N8keBs2px4Av0Pbwgou5ELgaeKC7LgnwXmArQFV9CPgU8CbgUeCfmd7f3jDZrgR+Jckx4PvAVVMogFuA3d23Cn4CuL2q7kry28D+qtrL4APFnyR5FPgOgzf9aRgm27uSXM7gb/Y7wLVTyvZjGtlny+Va8f5ySE5JkhpiU7YkSQ2xMEuS1BALsyRJDZlq569NmzbV3NzcNJ9SWpMOHDjwrara3HeOpXgsS8MZ5VieamGem5tj//41PReENBVJmh5m1GNZGs4ox7JN2ZIkNcTCLElSQyzM0oxZOHdskjO6+WsfzWA+2+f1nVGaZc2N/DW345NjeZzHd142lseR1qGFc8feCPxuVd2W5EMM5rX94DieyONZWjnPmKUZsnDu2G6c6IsYTGwCg5lv3tJPOklgYZZmzcK5Y18BPNnNXwsnmCs5yfYk+5PsP3r06OSTSjPKwizNiNXOaVtVu6pqW1Vt27y52a9YS2tec9eYJU3M8blj3wS8gME15t8HTk5yUnfWPLW5kiUtzjNmaUYsMXfsLwD3MJj+EAbz2X6ip4iSsDBLgvcAv9bNY/sKBvPaSuqJTdnSDKqqvwP+rrv9GHBen3kkPcczZkmSGmJhliSpIRZmSZIaYmGWJKkhFmZJkhpiYZYkqSHLFuYkL0jyD0m+mOShJL/VLXeqOEmSxmyYM+YfABdV1WuAs4FLk5zPc1PFvQr4LoOp4iRJ0iosW5hr4Jnu7sbup3CqOEmSxm6oa8xJNiS5HzgC7AO+ilPFSZI0dkMNyVlVPwLOTnIy8HHgp4d9gqraBewC2LZtW40SUpK0vLkdnxzL4zy+87KxPI5Gs6Je2VX1JIOZaC6gmyquW+VUcZIkjcGyZ8xJNgP/UlVPJnkhcAmDjl/Hp4q7DaeKE35al6RxGKYpewuwO8kGBmfYt1fVXUkeBm5L8jvAF3CqOEmSVm3ZwlxVXwLOWWS5U8VJkjRmjvwlSVJDLMySJDXEwixJUkMszJIkNcTCLElSQ4Ya+Uvr17i+eyxJGg/PmCVJaoiFWZKkhliYJUlqiIVZkqSGWJglSWqIhVmSpIZYmCVJaoiFWZKkhliYJUlqiIVZkqSGWJglSWrIsoU5ye
lJ7knycJKHklzfLX95kn1JHun+fdnk40qStL4NM4nFMeDdVXVfkpcAB5LsA64F7q6qnUl2ADuA90wuqiRprRnXRDmP77xsLI+zFix7xlxVh6vqvu7294CDwKnAFcDubrPdwFsmFVKSpFmxomvMSeaAc4B7gVOq6nC36gnglCV+Z3uS/Un2Hz16dBVRJUla/4YuzEleDNwB3FBVT89fV1UF1GK/V1W7qmpbVW3bvHnzqsJKkrTeDVWYk2xkUJRvqao7u8XfTLKlW78FODKZiJLGwY6c0towTK/sADcDB6vq/fNW7QWu6W5fA3xi/PEkjdHxjpxnAecD70hyFoOOm3dX1ZnA3d19ST0Z5oz5QuBq4KIk93c/bwJ2ApckeQR4Q3dfUqPsyCmtDct+XaqqPgtkidUXjzeONL6vV8BsfcViJUbtyAlsB9i6devkQ0ozypG/pBljR06pbRZmaYbYkVNqn4VZmhF25JTWhmGG5JS0PhzvyPlAkvu7Ze9l0HHz9iTXAV8D3tpTPklYmKWZYUdOaW2wKVuSpIZYmCVJaoiFWZKkhliYJUlqiIVZkqSGWJglSWqIhVmSpIas2+8xOxGCJGktWreFWZKkxYzrxG1SJ202ZUuS1BALsyRJDVm2MCf5cJIjSR6ct+zlSfYleaT792WTjSlJ0mwY5hrzR4APAB+dt2wHcHdV7Uyyo7v/nvHHk1an9WtJkrTQsmfMVfUZ4DsLFl8B7O5u7wbeMuZckiTNpFGvMZ9SVYe7208Apyy1YZLtSfYn2X/06NERn06SpNmw6s5fVVVAnWD9rqraVlXbNm/evNqnkyRpXRu1MH8zyRaA7t8j44skSdLsGrUw7wWu6W5fA3xiPHEkSZptw3xd6lbgfwOvTnIoyXXATuCSJI8Ab+juS5KkVVr261JV9bYlVl085ixagXGOBS5JrZul9zxH/pIkqSFOYiGpeet5oJhZOhPUcDxjliSpIRZmSZIaYlO2JI3AJmhNimfMkiQ1xDNmSdK/YWtAvyzM0hDG+UbVYs9gSe2wKVuSpIZYmCVJaoiFWZKkhliYJUlqiJ2/psiejpKk5XjGLElSQyzMkiQ1xMIsSVJDLMySJDVkVYU5yaVJvpLk0SQ7xhVK0vR5PEttGLlXdpINwB8BlwCHgM8n2VtVD48rnKTpmJXj2W9GaC1YzRnzecCjVfVYVf0QuA24YjyxJE2Zx7PUiNV8j/lU4Ovz7h8CXrtwoyTbge3d3WeSfGUVzzlpm4BvLVyYG3tIMn2LvvYZMPXXPeTf0ysnHGOhZY/nJY7lVv9uWs0F7WZrNRc0mi03DpVrxcfyxAcYqapdwK5JP884JNlfVdv6ztGHWX3ts/q6R7HYsdzq/ms1F7SbrdVc0G62SeVaTVP2N4DT590/rVsmae3xeJYasZrC/HngzCRnJHkecBWwdzyxJE2Zx7PUiJGbsqvqWJJ3An8NbAA+XFUPjS1ZP9ZEk/uEzOprn9XX/W+s4nhudf+1mgvazdZqLmg320Rypaom8biSJGkEjvwlSVJDLMySJDXEwgwkOT3JPUkeTvJQkuv7zjRNSTYk+UKSu/rOMk1JTk6yJ8mXkxxMckHfmVq13HCdSZ6f5GPd+nuTzDWS69okR5Pc3/28fUq5PpzkSJIHl1ifJH/Q5f5SknMbyfX6JE/N21+/OaVcy74H97jPhsk23v1WVTP/A2wBzu1uvwT4P8BZfeea4uv/NeDPgLv6zjLl170beHt3+3nAyX1navGHQWewrwL/odtPX1x4fAD/A/hQd/sq4GON5LoW+EAP++x1wLnAg0usfxPwaSDA+cC9jeR6fR/vA8O8B/e4z4bJNtb95hkzUFWHq+q+7vb3gIMMRkJa95KcBlwG3NR3lmlK8lIGb1I3A1TVD6vqyX5TNWuY4TqvYPBBB2APcHGSNJCrF1X1GeA7J9jkCuCjNfA54OQkWxrI1Ysh34P72mdTrw8W5gW6JrhzgHv7TTI1vwf8OvCvfQ
eZsjOAo8Afd834NyV5Ud+hGrXYcJ0L35ie3aaqjgFPAa9oIBfAz3dNn3uSnL7I+j4Mm70PFyT5YpJPJ/mZaT/5Cd6De99ny9SHse03C/M8SV4M3AHcUFVP951n0pK8GThSVQf6ztKDkxg06X2wqs4B/glwqsP15y+Buar6WWAfz53Va3H3Aa+sqtcAfwj8xTSfvOX34GWyjXW/WZg7STYy2Om3VNWdfeeZkguBy5M8zqAZ8KIkf9pvpKk5BByqquOffPcwKNT6ccMM1/nsNklOAl4KfLvvXFX17ar6QXf3JuDnJpxpWE0OgVpVT1fVM93tTwEbk2yaxnMP8R7c2z5bLtu495uFmUFvPwbXGg9W1fv7zjMtVfUbVXVaVc0x6LDzt1X1iz3HmoqqegL4epJXd4suBtbV3MNjNMxwnXuBa7rbVzL4W5r06EXL5lpwDfJyBtcHW7AX+KWup/H5wFNVdbjvUEl+6njfgCTnMagRk/6ANex7cC/7bJhs495vE59dao24ELgaeCDJ/d2y93affLR+/SpwS/em/hjwyz3naVItMVxnkt8G9lfVXgZvXH+S5FEGnYuuaiTXu5JcDhzrcl076VwASW5l0FN3U5JDwPuAjV3uDwGfYtDL+FHgn5nS394Qua4EfiXJMeD7wFVT+IAFS7wHA1vnZetlnw2Zbaz7zSE5JUlqiE3ZkiQ1xMIsSVJDLMySJDXEwixJUkMszJIkNcTCLElSQyzMkiQ15P8DXq+HYzIwlvsAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(2, 2, figsize=(8, 4))\n", + "ax[0, 0].hist(data[0])\n", + "ax[0, 1].hist(data[1])\n", + "ax[1, 0].hist(data[2])\n", + "ax[1, 1].hist(data[3])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Adding titles to the previous plot makes it more readable." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeYAAAF1CAYAAAA9YUkiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3dfbRlVXnn++9PQFQwAlIhvB+ixAS9CqYasclVGjSNwgXToWloQ4MDUzE3ttAxQ0s63ZoM7RR3pH1rMzSlIMQgQiNGBEnkEoT2RjFVBBUobV5SBLCgSgWBmNYufO4fa5Uej+dwdp2zz97z1P5+xtjjrL1en72q5n72nGuuuVJVSJKkNjxl3AFIkqQfMzFLktQQE7MkSQ0xMUuS1BATsyRJDTExS5LUEBPzDi5JJXnuPOtclOSdo4ppxrE3JnnFOI4tLVeDlOvt2Ne1Sc6cY9lUf6ydRxGLOiZmjcw4fwBIml1VvaqqLh5k3SSfT/L6pY5p0pmYJUlqiIl5xJK8NckDSR5L8o0kxyV5SpLVSe5O8u0klyfZq19/W1PSqiTfTLIpye9N29+RSb6Y5JF+2QeSPHWRMZ6Y5NZ+n3+T5IXTlm1M8ntJvprku0kuS/K0acvf0sfxzSSv39bMlWQV8FrgLUkeT/KZaYc8fK79SctBa+U6ySH9tk/p3384yeZpyz+W5Nx++ke14CQ7JfnjJN9Kcg9wwrRt3gX8n8AH+jL8gWmHfEWSO/tj/kmSLOxMCoCq8jWiF/A84D5gv/79FPAc4BzgS8ABwK7AnwKXTlungEuB3YD/A9gCvKJf/svAUcDO/bobgHOnHbOA584T10XAO/vpI4DNwEuAnYAzgY3Arv3yjcCXgf2AvfrjvaFfdjzwIPB84BnAn08//vTjTDv2nPvz5Ws5vBou1/8A/HI//Q3gHuCXpi07op/+PPD6fvoNwNeBA/vyeEN/rJ1nrjsjlquBPYCD+s9x/Lj/XZbzyxrzaD1BV0APS7JLVW2sqrvpCsN/rKr7q+r7wDuAU2Z0uPiDqvrHqvoa8FHgdICqWl9VX6qqrVW1ka7wv3wRMa4C/rSqbq6qJ6q79vR9ui+Jbd5fVd+squ8AnwEO7+efCny0qm6vqu/1n2MQc+1PWg5aLdc3Ai9P8nP9+yv694cAPwN8ZZZtTgXeW1X39eXxjwY81pqqeqSq/oEumVuGF8HEPEJVdRdwLl0B3ZzkE0n2Aw4GPtU3Az1C9+v4CWCfaZvfN236XroaJkl+IcnVSR5M8ijwX4C9FxHmwcCbt8XSx3PgtuP1Hpw2/T1g9356vxlxTp9+MnPtT2pew+X6RuAY4GXATXS13Zf3r/9RVT+cZZuZZfjeAY9lGR4iE/OIVdXHq+pX6AptAefTFYRXVdUe015Pq6oHpm164LTpg4B
v9tMfpGt6OrSqfgY4D1jM9Z37gHfNiOUZVXXpANtuomu2my1m6D6vtMNptFzfSHdN+Jh++gvA0XSJ+cY5ttk0S0zTWYZHwMQ8Qkmel+TYJLsC/wv4J+CHwIeAdyU5uF9vRZKTZ2z+n5I8I8nzgdcBl/Xznwk8Cjye5BeB315kmB8G3pDkJensluSEJM8cYNvLgdcl+aUkzwD+04zlDwE/v8j4pKa0Wq6r6s4+lt8AbqyqR+nK4K8zd2K+HHhTkgOS7AmsnrHcMjwCJubR2hVYA3yLrunnZ4G3Ae8DrgI+l+Qxug4jL5mx7Y3AXcD1wB9X1ef6+b8H/FvgMbqkehmLUFXrgN8EPgA83B/zrAG3vRZ4P901prv6zwHdNWqAC+iuwz2S5C8WE6fUkJbL9Y3At6vqvmnvA9wyx/ofBv6K7vrzLcCVM5a/j+46+cNJ3r/AmDSPVNky0bIkU8DfA7tU1dbxRrN9kvwScBtdj+5lFbu0lJZzudbSs8asoUrya0l27ZvBzgc+4xePJA3OxDwhktzeDwow8/XaIR/qt+jug76brgfqYq95S5rDCMu1RsimbEmSGmKNWZKkhpiYJUlqyJzP2FwKe++9d01NTY3ykNKytH79+m9V1YpxxzEXy7I0mIWU5ZEm5qmpKdatWzfKQ0rLUpJBh0IcC8uyNJiFlGWbsiVJaoiJWZKkhpiYJUlqiIlZkqSGmJglSWrISHtlC6ZWXzOU/Wxcc8JQ9iNpvIb1nQB+L+worDFLktQQE7MkSQ0xMUuS1BATsyRJDbHz14Sz44kktcUasyRJDZk3MSc5MMkNSe5IcnuSc/r570jyQJJb+9erlz5cSZJ2bIM0ZW8F3lxVtyR5JrA+yXX9svdU1R8vXXiSJE2WeRNzVW0CNvXTjyXZAOy/1IFJkjSJtusac5Ip4Ajg5n7WG5N8NcmFSfacY5tVSdYlWbdly5ZFBStJ0o5u4MScZHfgk8C5VfUo8EHgOcDhdDXq/zrbdlW1tqpWVtXKFStWDCFkSZJ2XAMl5iS70CXlS6rqSoCqeqiqnqiqHwIfBo5cujAlSZoMg/TKDnABsKGq3j1t/r7TVvs14LbhhydJ0mQZpFf20cAZwNeS3NrPOw84PcnhQAEbgd9akgglSZogg/TK/gKQWRZ9dvjhSJI02Rz5S5KkhpiYJUlqiIlZkqSGmJglSWqIiVmSpIbs0M9jHtazhn3OsCRpVKwxS5LUEBOzJEkNMTFLktQQE7MkSQ3ZoTt/SfpJSZ4G3ATsSlf+r6iqtyc5BPgE8GxgPXBGVf1gfJFqIezwumOwxixNlu8Dx1bVi+iepX58kqOA84H3VNVzgYeBs8cYozTRTMzSBKnO4/3bXfpXAccCV/TzLwZeM4bwJGFiliZOkp36R7huBq4D7gYeqaqt/Sr3A/vPst2qJOuSrNuyZcvoApYmjIlZmjBV9URVHQ4cABwJ/OKA262tqpVVtXLFihVLGqM0yUzM0oSqqkeAG4CXAnsk2dYZ9ADggbEFJk04E7M0QZKsSLJHP/104JXABroEfUq/2pnAp8cToaR5E3OSA5PckOSOJLcnOaefv1eS65Lc2f/dc+nDlbRI+wI3JPkq8LfAdVV1NfBW4HeT3EV3y9QFY4xRmmiD3Me8FXhzVd2S5JnA+iTXAWcB11fVmiSrgdV0hVtSo6rqq8ARs8y/h+56s6Qxm7fGXFWbquqWfvoxumav/YGT6W6rAG+vkCRpKLbrGnOSKbpf2zcD+1TVpn7Rg8A+c2zjLRaSJA1o4MScZHfgk8C5VfXo9GVVVXSDFPwUb7GQJGlwAyXmJLvQJeVLqurKfvZDSfbtl+9LN1iBJElahEF6ZYeuh+aGqnr3tEVX0d1WAd5eIUnSUAzSK/to4Azga/0wfgDnAWuAy5OcDdwLnLo0IUqSNDnmTcxV9QUgcyw+brjhSJI02Rz5S5KkhpiYJUlqiIlZkqSGmJglSWqIiVmSpIYMcruUNJCp1dcMZT8
b15wwlP1I0nJkjVmSpIaYmCVJaoiJWZKkhpiYJUlqiJ2/JGkBhtXZUZrJGrMkSQ0xMUuS1BATsyRJDTExS5LUEDt/LVN2PJGkHZM1ZkmSGjJvjTnJhcCJwOaqekE/7x3AbwJb+tXOq6rPLlWQkjQstjapdYPUmC8Cjp9l/nuq6vD+ZVKWJGkI5k3MVXUT8J0RxCJJ0sRbzDXmNyb5apILk+w510pJViVZl2Tdli1b5lpNkiSx8MT8QeA5wOHAJuC/zrViVa2tqpVVtXLFihULPJykYUhyYJIbktyR5PYk5/Tz90pyXZI7+79z/tiWtLQWlJir6qGqeqKqfgh8GDhyuGFJWiJbgTdX1WHAUcDvJDkMWA1cX1WHAtf37yWNwYISc5J9p739NeC24YQjaSlV1aaquqWffgzYAOwPnAxc3K92MfCa8UQoaZDbpS4FjgH2TnI/8HbgmCSHAwVsBH5rCWOUtASSTAFHADcD+1TVpn7Rg8A+YwpLmnjzJuaqOn2W2RcsQSySRiTJ7sAngXOr6tEkP1pWVZWkZtlmFbAK4KCDDhpVqNLEcUjOATgggXYkSXahS8qXVNWV/eyHkuxbVZv6S1WbZ25XVWuBtQArV678qcQtaTgcklOaIOmqxhcAG6rq3dMWXQWc2U+fCXx61LFJ6lhjlibL0cAZwNeS3NrPOw9YA1ye5GzgXuDUMcUnTTwTszRBquoLQOZYfNwoY5E0O5uyJUlqiIlZkqSGmJglSWqIiVmSpIaYmCVJaoiJWZKkhpiYJUlqiIlZkqSGmJglSWqIiVmSpIaYmCVJaoiJWZKkhpiYJUlqyLyJOcmFSTYnuW3avL2SXJfkzv7vnksbpiRJk2GQGvNFwPEz5q0Grq+qQ4Hr+/eSJGmR5k3MVXUT8J0Zs08GLu6nLwZeM+S4JEmaSAu9xrxPVW3qpx8E9hlSPJIkTbRFd/6qqgJqruVJViVZl2Tdli1bFns4SZJ2aAtNzA8l2Reg/7t5rhWram1VrayqlStWrFjg4SRJmgwLTcxXAWf202cCnx5OOJIkTbZBbpe6FPgi8Lwk9yc5G1gDvDLJncAr+veSJGmRdp5vhao6fY5Fxw05FkmSJt68iXnUplZfM+4QJEkaG4fklCSpISZmSZIaYmKWJKkhJmZJkhpiYpYkqSHN9cqWtHSSXAicCGyuqhf08/YCLgOmgI3AqVX18Lhi1I5lWHfabFxzwlD2sxxYY5Ymy0X4GFepaSZmaYL4GFepfSZmST7GVWqIiVnSjzzZY1x9hKs0GiZmSQM9xtVHuEqjYWKW5GNcpYZ4u5Q0QfrHuB4D7J3kfuDtdI9tvbx/pOu9wKnji1At8GFC42ViliaIj3GV2mdTtiRJDbHGrOYMsxltWKMFtRiTpB3TohJzko3AY8ATwNaqWjmMoCRJmlTDqDH/i6r61hD2I0nSxPMasyRJDVlsYi7gc0nWJ1k1jIAkSZpki23K/pWqeiDJzwLXJfl6P0j+j/QJexXAQQcdtMjDSdvH+zF3DP47apIsqsZcVQ/0fzcDnwKOnGUdh/GTJGlAC07MSXZL8sxt08CvArcNKzBJkibRYpqy9wE+lWTbfj5eVX85lKgkSZpQC07MVXUP8KIhxiJJ0sTzdilJkhpiYpYkqSEmZkmSGmJiliSpISZmSZIaYmKWJKkhPo9Z0pJxKE1p+1ljliSpISZmSZIaYmKWJKkhJmZJkhpiYpYkqSEmZkmSGuLtUpKk5rV4693GNScsyX6tMUuS1BATsyRJDTExS5LUkEUl5iTHJ/lGkruSrB5WUJJGz/IstWHBiTnJTsCfAK8CDgNOT3LYsAKTNDqWZ6kdi6kxHwncVVX3VNUPgE8AJw8nLEkjZnmWGrGYxLw/cN+09/f38yQtP5ZnqRFLfh9zklXAqv7t40m+sdTHHLK9gW+NO4gh2ZE+CyzTz5PzZ50987McPJJgtsMCy3Kr/0atxgXtxmZcM8xRlqfbmwWU5cUk5geAA6e
9P6Cf9xOqai2wdhHHGask66pq5bjjGIYd6bPAjvV5Gvgs85bnhZTlBj7XrFqNC9qNzbi2Xx/b1PZut5im7L8FDk1ySJKnAqcBVy1if5LGx/IsNWLBNeaq2prkjcBfATsBF1bV7UOLTNLIWJ6ldizqGnNVfRb47JBiadWybYafxY70WWDH+jxj/yxLVJ7H/rnm0Gpc0G5sxrX9FhRbqmrYgUiSpAVySE5JkhpiYn4SSTYm+VqSW5OsG3c8i5FkjyRXJPl6kg1JXjrumBYiyfP6f49tr0eTnDvuuBYqyX9IcnuS25JcmuRp445peyQ5MMkNSe7oP8c5s6yTJO/vh/r8apIXNxTbMUm+O+3/038eQVxPS/LlJF/p4/qDWdbZNcll/Tm7OcnUUse1HbGdlWTLtHP2+lHE1h97pyR/l+TqWZaN5ZwNENd2ny+fxzy/f1FVLd67t73eB/xlVZ3S97p9xrgDWoiq+gZwOPxoGMkHgE+NNagFSrI/8CbgsKr6pySX0/WGvmisgW2frcCbq+qWJM8E1ie5rqrumLbOq4BD+9dLgA/2f1uIDeB/VNWJI4hnm+8Dx1bV40l2Ab6Q5Nqq+tK0dc4GHq6q5yY5DTgf+DeNxAZwWVW9cQTxzHQOsAH4mVmWjeuczRcXbOf5ssY8AZI8C3gZcAFAVf2gqh4Zb1RDcRxwd1XdO+5AFmFn4OlJdqb7sfTNMcezXapqU1Xd0k8/RvflNHPEsJOBP6vOl4A9kuzbSGwj15+Hx/u3u/SvmZ19TgYu7qevAI5LkkZiG4skBwAnAB+ZY5WxnLMB4tpuJuYnV8DnkqzvRz1arg4BtgAf7ZtbPpJkt3EHNQSnAZeOO4iFqqoHgD8G/gHYBHy3qj433qgWrm86PAK4ecaisQ/3+SSxAby0b7q9NsnzRxTPTkluBTYD11XVnOesqrYC3wWe3UhsAL/eX5a4IsmBsyxfCu8F3gL8cI7l4zpn88UF23m+TMxP7leq6sV0TXG/k+Rl4w5ogXYGXgx8sKqOAP4RWNaP9eub408C/vu4Y1moJHvS/co/BNgP2C3Jb4w3qoVJsjvwSeDcqnp03PFMN09stwAHV9WLgP8G/MUoYqqqJ6rqcLoR1o5M8oJRHHcQA8T2GWCqql4IXMePa6lLJsmJwOaqWr/Ux9oeA8a13efLxPwk+hoNVbWZ7jrmkeONaMHuB+6f9sv3CrpEvZy9Crilqh4adyCL8Arg76tqS1X9b+BK4J+POabt1l+L/CRwSVVdOcsqAw3fO47YqurRbU23/X3cuyTZexSx9cd8BLgBOH7Goh+ds/4yx7OAb48qrieLraq+XVXf799+BPjlEYRzNHBSko10Tz47Nsmfz1hnHOds3rgWcr5MzHNIslvfYYS+2fdXgdvGG9XCVNWDwH1JntfPOg6Y2QFmuTmdZdyM3fsH4Kgkz+ivhR1Hdx102ejjvgDYUFXvnmO1q4B/1/fOPoquyX5TC7El+blt1yGTHEn3nbikX+ZJViTZo59+OvBK4OszVrsKOLOfPgX46xrBoBODxDajf8BJjOD/bFW9raoO6MedPo3ufMxsXRr5ORskroWcL3tlz20f4FN9md0Z+HhV/eV4Qxpckovoasm/38/698AlfRPwPcDrkhwD/HlVHTCG+N4BPHeWwjXItrvRfWH81rDjGqWqujnJFXTNqVuBv6PtUYxmczRwBvC1/rokwHnAQQBV9SG60cReDdwFfA94XUOxnQL8dpKtwD8Bp40gAe4LXNzfVfAU4PKqujrJHwLrquoquh8UH0tyF/Adui/92cr1giV5LXBmVf3qHLE9h+4a88zY3pTkpH7dR/np2v7IDHLOGohr2/na2sd11rzbO/LX8tA3lby+qv7fAde/iHkK8KgS82zHWUxilnYUS1GuhyXJ5+nK7UeSnEUX56+MI5ZJY1O2JEkNMTGPWLrRxN6WbjSih5N8NP1oT0lOTDcyzCNJ/ibJC/v5H6NrfvtMkseTvKW
f/9+TPJhu5KKbFnurR5L9knwy3Sg1f5/kTdOWvSPJ5Un+LMlj6UYFWjlt+YvT3Yr1WB/XZUne2Tc7Xwvs18f+eJL9+s2eOtf+pOWkpXKd5MYkv95PH52kkpzQvz9uW7N+uhGpvjBtu1emGxnwu0k+AGy79v5LwIfobit7PMn0MRD2THJNX4ZvTvKchZ1BTWdiHo/XAv+S7hrOLwC/n+QI4EK666bPBv4UuCrJrlV1Bl1Hof+rqnavqv+n38+1dKMp/SzddcpLFhpQkqfQdev/Ct39gMcB5yb5l9NWO4mu5+EedB0tPtBv+1S6XusXAXvRdcr6NYCq+ke6HtTf7GPfvaq++WT7k5apVsr1jcAx/fTL6fqUvGza+xtnbpCuJ/qVwO8DewN3012jp6o2AG8AvtjHuce0TU8D/gDYk64Pwbu2M1bNwsQ8Hh+oqvuq6jt0/5FPB1YBf1pVN/f3EV5MNzzeUXPtpKourKrH+q747wBelG6Ur4X4Z8CKqvrD6kYGuwf4MD/ZgeILVfXZqnoC+Bjwon7+UXQd5N5fVf+7vy3lywMcc679SctRK+X6RroEDF1C/qNp72dNzHSd826vqiv6W/feCzw4wLE+VVVf7gf0uIR+uFwtjol5PKaPgnQv3eASBwNv7pu7Humbiw7sl/2UdKPzrElyd5JHgY39ooXeg3kwXXPz9OOfR9c7fZvpBfV7wNPS3S+4H/DAjN6s0z/jXOban7QctVKuvwj8QpJ96BLlnwEH9rXiI4GbZtlmv+nx92V5IWV49+2IU3PwS3A8pg+2cBDd+Mj3Ae+qqrmagmZ2n/+3dKNGvYKu8D4LeJj+utAC3Ec32MWhC9h2E7B/kkxLzgfSNYdBI2PtSkusiXJdVd9Lsp7uwQq3VdUPkvwN8Lt0Y8vP9lCeTdPjT5IZn8cyPELWmMfjd5IckGQv4D8Cl9E1G78hyUvS2S3JCekHOQEeAn5+2j6eSdck9m26hx/8l0XG9GXgsSRvTfL0/pf7C5L8swG2/SLwBPDGJDsnOZmfHCXtIeDZi2hml5aDlsr1jcAb+XGz9ednvJ/pGuD5Sf5V32r1JuDnpi1/CDig70+iJWZiHo+PA5+j65RxN/DOqloH/CZdB6iH6TpSnDVtmz+i60zySJLfo2ueupduGLo7gJmPZdsu/XXeE+mavv4e+Bbd8HHzJtOq+gHwr+geu/YI8BvA1XRfMFTV1+k6hN3Txz9rM560zLVUrm+kS/I3zfH+J/S16H8NrKH7UXAo8P9NW+WvgduBB5PsCI/BbZoDjIxYtnNAgeUqyc3Ah6rqo+OORVpqk1KuNRrWmDUUSV6ebtzhnZOcCbwQWDZDmEpSK0zMO7gk5+XHA3tMf1075EM9j+4e6EeANwOnjOJBBdIkGmG51hjYlC1JUkOsMUuS1BATsyRJDRnpACN77713TU1NjfKQ0rK0fv36b1XVinHHMRfLsjSYhZTlkSbmqakp1q1bN8pDSstSknvHHcOTsSxLg1lIWbYpW5KkhpiYJUlqiIlZkqSGmJglSWqIiVmSpIY09zzmqdXXDG1fG9ecMLR9STuKJDsB64AHqurEJIcAnwCeDawHzuifGLZowyrPlmVNEmvM0uQ5B9gw7f35wHuq6rl0jyY8eyxRSQJMzNJESXIAcALds7ZJEuBY4Ip+lYuB14wnOklgYpYmzXuBtwA/7N8/G3ikqrb27+8H9p9twySrkqxLsm7Lli1LH6k0oUzM0oRIciKwuarWL2T7qlpbVSurauWKFc2OFiote811/pK0ZI4GTkryauBpwM8A7wP2SLJzX2s+AHhgjDFKE88aszQhquptVXVAVU0BpwF/XVWvBW4ATulXOxP49JhClISJWRK8FfjdJHfRXXO+YMzxSBPNpmxpAlXV54HP99P3AEeOMx5JP2aNWZKkhpiYJUlqiIlZkqSGmJglSWqIiVmSpIaYmCVJaoiJWZKkhpiYJUlqiIlZkqSGmJglSWqIiVmSpIa
YmCVJaoiJWZKkhpiYJUlqyLyJOcnTknw5yVeS3J7kD/r5hyS5OcldSS5L8tSlD1eSpB3bIDXm7wPHVtWLgMOB45McBZwPvKeqngs8DJy9dGFKkjQZ5k3M1Xm8f7tL/yrgWOCKfv7FwGuWJEJJkibIQNeYk+yU5FZgM3AdcDfwSFVt7Ve5H9h/jm1XJVmXZN2WLVuGEbMkSTusnQdZqaqeAA5PsgfwKeAXBz1AVa0F1gKsXLmyFhKkJGl+U6uvGcp+Nq45YSj70cJsV6/sqnoEuAF4KbBHkm2J/QDggSHHJknSxBmkV/aKvqZMkqcDrwQ20CXoU/rVzgQ+vVRBSpI0KQZpyt4XuDjJTnSJ/PKqujrJHcAnkrwT+DvggiWMU8uAzWiStHjzJuaq+ipwxCzz7wGOXIqgJEmaVI78JUlSQ0zMkiQ1xMQsTRCH2JXaZ2KWJotD7EqNMzFLE8QhdqX2mZilCbPQIXYdXlcaDROzNGGq6omqOpxuxL4jGXCI3apaW1Urq2rlihUrljRGaZKZmKUJ5RC7UptMzNIEcYhdqX0DPV1K0g7DIXalxpmYJ9ywxrfW8uAQu1L7bMqWJKkhJmZJkhpiYpYkqSEmZkmSGmJiliSpISZmSZIaYmKWJKkhJmZJkhpiYpYkqSEmZkmSGmJiliSpISZmSZIaYmKWJKkhJmZJkhpiYpYkqSEmZkmSGmJiliSpISZmSZIaMm9iTnJgkhuS3JHk9iTn9PP3SnJdkjv7v3sufbiSJO3Ydh5gna3Am6vqliTPBNYnuQ44C7i+qtYkWQ2sBt66dKFKkpabqdXXDGU/G9ecMJT9LAfz1piralNV3dJPPwZsAPYHTgYu7le7GHjNUgUpSdKk2K5rzEmmgCOAm4F9qmpTv+hBYJ85tlmVZF2SdVu2bFlEqJIk7fgGTsxJdgc+CZxbVY9OX1ZVBdRs21XV2qpaWVUrV6xYsahgJUna0Q2UmJPsQpeUL6mqK/vZDyXZt1++L7B5aUKUJGlyDNIrO8AFwIaqeve0RVcBZ/bTZwKfHn54kiRNlkFqzEcDZwDHJrm1f70aWAO8MsmdwCv695Ia5u2PUvvmvV2qqr4AZI7Fxw03HElLzNsfpcY58pc0Qbz9UWqfiVmaUAu5/VHS0htk5C9ppIY1UhBM1mhB22Pm7Y9dH89OVVWSn7r9MckqYBXAQQcdNKpQpYljjVmaMAu9/dExCaTRMDFLE8TbH6X22ZQtTZZttz9+Lcmt/bzz6G53vDzJ2cC9wKljik+aeCZmaYJ4+6PUPpuyJUlqiIlZkqSGmJglSWqIiVmSpIaYmCVJaoiJWZKkhpiYJUlqiIlZkqSGmJglSWqIiVmSpIaYmCVJaoiJWZKkhpiYJUlqiIlZkqSG7NCPfZxafc1Q9rNxzQlD2Y8kSfOxxixJUkN26BqzJEkztd6aao1ZkqSGmJglSWqIiVmSpIaYmCVJaoiJWZKkhszbKzvJhcCJwOaqekE/by/gMmAK2AicWlUPL12Y0sK03vtSkmYapMZ8EXD8jHmrgeur6lDg+v69JElapHkTc1XdBHxnxuyTgYv76YuB1ww5LkmSJtJCrzHvU1Wb+ukHgX2GFI8kSRNt0Z2/qqqAmmt5klVJ1iVZt2XLlsUeTtIiJLkwyeYkt02bt1eS65Lc2f/dc5wxSpNuoYn5oYueXRIAAAZgSURBVCT7AvR/N8+1YlWtraqVVbVyxYoVCzycpCG5CPuMSE1baGK+Cjiznz4T+PRwwpG0lOwzIrVv3sSc5FLgi8Dzktyf5GxgDfDKJHcCr+jfS1qeBuoz4mUpaTTmvY+5qk6fY9FxQ45F0phVVSWZtc9IVa0F1gKsXLlyzn4lkhbHkb8kDdxnRNLSMzFLss+I1JB5m7LVpmENNanJ0vcZOQbYO8n9wNvp+ohc3vcfuRc4dXwRSrObpO88E7M0QewzIrXPpmxJkhpijVlS84bZjNnak8ImqYlWg7HGLElSQ0zMkiQ1xMQ
sSVJDvMYsSQvgtWEtFWvMkiQ1xMQsSVJDbMqWJP0Em+nHyxqzJEkNscYsDWBHHuBCUlusMUuS1BATsyRJDTExS5LUEBOzJEkNMTFLktQQE7MkSQ3xdqkR88Z9SdKTscYsSVJDTMySJDXExCxJUkNMzJIkNcTELElSQ0zMkiQ1xMQsSVJDFnUfc5LjgfcBOwEfqao1Q4lK0shNSnl2LAG1bsE15iQ7AX8CvAo4DDg9yWHDCkzS6FiepXYspin7SOCuqrqnqn4AfAI4eThhSRoxy7PUiMUk5v2B+6a9v7+fJ2n5sTxLjVjysbKTrAJW9W8fT/KNpT7mIuwNfGvmzJw/hkhGb9bPPgFG/rkH/P908BKHsd3mKMut/r9pNS5oN7ZW44JGY8v5A8W13WV5MYn5AeDAae8P6Of9hKpaC6xdxHFGJsm6qlo57jjGYVI/+6R+7lnMW55nK8utnr9W44J2Y2s1Lmg3tqWKazFN2X8LHJrkkCRPBU4DrhpOWJJGzPIsNWLBNeaq2prkjcBf0d1ecWFV3T60yCSNjOVZaseirjFX1WeBzw4plhYsiyb3JTKpn31SP/dPWWB5bvX8tRoXtBtbq3FBu7EtSVypqqXYryRJWgCH5JQkqSEmZiDJgUluSHJHktuTnDPumEYpyU5J/i7J1eOOZZSS7JHkiiRfT7IhyUvHHVOrkhyf5BtJ7kqyepbluya5rF9+c5KpRuI6K8mWJLf2r9ePKK4Lk2xOctscy5Pk/X3cX03y4kbiOibJd6edr/88irj6Y8/7PTyO8zZgXMM9b1U18S9gX+DF/fQzgf8JHDbuuEb4+X8X+Dhw9bhjGfHnvhh4fT/9VGCPccfU4ouuM9jdwM/35+krM8sH8H8DH+qnTwMuaySus4APjOGcvQx4MXDbHMtfDVwLBDgKuLmRuI4Z1/fAIN/D4zhvA8Y11PNmjRmoqk1VdUs//RiwgQkZ9SjJAcAJwEfGHcsoJXkW3ZfUBQBV9YOqemS8UTVrkOE6T6b7oQNwBXBckjQQ11hU1U3Ad55klZOBP6vOl4A9kuzbQFxjM+D38MjP2zjyg4l5hr4J7gjg5vFGMjLvBd4C/HDcgYzYIcAW4KN9M/5Hkuw27qAaNchwnT9ap6q2At8Fnt1AXAC/3jd7XpHkwFmWj0PLQ6C+NMlXklyb5PnjCOBJvofHet7myQ9DO28m5mmS7A58Eji3qh4ddzxLLcmJwOaqWj/uWMZgZ7omvQ9W1RHAPwI/dY1Sy95ngKmqeiFwHT+u1Wt2twAHV9WLgP8G/MWoA2j1e3ieuIZ63kzMvSS70J30S6rqynHHMyJHAycl2UjXDHhskj8fb0gjcz9wf1Vt++V7BV2i1k8bZPjdH62TZGfgWcC3xx1XVX27qr7fv/0I8MtLHNOgBhrSeNSq6tGqeryf/iywS5K9R3X8Ab6Hx3Le5otr2OfNxEzX04/uWuOGqnr3uOMZlap6W1UdUFVTdB12/rqqfmPMYY1EVT0I3Jfkef2s44A7xhhSywYZrvMq4Mx++hS6/0tLPUjCvHHNuP54Et31wRZcBfy7vpfxUcB3q2rTuINK8nPb+gYkOZIuRyz1D6xtxx7ke3jk522QuIZ93pb86VLLxNHAGcDXktzazzuv/+WjHde/By7pv9TvAV435niaVHMM15nkD4F1VXUV3RfXx5LcRde56LRG4npTkpOArX1cZy11XABJLqXrqbt3kvuBtwO79HF/iG6EtVcDdwHfY0T/9waI6xTgt5NsBf4JOG0EP7C2mfV7GDhoWnzjOG+DxDXU8+bIX5IkNcSmbEmSGmJiliSpISZmSZIaYmKWJKkhJmZJkhpiYpYkqSEmZkmSGmJiliSpIf8/2VTWh37m0z8AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(2, 2, figsize=(8, 6))\n", + "ax[0, 0].hist(data[0])\n", + "ax[0, 1].hist(data[1])\n", + "ax[1, 0].hist(data[2])\n", + "ax[1, 1].hist(data[3])\n", + "ax[0, 0].set_title(cols[0])\n", + "ax[0, 1].set_title(cols[1])\n", + "ax[1, 0].set_title(cols[2])\n", + "ax[1, 1].set_title(cols[3])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scatter Plots\n", + "These are probably more useful for this dataset because they can show clusters by species. The most basic scatter plot does not distinguish species." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(\n", + " data[0], \n", + " data[1],)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Adding color coding by species allows us to see clustering for 2 attributes for each species. Here setosa is secluded, but virginica and versicolor overlap." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "colors = {'Iris-setosa':'red', 'Iris-virginica':'blue', 'Iris-versicolor':'green'}\n", + "plt.scatter(\n", + " data[2], \n", + " data[3], \n", + " c=data['species'].map(colors))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Adding labels to the x and y axes is useful, but we can see the data for virginica and versicolor still overlap. If we could find 1 attribute where there's no overlap for these 2 species then we could use those to definitively distinguish them. But unfortunately all 4 attributes have some overlap." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'petal_length')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(\n", + " data[0], \n", + " data[2], \n", + " c=data['species'].map(colors))\n", + "plt.xlabel(cols[0])\n", + "plt.ylabel(cols[2])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we add a title to the plot, and show attributes 1 and 3. " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Iris Data Scatter Plot')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.scatter(\n", + " data[1], \n", + " data[3], \n", + " c=data['species'].map(colors))\n", + "plt.xlabel(cols[1])\n", + "plt.ylabel(cols[3])\n", + "plt.title('Iris Data Scatter Plot')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Correlation\n", + "We can see the correlation between attributes. A correlation close to 1 helps us distinguish between species. Low correlation doesn't help us." + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123
01.000000-0.1093690.8717540.817954
1-0.1093691.000000-0.420516-0.356544
20.871754-0.4205161.0000000.962757
30.817954-0.3565440.9627571.000000
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3\n", + "0 1.000000 -0.109369 0.871754 0.817954\n", + "1 -0.109369 1.000000 -0.420516 -0.356544\n", + "2 0.871754 -0.420516 1.000000 0.962757\n", + "3 0.817954 -0.356544 0.962757 1.000000" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.corr()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Box and Whisker Plots\n", + "Box plots show the distribution of data over an attribute by showing the 25th, 50th (median) and 75th percentiles. Again, the simplest plots are not very useful, but when we add labels and color coding the plots are revealing." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'whiskers': [,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " 'caps': [,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " 'boxes': [,\n", + " ,\n", + " ,\n", + " ],\n", + " 'medians': [,\n", + " ,\n", + " ,\n", + " ],\n", + " 'fliers': [,\n", + " ,\n", + " ,\n", + " ],\n", + " 'means': []}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWoAAAD4CAYAAADFAawfAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAPo0lEQVR4nO3dYWhd933G8eepoqLEjZOr5TLqKJoDG0G1oEl3CdlqCnKWka6lfVNIHFJoEWgvOi0ZBdNOL+y8EMMwSvOiDEyUdbD41luaQAlZ10BVMkGX9ipJWyVKoYtrV3E2K/h2TsLcOspvL3TtWrbse2Sdo/O/934/cIl079HRw0F5OP6f/zl/R4QAAOn6QNkBAABXRlEDQOIoagBIHEUNAImjqAEgcdcUsdObbropdu7cWcSuAaArzc/PvxUR1fU+K6Sod+7cqUajUcSuAaAr2T52uc8Y+gCAxFHUAJA4ihoAEpepqG3/je1XbC/YrtseKDoYAGBV26K2fbOkv5ZUi4hRSX2S7i86GABgVdahj2skXWv7GknXSTpRXCQAwIXaFnVEvCHp7yUdl/SmpP+NiO9dvJ3tCdsN243l5eX8kwJAj8oy9FGR9FlJt0raIWmb7Qcv3i4iDkVELSJq1eq6c7YBAFchy9DHn0k6GhHLEXFW0lOS/rTYWJtjO9cXAJQpy52JxyXdZfs6Sf8n6W5JSd92mGUxBNuZtgOAsmUZo35B0pOSXpT0s9bPHCo4FwCgJdOzPiJiv6T9BWcBAKyDOxMBIHEUNQAkjqIGgMRR1ACQOIoaABJHUQNA4ihqAEgcRQ0AiaOoASBxFDUAJI6iBoDEUdQAkDiKGgASR1EDQOIoagBIHEUNAInLsrjtbbZfvuB12vbDWxEOAJBhhZeI+Lmk2yXJdp+kNyQ9XXAuAEDLRoc+7pb0XxFxrIgwAIBLbbSo75dUX+8D2xO2G7Yby8vLm08GAJC0gaK2/UFJn5H0r+t9HhGHIqIWEbVqtZpXPgDoeRs5o/6kpBcj4n+KCgMAuFTbi4kX2KvLDHsAyMZ2rvuLiFz3hzRlKmrb2yTdI+kvi40DdLesxWqbEsZ5mYo6It6V9HsFZwEArIM7EwEgcRQ1ACSOogaAxFHUAJA4ihoAEkdRA0DiKGoASBxFDQCJo6gBIHEdV9SDg4OyvemXpFz2Y1uDg4MlHxUA3WwjD2VKQrPZTO4ZCHk/aAcALtRxZ9QA0GsoagBIHEUNAImjqAEgcRQ1ACQuU1HbvtH2k7Zfs71o+0+KDgYAWJV1et6jkr4bEZ9rrUZ+XYGZAAAXaFvUtm+Q9AlJX5CkiPitpN8WGwsAcE6WoY9bJS1L+kfbL9l+rLXY7Rq2J2w3bDeWl5dzDwoAvSpLUV8j6WOS/iEi7pD0rqSvXLxRRByKiFpE1KrVas4xAaB3ZSnqJUlLEfFC6/sntVrcAIAt0LaoI+K/Jf3K9m2tt+6W9GqhqQAA52Wd9TEp6YnWjI/XJX2xuEgAgAtlKuqIeFlSreAsmcT+7dKBG8qOsUbs3152BABdrOMec+pHTif5mNM4UHYKAN2KW8gBIHEUNQAkjqIGgMRR1ACQOIoaABJHUQNA4ihqAEgcRQ0AiaOoASBxFDUAJI6iBoDEUdQAkDiKGgASR1EDQOIoagBIHEUNAInLtHCA7V9KelvSiqT3IqLU1V5sl/nrL1GpVMqOgAQMDg6q2Wzmtr+8/s4rlYpOnTqVy75Qjo2s8DIWEW8VliSjvFZ3sZ3cSjHobM1mM8m/qdRObLBxDH0AQOKyFnVI+p7tedsT621ge8J2w3ZjeXk5v4QA0OOyFvXuiPiYpE9K+pLtT1y8QUQciohaRNSq1WquIQGgl2Uq6oh4o/Xfk5KelnRnkaEAAL/Ttqhtb7N9/bmvJf25pIWigwEAVmWZ9fH7kp5uXTm+RtLhiPhuoakAAOe1LeqIeF3SR7cgCwBgHUzPA4DEUdQAkDiKGgA
SR1EDQOIoagBIHEUNAImjqLEp9Xpdo6Oj6uvr0+joqOr1etmRgK6zkcecAmvU63VNTU1pZmZGu3fv1tzcnMbHxyVJe/fuLTkd0D04o8ZVm56e1szMjMbGxtTf36+xsTHNzMxoenq67GhAV3ERDzqv1WrRaDRy32+eWDhg8/r6+nTmzBn19/eff+/s2bMaGBjQyspKicnKkerfVKq5sJbt+cutnsXQB67ayMiI5ubmNDY2dv69ubk5jYyMlJiqPLF/u3TghrJjXCL2by87AjapK4s669JDWbfjbGR9U1NTuu+++7Rt2zYdP35cw8PDevfdd/Xoo4+WHa0UfuR0kn8rthUHyk6BzejKok7xf5ZuxzEHisPFRFy16elpHTlyREePHtX777+vo0eP6siRI1xMBHJGUeOqLS4uamlpac086qWlJS0uLpYdDegqXTn0ga2xY8cO7du3T4cPHz4/j/qBBx7Qjh07yo4GdJXMZ9S2+2y/ZPuZIgOhs1x8QTbrBVoA2W1k6OMhSfybFuedOHFCBw8e1OTkpAYGBjQ5OamDBw/qxIkTZUcDukqmorY9JOlTkh4rNg46ycjIiIaGhrSwsKCVlRUtLCxoaGioZ+dRA0XJOkb9dUn7JF1/uQ1sT0iakKTh4eHNJ0MyrjScsWfPng3/DFP5gI1pe0Zt+9OSTkbE/JW2i4hDEVGLiFq1Ws0tIMoXEZd9HT58WLt27ZIk7dq1S4cPH77i9pQ0sHFtn/Vh++8kfV7Se5IGJG2X9FREPHi5n+mEZ30gXzxPIt1jkGourHWlZ320PaOOiK9GxFBE7JR0v6TvX6mkAQD54oYXAEjchm54iYgfSPpBIUkAAOvijBoAEkdRA0DiKGoASBxFDQCJo6gBIHEUNQAkjqIGgMRR1ACQOIoaABJHUQNA4ihqAEgcRQ0AiWMV8h42ODioZrOZ2/7yWti2Uqno1KlTuewL6AYUdQ9rNptJPlCelcyBtRj6AIDEUdQAkLgsi9sO2P6R7Z/YfsX2I1sRDACwKssY9W8k7YmId2z3S5qz/W8R8Z8FZwMAKENRx+rVpnda3/a3XuldgQKALpVp1oftPknzkv5Q0jci4oV1tpmQNCFJw8PDeWZEQWL/dunADWXHuETs3152BCAp3sj0LNs3Snpa0mRELFxuu1qtFo1GI4d4KJLtZKfnpZirnVRzp5oLa9mej4jaep9taNZHRPxa0qyke/MIBgBoL8usj2rrTFq2r5V0j6TXig4GAFiVZYz6w5L+qTVO/QFJ/xIRzxQbCwBwTpZZHz+VdMcWZEEJUrxdu1KplB0BSArP+uhheV5g4oIVUBxuIQeAxFHUAJA4ihoAEkdRA0DiKGoASBxFDQCJY3oegI6V930AqU4xpagBdKysxdrp8/wZ+gCAxFHUAJA4ihoAEkdRA0DiKGoASBxFDQCJY3oe2so6VzXrdp08TQooA0WNtihWoFxZ1ky8xfas7Vdtv2L7oa0Ihs5Qr9c1Ojqqvr4+jY6Oql6vlx0J6DpZzqjfk/TliHjR9vWS5m0/FxGvFpwNiavX65qamtLMzIx2796tubk5jY+PS5L27t1bcjqge7Q9o46INyPixdbXb0talHRz0cGQvunpac3MzGhsbEz9/f0aGxvTzMyMpqeny44GdBVvZPzR9k5Jz0sajYjTF302IWlCkoaHh//42LFj+aVEkvr6+nTmzBn19/eff+/s2bMaGBjQyspKicnKkerzJFLNtZU64RjYno+I2nqfZZ6eZ/tDkr4t6eGLS1qSIuJQRNQiolatVq8+LTrGyMiI5ubm1rw3NzenkZGRkhIB3SlTUdvu12pJPxERTxUbCZ1iampK4+Pjmp2d1dmzZzU7O6vx8XFNTU2VHQ3oKm0vJnp1cuyMpMWI+FrxkdApzl0wnJyc1OLiokZGRjQ9Pc2FRCBnbceobe+W9B+Sfibp/dbbfxsRz17uZ2q1WjQajdxCAp0g1XHQVHNtpU44Blcao257Rh0
Rc5LyXUYBANoYHBxUs9nMbX95rAZTqVR06tSpHNJsDHcmAkhSs9lM7iw476W/suKhTACQOIoaABJHUQNA4hijBnJU1hjmlVQqlbIjYJMoaiAneV746oTpZNg6DH0AQOIoagBIHEUNAImjqAEgcRQ1ACSOogaAxDE9D0CSYv926cANZcdYI/ZvL+X3UtQAkuRHTic3l9y24sDW/16GPgAgcRQ1ACSubVHbftz2SdsLWxEIALBWljPqb0q6t+AcAIDLaFvUEfG8pK1fewYAICnHWR+2JyRNSNLw8HBeuwW6ykYeg5pl29RmRaAYuV1MjIhDEVGLiFq1Ws1rt0BXiYhcX+gNzPoAgMRR1ACQuCzT8+qSfijpNttLtseLjwUAOKftxcSI2LsVQQAA62PoAwASR1EDQOIoagBIHEUNAImjqAEgcRQ1ACSOogaAxFHUAJA4ihoAEkdRA0DiKGoASBxFDQCJy22FFwDI20ZWxNkKlUqllN9LUQNIUp4r2Nju6BVxGPoAgMRR1ACQuExFbfte2z+3/QvbXyk6FADgd9qOUdvuk/QNSfdIWpL0Y9vfiYhXiw4HAFeykYuNWbZNdRw7y8XEOyX9IiJelyTb35L0WUkUNYBSpVqsecsy9HGzpF9d8P1S6701bE/YbthuLC8v55UPAHpebhcTI+JQRNQiolatVvPaLQD0vCxF/YakWy74fqj1HgBgC2Qp6h9L+iPbt9r+oKT7JX2n2FgAgHPaXkyMiPds/5Wkf5fUJ+nxiHil8GQAAEkZbyGPiGclPVtwFgDAOrgzEQASR1EDQOJcxIRx28uSjuW+43zdJOmtskN0EY5nvjie+eqE4/kHEbHu3OZCiroT2G5ERK3sHN2C45kvjme+Ov14MvQBAImjqAEgcb1c1IfKDtBlOJ754njmq6OPZ8+OUQNAp+jlM2oA6AgUNQAkrueK2vbjtk/aXig7SzewfYvtWduv2n7F9kNlZ+pUtgds/8j2T1rH8pGyM3UD2322X7L9TNlZrlbPFbWkb0q6t+wQXeQ9SV+OiI9IukvSl2x/pORMneo3kvZExEcl3S7pXtt3lZypGzwkabHsEJvRc0UdEc9LOlV2jm4REW9GxIutr9/W6v8Ql6wAhPZi1Tutb/tbL672b4LtIUmfkvRY2Vk2o+eKGsWxvVPSHZJeKDdJ52r9M/1lSSclPRcRHMvN+bqkfZLeLzvIZlDUyIXtD0n6tqSHI+J02Xk6VUSsRMTtWl1J6U7bo2Vn6lS2Py3pZETMl51lsyhqbJrtfq2W9BMR8VTZebpBRPxa0qy4nrIZH5f0Gdu/lPQtSXts/3O5ka4ORY1NsW1JM5IWI+JrZefpZLartm9sfX2tpHskvVZuqs4VEV+NiKGI2KnVJQS/HxEPlhzrqvRcUduuS/qhpNtsL9keLztTh/u4pM9r9Wzl5dbrL8oO1aE+LGnW9k+1ulbpcxHRsVPKkB9uIQeAxPXcGTUAdBqKGgASR1EDQOIoagBIHEUNAImjqAEgcRQ1ACTu/wF/u4D4GapFtgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.boxplot([data[0], data[1], data[2], data[3]])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This shows a boxplot for one attribute, sorted by species. For this attribute we can see a big overlap between the 3 species, so it's not very useful for distinguishing. An iris with 5.5 or 6.0 for this attribute could be any of the 3 species." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "data.boxplot(column=[0], by=['species'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It's tricky to do subplots, but worth it. We can see setosa has smaller petals than the other 2 species. And versicolor has, on average, smaller sepals and smaller petals than virginica; but there is some overlap." + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'petal_width')" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "fig, ax = plt.subplots(2, 2, figsize=(8, 6))\n", + "A = [data[0][data.species == 'Iris-setosa'], data[0][data.species == 'Iris-virginica'], data[0][data.species == 'Iris-versicolor']]\n", + "B = [data[1][data.species == 'Iris-setosa'], data[1][data.species == 'Iris-virginica'], data[1][data.species == 'Iris-versicolor']]\n", + "C = [data[2][data.species == 'Iris-setosa'], data[2][data.species == 'Iris-virginica'], data[2][data.species == 'Iris-versicolor']]\n", + "D = [data[3][data.species == 'Iris-setosa'], data[3][data.species == 'Iris-virginica'], data[3][data.species == 'Iris-versicolor']]\n", + "\n", + "ax[0, 0].boxplot(A, widths = 0.7)\n", + "ax[0, 0].set_title(cols[0])\n", + "ax[0, 1].boxplot(B, widths = 0.7)\n", + "ax[0, 1].set_title(cols[1])\n", + "ax[1, 0].boxplot(C, widths = 0.7)\n", + "ax[1, 0].set_title(cols[2])\n", + "ax[1, 1].boxplot(D, widths = 0.7)\n", + "ax[1, 1].set_title(cols[3])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This plot does an awsome job of showing distributions of all 4 attributes for all 3 species. 12 box plots in 1 graph! The color coding makes it more readable. " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/ipykernel_launcher.py:21: MatplotlibDeprecationWarning: Adding an axes using the same arguments as a previous axes currently reuses the earlier instance. In a future version, a new instance will always be created and returned. 
Meanwhile, this warning can be suppressed, and the future behavior ensured, by passing a unique label to each axes instance.\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAD5CAYAAAAOXX+6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAVDUlEQVR4nO3dfZBkVXnH8d+PBVxZ2GXanViKTpYyBENIRU1LRAxBRQp8LRMSIWVSa1mZpDCgVohoUmFm8lIJ8aWiMdGshGACrqUISQQVsHRdQURnlxV2WfBlBYFoGNypXdQsKHnyx70DvWP3vXe6+06f7vl+qm7t7fvS/eyZO8+cPvfccxwRAgCk65BBBwAAKEaiBoDEkagBIHEkagBIHIkaABJ3aB1vun79+tiwYUMdbw0AI2nbtm0PRcR4u321JOoNGzZodna2jrcGgJFk+95O+2j6AIDEkagBIHEkagBIXKVEbfuttnfZ3ml7s+3VdQcGAMiUJmrbx0i6QFIzIk6UtErSOXUHBgDIVG36OFTSk20fKukISf9dX0gAgFaliToiHpD0LknfkfRdSfsi4obFx9metD1re3Zubq7/kQLAClWl6WNM0mskHSvp6ZLW2H794uMiYlNENCOiOT7ets82AKALVZo+Tpf07YiYi4gfS7pa0gvrDWvpbJcuADCMqiTq70h6ge0jnGW7l0raXW9YSxcRBy2dtgHAsKnSRn2rpKskbZd0R37OpprjAgDkKo31ERFTkqZqjgUA0AZPJgJA4kjUAJA4EjUAJI5EDQCJI1EDQOJI1ACQOBI1ACSORA0AiSNRA0DiSNQAkDgSNQAkjkQNAIkjUQNA4kjUAJA4EjUAJI5EDQCJqzK57fG2d7Qs+22/ZTmCAwBUmOElIu6W9BxJsr1K0gOSrqk5LgBAbqlNHy+V9K2IuLeOYAAAP63SnIktzpG0ud0O25OSJiVpYmKix7DSkU28XowZzgHUqXKN2vbhkl4t6ePt9kfEpohoRkRzfHy8X/ENXEQctHTaBgB1WUrTx1mStkfE/9QVDADgpy0lUZ+rDs0eA9FoSHbnRSre32gMNn6gA9ulC1aWSm3UttdIepmkP6g3nCWYn5cKmh1KGyS42JGoxc1ptmliW+EqJeqI+KGkp9QcCwCgDZ5MBIDEkagBIHEkagBIHIkaABJHogaAxJGoASBxJGoASNxSB2VKx9RaaXpdb+cDwBAY3kQ9s7/wycRStjTdt2gAoDY0fQBA4kjUAJA4EjUAJG5426hr1LikofkD8x33e6bzyHtjq8e096K9dYQFYIUiUbcxf2BeMdXhRuVU8blFSRwAujHcibpgTGmrZEzqsbF+RwMAtRjeRF3WNc/urfseACSi0s1E20fbvsr2XbZ32z657sAAAJmqNer3SvpMRJydz0Z+RI0xAQBalCZq2+sknSppoyRFxKOSHq03LADAgipNH8dKmpP0r7Zvs31pPtntQWxP2p61PTs3N9f3QAFgpaqSqA+V9DxJH4iI50r6oaS3Lz4oIjZFRDMimuPj430OEwBWriqJ+n5J90fErfnrq5QlbgDAMihto46I79m+z/bxEXG3pJdKurP+0AYnovshVEMMnwqgv6r2+jhf0pV5j489kt5QX0iDZ+/v/GRi2bkzLn7QBlis0ZDmOw9ZIKnzw11jY9JehiwYdZUSdUTskNSsORZgZZqfL3w4q/APf8HTuRgdy/5koitcWMEThQDwuGVP1IuTsG0SMwAUYDxqAEgciRoAEkeiBoDEDe8wp4u0u0m5eNtS2sI7TgAwrcLZy8dWM841gP5yHTf
yms1mzM7OVgtgyG4mDlu8GAJdPlz1xPn7+hMHBsr2toho2w16ZGrUwNCa2d/9JBd24Tc8jAbaqAEgcSRqAEgciRoAEkeiBoDEkagBIHEkagBIHIkaABJXe6JuNLKunp0WqXh/o1F3hACQttofeCkZE10lw6IzLjqAFa9SorZ9j6SHJT0m6SedHnME0KWCGolVUJ0Zqz62DJN2DK+l1KhfHBEP1RYJsFKVJUe7+0fMD/oYJu0YVtxMBIDEVa1Rh6QbbIekf46ITYsPsD0paVKSJiYmnjhxal1Pg8bElCQxOhiAlavSMKe2j4mIB2z/jKQbJZ0fEVs7Hd86zGmv39r69K2vb/i6iOVW1zXHtZyWomFOKzV9RMQD+b8PSrpG0kn9Cy9ttg9aOm0DgLqUJmrba2wftbAu6QxJO+sOLBURUboAQJ2qtFE/VdI1ec3xUEkfiYjP1BoVAOBxpYk6IvZI+uVePqS4daCwl+hSuokCwEiq/cnEZeoiCgAji37UAJA4EjUAJI5EDQCJq72NGsBgNC5paP7AfOExnul8p39s9Zj2XrS332GhCyRqYETNH5hXTBXcqZ8qPr8oiWN50fQBAImjRg0kpt2wBIu38UTsykKiBhJDEsZiJOoRxEwewGghUY8gZvKAJEWslabXdX++1vYxGvSCRA2MKHt/ca+PsvNnXDL1NJbLsidqbpQAwNIse6ImCQPA0tCPGgASR6IGgMRVTtS2V9m+zfa1dQYEADjYUmrUb5a0u65AAADtVUrUtp8h6RWSLq03HADAYlV7ffy9pLdJOqrTAbYnJU1K0sTERO+RAehZ4Qh40/nSwdhqJixNhcu6y9l+paSXR8R5tk+TdGFEvLLonGazGbOzs/2LEj3hyUS0w3WRFtvbIqLZbl+Vpo9TJL3a9j2SPirpJbav6GN8AIACpYk6It4REc+IiA2SzpH0uYh4fe2RAQAk0Y8aAJK3pEfII2KLpC21RAIAaIsaNQAkjkQNAIkjUY+ARkOyOy9S8f5GY7DxAyjGxAEjYH5eKu4OW9ZXvq/hAOgzEjUqYy5GYDBI1KiMuRiBwaCNGgASR6IGgMSRqAEgcbRRj4CYWlc4XGX5+ZK0r0/RdIcblUBnJOoR4Jl9Jd3zSs63FNN9C6cr3KgEOqPpAwASR40aWCHaNS8t3sa3mDSRqIEVgiQ8vGj6AIDEkahRrGjEJ4nRnoBlUNr0YXu1pK2SnpQff1VE1qEL6Sju3WYVDcw0VjTZdMGIT6VfpBntCeiLKm3Uj0h6SUT8wPZhkm6y/emI+HLNsaGisqZHu/wYAOkqTdSR3YH4Qf7ysHzh1x4AlkmlNmrbq2zvkPSgpBsj4tZ6wwIALKjUPS8iHpP0HNtHS7rG9okRsbP1GNuTkiYlaWJiou+BYkCm1krT67o/t4PGJQ3NH5gvPN0z7du4x1aPae9Fe7uLCRhCXmrfStsXS/pRRLyr0zHNZjNmZ2d7jQ190tPj2L00cBec6xkrprp7317OBVJle1tENNvtK236sD2e16Rl+8mSXibprv6GCADopErTx9Mkfdj2KmWJ/WMRcW29YQEAFlTp9XG7pOcuQywAgDYY6wPlOjy4UvwYjQqfpIno/iZlqPNNSmAUkahRrOhGYg83Gu39vd1M7OpMYDgx1gcAJI5EDQCJI1EDQOJI1ACQOBI1ACSOXh+orN9z7nUay0OSNJ0vbYytLhpAGyjX7lpeLKWpy0jUqKyfF25Z1zxPM54H6rP4Wu5pPJxlQNMHACSORA0AiSNRA0DiSNQjyPZBS6dtwIrRaGRDHnRapOL9jcZAw+dm4ghK+aYIMBDz84Xj0pT+xgy4ckONGgASR6IGgMSRqAEgcVXmTHym7c/bvtP2LttvXo7AAACZKjcTfyLpjyNiu+2jJG2zfWNE3FlzbAAAVahRR8R3I2J7vv6wpN2Sjqk7MABAZknd82xvUDbR7a1t9k1KmpSkiYmJPoSGlaTfAz4BB5nqfo7Ox88fIFe
9+G0fKekLkv46Iq4uOrbZbMbs7GwfwgOAPuhhfs++nF/pI7wtIprt9lXq9WH7MEmfkHRlWZIGAPRXlV4flvQvknZHxHvqDwkA0KpKjfoUSb8r6SW2d+TLy2uOCwCQK72ZGBE3SWIUHwDDrWC8DqtkvI+xarMK1TVzDIMyARh9ZcmxTzcL65o5hkQNdGHY5tzDcCNRA10Ytjn3MNwYlAkAEkeiBoDEkaiBCoZ8JicMOdqogQpKZnJS2WROTFOJXpCogQpiap003cv5krSvT9EgFY1LGpo/MF94jGc6/5UeWz2mvRftLf0cEjVQgWf29T6mz3TfwkEi5g/MK6YKLoyp4vOLkngrEjVQUXHzRfGzbRUfbMMyGbZhdUnUQAXL9GAblklKSbgKen0AQOKoUQNAlyJ6mzkmVG3mGBI1AHTJ3l98M7Hs/BmXdOzMkKiBLgzbzSjUp7DnxrQKu3WOra52l5lEDXSBJAxJpbVpT7unGveCKlNxXWb7Qds7e/40AMCSVen1cbmkM2uOAwDQQWmijoitksqfcQQA1KJv/ahtT9qetT07NzfXr7cFgKFh+6Cl07al6luijohNEdGMiOb4+Hi/3hYAhkZElC7d4MlEAEgciRoAElele95mSbdIOt72/bbfWH9YAIAFpQ+8RMS5yxEIAKA9mj4AIHEkagBIHIkaABJHogaAxJGoASBxJGoASByJGgASR6IGgMSRqAEgcUzFBSBZZcOCrpQp0UjUAJK1OBHbXjHJuRVNHwCQOBI1gGQ0GpLdeZE672s0Bht7nWj6AJCM+XmpuGWj884uZ7kaCiRqAMmIqXXSdLfnStK+PkaTDhI1gHRMFydabiYWsH2m7bttf9P22+sOCgDwhCpTca2S9I+SzpJ0gqRzbZ9Qd2AAYPugZfG2laJKjfokSd+MiD0R8aikj0p6Tb1hAUDWj7poWSmqJOpjJN3X8vr+fBsAYBn0rR+17Unbs7Zn5+bm+vW2ALDiVUnUD0h6ZsvrZ+TbDhIRmyKiGRHN8fHxfsUHACtelUT9VUnH2T7W9uGSzpH0X/WGBQBYUNqPOiJ+YvuPJF0vaZWkyyJiV+2RAQAkVXzgJSI+JelTNccCAGiDQZkAIHEkagBInOvoNG57TtK9fX/jpVkv6aEBxzDqKOPlQTnXL4Uy/tmIaNtlrpZEnQLbsxHRHHQco4wyXh6Uc/1SL2OaPgAgcSRqAEjcKCfqTYMOYAWgjJcH5Vy/pMt4ZNuoAWBUjHKNGgBGAokaABJHogaAxA1torZ9mu1rC/ZvtP3+Gj53o+2nt7y+x/b6fn9OSsrKusL5Tdvv67DvHtvrbR9t+7x+feagLb5OCo673PbZBfu32O5r/95RK2upf+Vd4fy/sH16m+2Pl2G+/sJ+faY0xIl6gDZKKr0g8ISImI2IC0oOO1rSeSXHDJONSvc6GbWylpapvCPi4oj4bMlhp0l6YckxS1Jrora9xvZ1tr9me6ft19n+FdtfsL3N9vW2n5Yfu8X2e23vyI89Kd9+ku1bbN9m+0u2j+8ijnHbn7D91Xw5Jd8+bfuy/LP32L6g5Zw/z2dev8n2ZtsX5n8Vm5KuzON8cn74+ba3277D9rN7LrguDLKs8//30c583/bv5dv/zfbLFtU2nmL7Btu7bF8qaWGG0r+V9Kw8pnfm2460fZXtu2xfaQ9uNlPbG1ri2J3HdUS7Mm53ndi+OL/2dtre1M3/xfYZ+c9nu+2P2z4y336P7ZnF12B+3d+4UNa273X27S/pspYGU962n2/76nz9Nbb/1/bhtlfb3pNvf7x2bPvMPMbtkn5jIW5JfyjprXksv5a//an579Qed1O7Lps8spdF0m9K+lDL63WSviRpPH/9OmXjW0vSloVjJZ0qaWe+vlbSofn66ZI+ka+fJunags/eKOn9+fpHJL0oX5+QtDtfn87jeZKyZ/2/L+kwSc+XtEPSaklHSfqGpAtb4my2fM49ks7P18+TdGmdZZpoWX9Q0is
knahsoomF9/6GpDWt50t6n6SL8/VXSIq87DcsxNHymfuUzSh0iKRbFn6GAyrfDXmsp+SvL5P0JyVl3HqdNFrW/13Sq/L1yyWdXfC5W5QlofWStkpak2+/qKUc216Dkt4v6R35+pnDUtaDKm9lwz7vydfflV/Lp0j6dUmbW89Xlhvuk3ScssrGx1qu8Wnl+aLlnI/nZXuCssnCl1Qelcaj7sEdkt5t+xJJ10qaV/bLfGP+B26VpO+2HL9ZkiJiq+21to9Wlig/bPs4ZT+4w7qI43RJJ7T8UV27UBuRdF1EPCLpEdsPSnqqsh/Of0bEAUkHbH+y5P2vzv/dpvwv6wAMsqy/qCzh3yvpA5ImbR8jaT4ifrioMnOq8jKKiOtszxe871ci4n5Jsr1D2S/vTRVjqsN9EXFzvn6FpD9VcRm3erHtt0k6QlJD0i5JZddVqxco+yW/Of+sw5Ul1AXtrsEXSXqtJEXEZ4asrKVlLu/IJkn5lu1fkHSSpPcou15XKbvGWz1b0rcj4huSZPsKSZMFb/8fEfF/ku60/dSiONqpNVFHxNdtP0/SyyX9laTPSdoVESd3OqXN67+U9PmIeG3+tWJLF6EcIukFeeJ9XP7DfqRl02PqrkwW3qPb83s24LLeKulNyr6t/Jmy5HC2fvriXqp+/Gz6aXGZPaziMpYk2V4t6Z+U1fjusz2trEa2FJZ0Y0Sc22F/r9dgamUtDaa8t0o6S9KPJX1WWW14lbLafC9ay3fJzUp1t1E/XdKPIuIKSe+U9KuSxm2fnO8/zPYvtpzyunz7iyTti4h9yr7CL0ymu7HLUG6QdH5LXM8pOf5mSa/K26aOlPTKln0PK6t5JmWQZR0R9yn7Sn1cROxRVhO7UNlFv9hWSb+Tf/ZZksby7UmW6yITC+Wp7P/wZXUu49b/z0KSeCi/nrrpAfBlSafY/rn8s9bY/vmSc26W9Nv58WdouMpaGkx5f1HSWyTdEhFzkp4i6XhJOxcdd5ekDbaflb9u/QPa9/Ktu9fHL0n6Sv5VakrSxcoK7RLbX1PWDtx6d/SA7duUtXm+Md/2d5L+Jt/e7V/5CyQ1bd9u+05ljf0dRcRXlU3ge7ukTytrVtiX775c0gd98M3EFAy6rG+V9PV8/YuSjlH7r84zym6s7FL2Ff07khQR31f2tX6nn7jBlZq7Jb3J9m5lSe8f1LmML1d+nSirTX1I2S/79craPpckTxobJW22fbuyZo+yG9czks6wvVPSb0n6nqSHh6SspcGU963Kmj8XKhm3S7oj8sbmBfm380lJ1+U3Ex9s2f1JSa9ddDOxJ8mM9WF7i7IG+NlBxyJJto+MiB/YPkLZD20yIrYPOq5+SK2sh0HeFHRtRJw44FAqs/0kSY/lba8nS/pARJR9m0zCMJZ3nVJoh0rVJtsnKPsa9eFRSdJYUSYkfcz2IZIelfT7A44HXUqmRt0t22+Q9OZFm2+OiDcNIp5RRlnXx/Y1ko5dtPmiiLh+EPGMumEr76FP1AAw6niEHAASR6IGgMSRqAEgcSRqAEjc/wOGYvhCig5EyQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "def set_color(bp):\n", + " plt.setp(bp['boxes'][0], color='blue')\n", + " plt.setp(bp['boxes'][1], color='red')\n", + " plt.setp(bp['boxes'][2], color='green')\n", + "\n", + "A = [data[0][data.species == 'Iris-setosa'], data[0][data.species == 'Iris-virginica'], data[0][data.species == 'Iris-versicolor']]\n", + "B = [data[1][data.species == 'Iris-setosa'], data[1][data.species == 'Iris-virginica'], data[1][data.species == 'Iris-versicolor']]\n", + "C = [data[2][data.species == 'Iris-setosa'], data[2][data.species == 'Iris-virginica'], data[2][data.species == 'Iris-versicolor']]\n", + "D = [data[3][data.species == 'Iris-setosa'], data[3][data.species == 'Iris-virginica'], data[3][data.species == 'Iris-versicolor']]\n", + "\n", + "# add this to remove outlier symbols: 0, '',\n", + "bp = plt.boxplot(A, 0, '', positions = [1, 2, 3], widths = 0.7)\n", + "set_color(bp)\n", + "bp = plt.boxplot(B, 0, '', positions = [5, 6, 7], widths = 0.7)\n", + "set_color(bp)\n", + "bp = plt.boxplot(C, 0, '', positions = [9, 10, 11], widths = 0.7)\n", + "set_color(bp)\n", + "bp = plt.boxplot(D, 0, '', positions = [13, 14, 15], widths = 0.7)\n", + "set_color(bp)\n", + "\n", + "ax = plt.axes()\n", + "ax.set_xticks([2, 6, 10, 14])\n", + "ax.set_xticklabels(cols)\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Iris Dataset/KNN-IrisData.ipynb b/Iris 
Dataset/KNN-IrisData.ipynb new file mode 100644 index 00000000..d49be354 --- /dev/null +++ b/Iris Dataset/KNN-IrisData.ipynb @@ -0,0 +1,629 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Classification \n", + "predict which group a new target object belongs to by comparing it to identified objects. The identified, or labeled objects are called the training set." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## KNN - K-Nearest Neighbors\n", + "Find the k nearest objects to the target object using some distance metric. Then these k nearest neighbors get to vote on the identity of the target object. \n", + "For example, if k=5, we find the 5 nearest objects in our training set. If three of them are apples, one is a pear and one is an orange then we predict our target object is an apple. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idsepal_lengthsepal_widthpetal_lengthpetal_widthspecies
005.13.51.40.2Iris-setosa
114.93.01.40.2Iris-setosa
224.73.21.30.2Iris-setosa
334.63.11.50.2Iris-setosa
445.03.61.40.2Iris-setosa
\n", + "
" + ], + "text/plain": [ + " id sepal_length sepal_width petal_length petal_width species\n", + "0 0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 4 5.0 3.6 1.4 0.2 Iris-setosa" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train = pd.read_csv('iris.data')\n", + "train.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Rename columns of training set, and add a column for distance." + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123speciesdistance
05.13.51.40.2Iris-setosa9999
14.93.01.40.2Iris-setosa9999
24.73.21.30.2Iris-setosa9999
34.63.11.50.2Iris-setosa9999
45.03.61.40.2Iris-setosa9999
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 species distance\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa 9999\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa 9999\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa 9999\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa 9999\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa 9999" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train = train.drop('id', 1)\n", + "cols = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']\n", + "train.rename(columns = {cols[0]:0, cols[1]:1, cols[2]:2, cols[3]:3}, inplace=True)\n", + "train['distance'] = 9999\n", + "train.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create an unidentified Target instance, then we will try to predict its species using knn." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 7.0\n", + "1 3.1\n", + "2 5.6\n", + "3 1.9\n", + "dtype: float64" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target = pd.Series([7.0, 3.1, 5.6, 1.9])\n", + "target" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Distance\n", + "There are a variety of ways to measure [distance](https://towardsdatascience.com/9-distance-measures-in-data-science-918109d069fa). If there are many attributes, we may use a subset of the attributes to compare objects. \n", + "We'll use Euclidean distance, similar to Pythagorean Theorem but scaled to more attributes. \n", + "We compute the distance of every training instance from the target." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123speciesdistance
05.13.51.40.2Iris-setosa4.929503
105.43.71.50.2Iris-setosa4.756049
205.43.41.70.2Iris-setosa4.555217
304.83.11.60.2Iris-setosa4.871345
405.03.51.30.3Iris-setosa5.020956
507.03.24.71.4Iris-versicolor1.034408
605.02.03.51.0Iris-versicolor3.229551
705.93.24.81.8Iris-versicolor1.367479
805.52.43.81.1Iris-versicolor2.572936
905.52.64.41.2Iris-versicolor2.104757
1006.33.36.02.5Iris-virginica1.024695
1106.53.25.12.0Iris-virginica0.721110
1206.93.25.72.3Iris-virginica0.435890
1307.42.86.11.9Iris-virginica0.707107
1406.73.15.62.4Iris-virginica0.583095
\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 species distance\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa 4.929503\n", + "10 5.4 3.7 1.5 0.2 Iris-setosa 4.756049\n", + "20 5.4 3.4 1.7 0.2 Iris-setosa 4.555217\n", + "30 4.8 3.1 1.6 0.2 Iris-setosa 4.871345\n", + "40 5.0 3.5 1.3 0.3 Iris-setosa 5.020956\n", + "50 7.0 3.2 4.7 1.4 Iris-versicolor 1.034408\n", + "60 5.0 2.0 3.5 1.0 Iris-versicolor 3.229551\n", + "70 5.9 3.2 4.8 1.8 Iris-versicolor 1.367479\n", + "80 5.5 2.4 3.8 1.1 Iris-versicolor 2.572936\n", + "90 5.5 2.6 4.4 1.2 Iris-versicolor 2.104757\n", + "100 6.3 3.3 6.0 2.5 Iris-virginica 1.024695\n", + "110 6.5 3.2 5.1 2.0 Iris-virginica 0.721110\n", + "120 6.9 3.2 5.7 2.3 Iris-virginica 0.435890\n", + "130 7.4 2.8 6.1 1.9 Iris-virginica 0.707107\n", + "140 6.7 3.1 5.6 2.4 Iris-virginica 0.583095" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train['distance'] = ((train.loc[:,0]-target[0])**2 + (train.loc[:,1]-target[1])**2 + (train.loc[:,2]-target[2])**2 + (train.loc[:,3]-target[3])**2) ** 0.5\n", + "train.loc[::10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We sort the training records by distance, and add the species of the (k=7) items nearest to the target to a list." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Iris-virginica',\n", + " 'Iris-virginica',\n", + " 'Iris-virginica',\n", + " 'Iris-virginica',\n", + " 'Iris-virginica',\n", + " 'Iris-virginica',\n", + " 'Iris-virginica']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "k = 7\n", + "train = train.sort_values('distance', ascending=True)\n", + "knn = list(train.head(k).species)\n", + "knn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use mode to get the most popular of the knn list. 
In this example the whole knn list is Iris-virginica, so our prediction is obvious. But sometimes the list of nearest neighbors will be a variety, and the mode tells us our prediction." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iris-virginica\n" + ] + } + ], + "source": [ + "from statistics import mode\n", + "print(mode(knn))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To check our prediction, we plot the training set on a scatter plot, then plot our target. Here we can see our target is surrounded by Iris-virginica instances, so our prediction is probably correct." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Iris Data Scatter Plot')" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "colors = {'Iris-setosa':'red', 'Iris-virginica':'blue', 'Iris-versicolor':'green'}\n", + "plt.scatter(\n", + " train[2], \n", + " train[3], \n", + " c=train['species'].map(colors))\n", + "plt.scatter(target[2], target[3], c='orange')\n", + "plt.xlabel(cols[2])\n", + "plt.ylabel(cols[3])\n", + "plt.title('Iris Data Scatter Plot')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Iris Dataset/iris.data b/Iris Dataset/iris.data new file mode 100644 index 00000000..835f8b44 --- /dev/null +++ b/Iris Dataset/iris.data @@ -0,0 +1,151 @@ +id,sepal_length,sepal_width,petal_length,petal_width,species +0,5.1,3.5,1.4,0.2,Iris-setosa +1,4.9,3.0,1.4,0.2,Iris-setosa +2,4.7,3.2,1.3,0.2,Iris-setosa +3,4.6,3.1,1.5,0.2,Iris-setosa +4,5.0,3.6,1.4,0.2,Iris-setosa +5,5.4,3.9,1.7,0.4,Iris-setosa +6,4.6,3.4,1.4,0.3,Iris-setosa +7,5.0,3.4,1.5,0.2,Iris-setosa +8,4.4,2.9,1.4,0.2,Iris-setosa +9,4.9,3.1,1.5,0.1,Iris-setosa +10,5.4,3.7,1.5,0.2,Iris-setosa +11,4.8,3.4,1.6,0.2,Iris-setosa +12,4.8,3.0,1.4,0.1,Iris-setosa +13,4.3,3.0,1.1,0.1,Iris-setosa +14,5.8,4.0,1.2,0.2,Iris-setosa +15,5.7,4.4,1.5,0.4,Iris-setosa +16,5.4,3.9,1.3,0.4,Iris-setosa +17,5.1,3.5,1.4,0.3,Iris-setosa +18,5.7,3.8,1.7,0.3,Iris-setosa +19,5.1,3.8,1.5,0.3,Iris-setosa +20,5.4,3.4,1.7,0.2,Iris-setosa +21,5.1,3.7,1.5,0.4,Iris-setosa +22,4.6,3.6,1.0,0.2,Iris-setosa 
+23,5.1,3.3,1.7,0.5,Iris-setosa +24,4.8,3.4,1.9,0.2,Iris-setosa +25,5.0,3.0,1.6,0.2,Iris-setosa +26,5.0,3.4,1.6,0.4,Iris-setosa +27,5.2,3.5,1.5,0.2,Iris-setosa +28,5.2,3.4,1.4,0.2,Iris-setosa +29,4.7,3.2,1.6,0.2,Iris-setosa +30,4.8,3.1,1.6,0.2,Iris-setosa +31,5.4,3.4,1.5,0.4,Iris-setosa +32,5.2,4.1,1.5,0.1,Iris-setosa +33,5.5,4.2,1.4,0.2,Iris-setosa +34,4.9,3.1,1.5,0.1,Iris-setosa +35,5.0,3.2,1.2,0.2,Iris-setosa +36,5.5,3.5,1.3,0.2,Iris-setosa +37,4.9,3.1,1.5,0.1,Iris-setosa +38,4.4,3.0,1.3,0.2,Iris-setosa +39,5.1,3.4,1.5,0.2,Iris-setosa +40,5.0,3.5,1.3,0.3,Iris-setosa +41,4.5,2.3,1.3,0.3,Iris-setosa +42,4.4,3.2,1.3,0.2,Iris-setosa +43,5.0,3.5,1.6,0.6,Iris-setosa +44,5.1,3.8,1.9,0.4,Iris-setosa +45,4.8,3.0,1.4,0.3,Iris-setosa +46,5.1,3.8,1.6,0.2,Iris-setosa +47,4.6,3.2,1.4,0.2,Iris-setosa +48,5.3,3.7,1.5,0.2,Iris-setosa +49,5.0,3.3,1.4,0.2,Iris-setosa +50,7.0,3.2,4.7,1.4,Iris-versicolor +51,6.4,3.2,4.5,1.5,Iris-versicolor +52,6.9,3.1,4.9,1.5,Iris-versicolor +53,5.5,2.3,4.0,1.3,Iris-versicolor +54,6.5,2.8,4.6,1.5,Iris-versicolor +55,5.7,2.8,4.5,1.3,Iris-versicolor +56,6.3,3.3,4.7,1.6,Iris-versicolor +57,4.9,2.4,3.3,1.0,Iris-versicolor +58,6.6,2.9,4.6,1.3,Iris-versicolor +59,5.2,2.7,3.9,1.4,Iris-versicolor +60,5.0,2.0,3.5,1.0,Iris-versicolor +61,5.9,3.0,4.2,1.5,Iris-versicolor +62,6.0,2.2,4.0,1.0,Iris-versicolor +63,6.1,2.9,4.7,1.4,Iris-versicolor +64,5.6,2.9,3.6,1.3,Iris-versicolor +65,6.7,3.1,4.4,1.4,Iris-versicolor +66,5.6,3.0,4.5,1.5,Iris-versicolor +67,5.8,2.7,4.1,1.0,Iris-versicolor +68,6.2,2.2,4.5,1.5,Iris-versicolor +69,5.6,2.5,3.9,1.1,Iris-versicolor +70,5.9,3.2,4.8,1.8,Iris-versicolor +71,6.1,2.8,4.0,1.3,Iris-versicolor +72,6.3,2.5,4.9,1.5,Iris-versicolor +73,6.1,2.8,4.7,1.2,Iris-versicolor +74,6.4,2.9,4.3,1.3,Iris-versicolor +75,6.6,3.0,4.4,1.4,Iris-versicolor +76,6.8,2.8,4.8,1.4,Iris-versicolor +77,6.7,3.0,5.0,1.7,Iris-versicolor +78,6.0,2.9,4.5,1.5,Iris-versicolor +79,5.7,2.6,3.5,1.0,Iris-versicolor +80,5.5,2.4,3.8,1.1,Iris-versicolor 
+81,5.5,2.4,3.7,1.0,Iris-versicolor +82,5.8,2.7,3.9,1.2,Iris-versicolor +83,6.0,2.7,5.1,1.6,Iris-versicolor +84,5.4,3.0,4.5,1.5,Iris-versicolor +85,6.0,3.4,4.5,1.6,Iris-versicolor +86,6.7,3.1,4.7,1.5,Iris-versicolor +87,6.3,2.3,4.4,1.3,Iris-versicolor +88,5.6,3.0,4.1,1.3,Iris-versicolor +89,5.5,2.5,4.0,1.3,Iris-versicolor +90,5.5,2.6,4.4,1.2,Iris-versicolor +91,6.1,3.0,4.6,1.4,Iris-versicolor +92,5.8,2.6,4.0,1.2,Iris-versicolor +93,5.0,2.3,3.3,1.0,Iris-versicolor +94,5.6,2.7,4.2,1.3,Iris-versicolor +95,5.7,3.0,4.2,1.2,Iris-versicolor +96,5.7,2.9,4.2,1.3,Iris-versicolor +97,6.2,2.9,4.3,1.3,Iris-versicolor +98,5.1,2.5,3.0,1.1,Iris-versicolor +99,5.7,2.8,4.1,1.3,Iris-versicolor +100,6.3,3.3,6.0,2.5,Iris-virginica +101,5.8,2.7,5.1,1.9,Iris-virginica +102,7.1,3.0,5.9,2.1,Iris-virginica +103,6.3,2.9,5.6,1.8,Iris-virginica +104,6.5,3.0,5.8,2.2,Iris-virginica +105,7.6,3.0,6.6,2.1,Iris-virginica +106,4.9,2.5,4.5,1.7,Iris-virginica +107,7.3,2.9,6.3,1.8,Iris-virginica +108,6.7,2.5,5.8,1.8,Iris-virginica +109,7.2,3.6,6.1,2.5,Iris-virginica +110,6.5,3.2,5.1,2.0,Iris-virginica +111,6.4,2.7,5.3,1.9,Iris-virginica +112,6.8,3.0,5.5,2.1,Iris-virginica +113,5.7,2.5,5.0,2.0,Iris-virginica +114,5.8,2.8,5.1,2.4,Iris-virginica +115,6.4,3.2,5.3,2.3,Iris-virginica +116,6.5,3.0,5.5,1.8,Iris-virginica +117,7.7,3.8,6.7,2.2,Iris-virginica +118,7.7,2.6,6.9,2.3,Iris-virginica +119,6.0,2.2,5.0,1.5,Iris-virginica +120,6.9,3.2,5.7,2.3,Iris-virginica +121,5.6,2.8,4.9,2.0,Iris-virginica +122,7.7,2.8,6.7,2.0,Iris-virginica +123,6.3,2.7,4.9,1.8,Iris-virginica +124,6.7,3.3,5.7,2.1,Iris-virginica +125,7.2,3.2,6.0,1.8,Iris-virginica +126,6.2,2.8,4.8,1.8,Iris-virginica +127,6.1,3.0,4.9,1.8,Iris-virginica +128,6.4,2.8,5.6,2.1,Iris-virginica +129,7.2,3.0,5.8,1.6,Iris-virginica +130,7.4,2.8,6.1,1.9,Iris-virginica +131,7.9,3.8,6.4,2.0,Iris-virginica +132,6.4,2.8,5.6,2.2,Iris-virginica +133,6.3,2.8,5.1,1.5,Iris-virginica +134,6.1,2.6,5.6,1.4,Iris-virginica +135,7.7,3.0,6.1,2.3,Iris-virginica 
+136,6.3,3.4,5.6,2.4,Iris-virginica +137,6.4,3.1,5.5,1.8,Iris-virginica +138,6.0,3.0,4.8,1.8,Iris-virginica +139,6.9,3.1,5.4,2.1,Iris-virginica +140,6.7,3.1,5.6,2.4,Iris-virginica +141,6.9,3.1,5.1,2.3,Iris-virginica +142,5.8,2.7,5.1,1.9,Iris-virginica +143,6.8,3.2,5.9,2.3,Iris-virginica +144,6.7,3.3,5.7,2.5,Iris-virginica +145,6.7,3.0,5.2,2.3,Iris-virginica +146,6.3,2.5,5.0,1.9,Iris-virginica +147,6.5,3.0,5.2,2.0,Iris-virginica +148,6.2,3.4,5.4,2.3,Iris-virginica +149,5.9,3.0,5.1,1.8,Iris-virginica diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..3322e0ac --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017 Joe James + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Lambda Functions/Python Lambda Functions.ipynb b/Lambda Functions/Python Lambda Functions.ipynb new file mode 100644 index 00000000..61c11f0c --- /dev/null +++ b/Lambda Functions/Python Lambda Functions.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Lambda Functions\n", + "Anonymous, single-use, or throw-away functions. \n", + "**lambda arguments : expression** \n", + "Here are some single-argument lambdas:" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "12\n" + ] + } + ], + "source": [ + "add5 = lambda x: x + 5\n", + "print(add5(7))" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "64\n" + ] + } + ], + "source": [ + "square = lambda x: x * x\n", + "print(square(8))" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4\n", + "3\n" + ] + } + ], + "source": [ + "get_tens = lambda p: int(p/10)%10\n", + "print(get_tens(749))\n", + "print(get_tens(836.21))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Lambdas as an argument in other functions** \n", + "One of the most popular uses for lambda functions is as an argument inside sort, or filter functions. 
\n", + "### Sorting a List of Tuples using Lambda" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('carrots', 1.1), ('peaches', 2.45), ('eggs', 5.25), ('honey', 9.7)]\n" + ] + } + ], + "source": [ + "list1 = [('eggs', 5.25), ('honey', 9.70), ('carrots', 1.10), ('peaches', 2.45)]\n", + "list1.sort(key = lambda x: x[1])\n", + "print(list1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sorting a List of Dictionaries using Lambda" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{'make': 'Tesla', 'model': 'X', 'year': 1999},\n", + " {'make': 'Mercedes', 'model': 'C350E', 'year': 2008},\n", + " {'make': 'Ford', 'model': 'Focus', 'year': 2013}]\n" + ] + } + ], + "source": [ + "import pprint as pp\n", + "list1 = [{'make':'Ford', 'model':'Focus', 'year':2013}, {'make':'Tesla', 'model':'X', 'year':1999}, {'make':'Mercedes', 'model':'C350E', 'year':2008}]\n", + "list2 = sorted(list1, key = lambda x: x['year'])\n", + "pp.pprint(list2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Filtering a List of Integers using Lambda" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2, 4, 6]\n" + ] + } + ], + "source": [ + "list1 = [1, 2, 3, 4, 5, 6]\n", + "list2 = list(filter(lambda x: x%2 == 0, list1))\n", + "print(list2)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 3, 5]\n" + ] + } + ], + "source": [ + "odds = lambda x: x%2 == 1\n", + "list1 = [1, 2, 3, 4, 5, 6]\n", + "list2 = list(filter(odds, list1))\n", + "print(list2)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### Lambda Function on a List using Map\n", + "Python's map function applies the lambda to every element in the list." + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 4, 9, 16, 25, 36]\n" + ] + } + ], + "source": [ + "list1 = [1, 2, 3, 4, 5, 6]\n", + "list2 = list(map(lambda x: x ** 2, list1))\n", + "print(list2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lambda Conditionals\n", + "**lambda args: a if boolean_expression else b** " + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n" + ] + } + ], + "source": [ + "starts_with_J = lambda x: True if x.startswith('J') else False\n", + "print(starts_with_J('Joey'))" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "and\n" + ] + } + ], + "source": [ + "wordb4 = lambda s, w: s.split()[s.split().index(w)-1] if w in s else None\n", + "sentence = 'Four score and seven years ago'\n", + "print(wordb4(sentence, 'seven'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Lambdas on DataTime Objects\n", + "You sometimes want to get just the year, month, date or time for comparision. \n", + "This would typically be most useful as a parameter in sort or filter functions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2019-03-07 19:36:58.442863\n", + "2019\n" + ] + } + ], + "source": [ + "import datetime\n", + "\n", + "now = datetime.datetime.now()\n", + "print(now)\n", + "year = lambda x: x.year\n", + "print(year(now))" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4096\n", + "125\n" + ] + } + ], + "source": [ + "def do_something(f, val):\n", + " return f(val)\n", + "\n", + "func = lambda x: x**3\n", + "print(func(16))\n", + "print(do_something(func, 5))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Extreme Lambdas\n", + "This is probably a stretch -- you shouldn't be trying to do this much with Lambdas. \n", + "Some things are better done in a regular function. But this shows what's possible with Lambdas." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "True\n", + "False\n", + "False\n", + "True\n", + "-1\n", + "-21.67 \n" + ] + } + ], + "source": [ + "isnum = lambda q: q.replace('.','',1).isdigit()\n", + "print(isnum('25983'))\n", + "print(isnum('3.1415'))\n", + "print(isnum('T57'))\n", + "print(isnum('-16'))\n", + "\n", + "is_num = lambda r: isnum(r[1:]) if r[0]=='-' else isnum(r)\n", + "print(is_num('-16.4'))\n", + "\n", + "tonum = lambda s: float(s) if is_num(s) else -1\n", + "print(tonum('30y'))\n", + "print(tonum('-21.67'), type(tonum('-21.67')))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/LinkedLists/CircularLinkedList.py b/LinkedLists/CircularLinkedList.py new file mode 100644 index 00000000..48a5a9df --- /dev/null +++ b/LinkedLists/CircularLinkedList.py @@ -0,0 +1,116 @@ +class Node(object): + + def __init__ (self, d, n = None): + self.data = d + self.next_node = n + + def get_next (self): + return self.next_node + + def set_next (self, n): + self.next_node = n + + def get_data (self): + return self.data + + def set_data (self, d): + self.data = d + + def __str__(self): + return "Node value: " + str(self.data) + +class CircularLinkedList (object): + + def __init__ (self, r = None): + self.root = r + self.size = 0 + + def get_size (self): + return self.size + + def add (self, d): + if self.get_size() == 0: + self.root = Node(d) + self.root.set_next(self.root) + else: + new_node = Node (d, self.root.get_next()) + self.root.set_next(new_node) + 
self.size += 1 + + def remove (self, d): + this_node = self.root + prev_node = None + + while True: + if this_node.get_data() == d: # found + if prev_node is not None: + prev_node.set_next(this_node.get_next()) + else: + while this_node.get_next() != self.root: + this_node = this_node.get_next() + this_node.set_next(self.root.get_next()) + self.root = self.root.get_next() + self.size -= 1 + return True # data removed + elif this_node.get_next() == self.root: + return False # data not found + prev_node = this_node + this_node = this_node.get_next() + + def find (self, d): + this_node = self.root + while True: + if this_node.get_data() == d: + return d + elif this_node.get_next() == self.root: + return False + this_node = this_node.get_next() + + def print_list (self): + print ("Print List..........") + if self.root is None: + return + this_node = self.root + print (this_node) + while this_node.get_next() != self.root: + this_node = this_node.get_next() + print (this_node) + +def main(): + myList = CircularLinkedList() + myList.add(5) + myList.add(7) + myList.add(3) + myList.add(8) + myList.add(9) + print("Find 8", myList.find(8)) + print("Find 12", myList.find(12)) + + cur = myList.root + print (cur) + for i in range(8): + cur = cur.get_next(); + print (cur) + + print("size="+str(myList.get_size())) + myList.print_list() + myList.remove(8) + print("size="+str(myList.get_size())) + print("Remove 15", myList.remove(15)) + print("size="+str(myList.get_size())) + myList.remove(5) # delete root node + myList.print_list() + +main() + + + + + + + + + + + + diff --git a/DoublyLinkedList.py b/LinkedLists/DoublyLinkedList1.py similarity index 95% rename from DoublyLinkedList.py rename to LinkedLists/DoublyLinkedList1.py index 2fd60b81..c4a8d406 100644 --- a/DoublyLinkedList.py +++ b/LinkedLists/DoublyLinkedList1.py @@ -1,78 +1,78 @@ -class Node(object): - - def __init__ (self, d, n = None, p = None): - self.data = d - self.next_node = n - self.prev_node = p - - def get_next 
(self): - return self.next_node - - def set_next (self, n): - self.next_node = n - - def get_prev (self): - return self.prev_node - - def set_prev (self, p): - self.prev_node = p - - def get_data (self): - return self.data - - def set_data (self, d): - self.data = d - - -class LinkedList (object): - - def __init__(self, r = None): - self.root = r - self.size = 0 - - def get_size (self): - return self.size - - def add (self, d): - new_node = Node (d, self.root) - if self.root: - self.root.set_prev(new_node) - self.root = new_node - self.size += 1 - - def remove (self, d): - this_node = self.root - - while this_node: - if this_node.get_data() == d: - next = this_node.get_next() - prev = this_node.get_prev() - - if next: - next.set_prev(prev) - if prev: - prev.set_next(next) - else: - self.root = this_node - self.size -= 1 - return True # data removed - else: - this_node = this_node.get_next() - return False # data not found - - def find (self, d): - this_node = self.root - while this_node: - if this_node.get_data() == d: - return d - else: - this_node = this_node.get_next() - return None - -myList = LinkedList() -myList.add(5) -myList.add(8) -myList.add(12) -myList.remove(8) -print(myList.remove(12)) +class Node(object): + + def __init__ (self, d, n = None, p = None): + self.data = d + self.next_node = n + self.prev_node = p + + def get_next (self): + return self.next_node + + def set_next (self, n): + self.next_node = n + + def get_prev (self): + return self.prev_node + + def set_prev (self, p): + self.prev_node = p + + def get_data (self): + return self.data + + def set_data (self, d): + self.data = d + + +class LinkedList (object): + + def __init__(self, r = None): + self.root = r + self.size = 0 + + def get_size (self): + return self.size + + def add (self, d): + new_node = Node (d, self.root) + if self.root: + self.root.set_prev(new_node) + self.root = new_node + self.size += 1 + + def remove (self, d): + this_node = self.root + + while this_node: + if 
this_node.get_data() == d: + next = this_node.get_next() + prev = this_node.get_prev() + + if next: + next.set_prev(prev) + if prev: + prev.set_next(next) + else: + self.root = this_node + self.size -= 1 + return True # data removed + else: + this_node = this_node.get_next() + return False # data not found + + def find (self, d): + this_node = self.root + while this_node: + if this_node.get_data() == d: + return d + else: + this_node = this_node.get_next() + return None + +myList = LinkedList() +myList.add(5) +myList.add(8) +myList.add(12) +myList.remove(8) +print(myList.remove(12)) print(myList.find(5)) \ No newline at end of file diff --git a/LinkedLists/DoublyLinkedList2.py b/LinkedLists/DoublyLinkedList2.py new file mode 100644 index 00000000..84fa0202 --- /dev/null +++ b/LinkedLists/DoublyLinkedList2.py @@ -0,0 +1,113 @@ +class Node(object): + + def __init__ (self, d, n = None, p = None): + self.data = d + self.next_node = n + self.prev_node = p + + def get_next (self): + return self.next_node + + def set_next (self, n): + self.next_node = n + + def get_prev (self): + return self.prev_node + + def set_prev (self, p): + self.prev_node = p + + def get_data (self): + return self.data + + def set_data (self, d): + self.data = d + + def to_string (self): + return "Node value: " + str(self.data) + + def has_next (self): + if self.get_next() is None: + return False + return True + +class DoublyLinkedList (object): + + def __init__ (self, r = None): + self.root = r + self.last = r + self.size = 0 + + def get_size (self): + return self.size + + def add (self, d): + if self.size == 0: + self.root = Node(d) + self.last = self.root + else: + new_node = Node(d, self.root) + self.root.set_prev(new_node) + self.root = new_node + self.size += 1 + + def remove (self, d): + this_node = self.root + while this_node is not None: + if this_node.get_data() == d: + if this_node.get_prev() is not None: + if this_node.has_next(): # delete a middle node + 
this_node.get_prev().set_next(this_node.get_next()) + this_node.get_next().set_prev(this_node.get_prev()) + else: # delete last node + this_node.get_prev().set_next(None) + self.last = this_node.get_prev() + else: # delete root node + self.root = this_node.get_next() + this_node.get_next().set_prev(self.root) + self.size -= 1 + return True # data removed + else: + this_node = this_node.get_next() + return False # data not found + + def find (self, d): + this_node = self.root + while this_node is not None: + if this_node.get_data() == d: + return d + elif this_node.get_next() == self.root: + return False + else: + this_node = this_node.get_next() + + def print_list (self): + print ("Print List..........") + if self.root is None: + return + this_node = self.root + print (this_node.to_string()) + while this_node.has_next(): + this_node = this_node.get_next() + print (this_node.to_string()) + +def main(): + myList = DoublyLinkedList() + myList.add(5) + myList.add(9) + myList.add(3) + myList.add(8) + myList.add(9) + print("size="+str(myList.get_size())) + myList.print_list() + myList.remove(8) + print("size="+str(myList.get_size())) + print("Remove 15", myList.remove(15)) + myList.add(21) + myList.add(22) + myList.remove(5) + myList.print_list() + print("size="+str(myList.get_size())) + print(myList.last.get_prev().to_string()) + +main() \ No newline at end of file diff --git a/LinkedLists.py b/LinkedLists/LinkedList0.py similarity index 95% rename from LinkedLists.py rename to LinkedLists/LinkedList0.py index cb21684f..19e30946 100644 --- a/LinkedLists.py +++ b/LinkedLists/LinkedList0.py @@ -1,69 +1,69 @@ -class Node(object): - - def __init__ (self, d, n = None): - self.data = d - self.next_node = n - - def get_next (self): - return self.next_node - - def set_next (self, n): - self.next_node = n - - def get_data (self): - return self.data - - def set_data (self, d): - self.data = d - - -class LinkedList (object): - - def __init__(self, r = None): - self.root = r - 
self.size = 0 - - def get_size (self): - return self.size - - def add (self, d): - new_node = Node (d, self.root) - self.root = new_node - self.size += 1 - - def remove (self, d): - this_node = self.root - prev_node = None - - while this_node: - if this_node.get_data() == d: - if prev_node: - prev_node.set_next(this_node.get_next()) - else: - self.root = this_node.get_next() - self.size -= 1 - return True # data removed - else: - prev_node = this_node - this_node = this_node.get_next() - return False # data not found - - def find (self, d): - this_node = self.root - while this_node: - if this_node.get_data() == d: - return d - else: - this_node = this_node.get_next() - return None - -myList = LinkedList() -myList.add(5) -myList.add(8) -myList.add(12) -print("size="+str(myList.get_size())) -myList.remove(8) -print("size="+str(myList.get_size())) -print(myList.remove(12)) -print("size="+str(myList.get_size())) +class Node(object): + + def __init__ (self, d, n = None): + self.data = d + self.next_node = n + + def get_next (self): + return self.next_node + + def set_next (self, n): + self.next_node = n + + def get_data (self): + return self.data + + def set_data (self, d): + self.data = d + + +class LinkedList (object): + + def __init__(self, r = None): + self.root = r + self.size = 0 + + def get_size (self): + return self.size + + def add (self, d): + new_node = Node (d, self.root) + self.root = new_node + self.size += 1 + + def remove (self, d): + this_node = self.root + prev_node = None + + while this_node: + if this_node.get_data() == d: + if prev_node: + prev_node.set_next(this_node.get_next()) + else: + self.root = this_node.get_next() + self.size -= 1 + return True # data removed + else: + prev_node = this_node + this_node = this_node.get_next() + return False # data not found + + def find (self, d): + this_node = self.root + while this_node: + if this_node.get_data() == d: + return d + else: + this_node = this_node.get_next() + return None + +myList = 
LinkedList() +myList.add(5) +myList.add(8) +myList.add(12) +print("size="+str(myList.get_size())) +myList.remove(8) +print("size="+str(myList.get_size())) +print(myList.remove(12)) +print("size="+str(myList.get_size())) print(myList.find(5)) \ No newline at end of file diff --git a/LinkedLists/LinkedList1.py b/LinkedLists/LinkedList1.py new file mode 100644 index 00000000..6cca75fd --- /dev/null +++ b/LinkedLists/LinkedList1.py @@ -0,0 +1,118 @@ +class Node(object): + + def __init__ (self, d, n = None): + self.data = d + self.next_node = n + + def get_next (self): + return self.next_node + + def set_next (self, n): + self.next_node = n + + def get_data (self): + return self.data + + def set_data (self, d): + self.data = d + + def to_string (self): + return "Node value: " + str(self.data); + + def has_next (self): + if self.get_next() is None: + return False; + return True; + + def compare_to (self, y): + if self.to_string() < y.to_string(): + return -1; + elif self.to_string() > y.to_string(): + return 1; + return 0; + + +class LinkedList (object): + + def __init__ (self, r = None): + self.root = r + self.size = 0 + + def get_size (self): + return self.size + + def add (self, d): + new_node = Node (d, self.root); + self.root = new_node; + self.size += 1; + + def add_node (self, n): + n.set_next(self.root); + self.root = n; + self.size += 1; + + def remove (self, d): + this_node = self.root + prev_node = None + + while this_node: + if this_node.get_data() == d: + if prev_node: # removing node that is not the root + prev_node.set_next(this_node.get_next()) + else: # removing root node + self.root = this_node.get_next() + self.size -= 1 + return True # data removed + else: + prev_node = this_node + this_node = this_node.get_next() + return False # data not found + + def find (self, d): + this_node = self.root + while this_node: + if this_node.get_data() == d: + return d + else: + this_node = this_node.get_next() + return None + + def print_list (self): + print ("Print 
List.........."); + if self.root is None: + return; + current = self.root; + print (current.to_string()); + while current.has_next(): + current = current.get_next(); + print (current.to_string()); + + def sort (self): + if self.size > 1: + newlist = []; + current = self.root; + newlist.append(self.root); + while current.has_next(): + current = current.get_next(); + newlist.append(current); + newlist = sorted(newlist, key = lambda node: node.get_data(), reverse = True); + newll = LinkedList(); + for node in newlist: + newll.add_node(node); + return newll; + return self; + +myList = LinkedList() +myList.add(5) +myList.add(9) +myList.add(3) +myList.add(8) +myList.add(9) +print("size="+str(myList.get_size())) +myList.print_list(); +myList = myList.sort(); +myList.print_list(); +myList.remove(8) +print("size="+str(myList.get_size())) +print(myList.remove(12)) +print("size="+str(myList.get_size())) \ No newline at end of file diff --git a/LinkedLists/LinkedList2.py b/LinkedLists/LinkedList2.py new file mode 100644 index 00000000..93d730bf --- /dev/null +++ b/LinkedLists/LinkedList2.py @@ -0,0 +1,126 @@ +class Node(object): + + def __init__ (self, d, n = None): + self.data = d + self.next_node = n + + def get_next (self): + return self.next_node + + def set_next (self, n): + self.next_node = n + + def get_data (self): + return self.data + + def set_data (self, d): + self.data = d + + def has_next (self): + if self.get_next() is None: + return False + return True + + def to_string (self): + return "Node value: " + str(self.data) + +class LinkedList (object): + + def __init__ (self, r = None): + self.root = r + self.size = 0 + + def get_size (self): + return self.size + + def add (self, d): + new_node = Node (d, self.root) + self.root = new_node + self.size += 1 + + def add_node (self, n): + n.set_next(self.root) + self.root = n + self.size += 1 + + def remove (self, d): + this_node = self.root + prev_node = None + + while this_node is not None: + if this_node.get_data() == 
d: + if prev_node is not None: + prev_node.set_next(this_node.get_next()) + else: + self.root = this_node.get_next() + self.size -= 1 + return True # data removed + else: + prev_node = this_node + this_node = this_node.get_next() + return False # data not found + + def find (self, d): + this_node = self.root + while this_node is not None: + if this_node.get_data() == d: + return d + elif this_node.get_next() == None: + return False + else: + this_node = this_node.get_next() + + def print_list (self): + print ("Print List..........") + if self.root is None: + return + this_node = self.root + print (this_node.to_string()) + while this_node.has_next(): + this_node = this_node.get_next() + print (this_node.to_string()) + + def sort (self): + if self.size > 1: + newlist = []; + current = self.root; + newlist.append(self.root); + while current.has_next(): + current = current.get_next(); + newlist.append(current); + newlist = sorted(newlist, key = lambda node: node.get_data(), reverse = True); + newll = LinkedList(); + for node in newlist: + newll.add_node(node); + return newll; + return self; + +def main(): + myList = LinkedList() + myList.add(5) + myList.add(9) + myList.add(3) + myList.add(8) + myList.add(9) + print("size="+str(myList.get_size())) + myList.print_list() + myList = myList.sort() + myList.print_list() + myList.remove(8) + print("size="+str(myList.get_size())) + print("Remove 15", myList.remove(15)) + print("size="+str(myList.get_size())) + print("Find 25", myList.find(25)) + myList.print_list() + +main() + + + + + + + + + + diff --git a/Matplotlib/Python Matplotlib Tutorial.ipynb b/Matplotlib/Python Matplotlib Tutorial.ipynb new file mode 100644 index 00000000..a5a7f77c --- /dev/null +++ b/Matplotlib/Python Matplotlib Tutorial.ipynb @@ -0,0 +1,436 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Matplotlib Tutorial\n", + "Ten examples using Python 3.6" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + 
"source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 1. Simple plot with 4 numbers" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot([1, 3, 2, 4])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 2. Points have x and y values; add title and axis labels" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot([1, 2, 3, 4], [1, 4, 9, 16])\n", + "plt.title('Test Plot', fontsize=8, color='g')\n", + "plt.xlabel('number n')\n", + "plt.ylabel('n^2')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 3. Change figure size. plot red dots; set axis scales x: 0-6 and y: 0-20" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJ4AAAEzCAYAAADAcLr/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAEqRJREFUeJzt3X+Q1PV9x/Hnix9WD4liiAZEwLYIIxe5hIuY0TaYFERg1FbbyhwpxkwuZBqN0yZVy4xxkjiTTk3SoWZkroYi7ZU4JTEhhqCYhAmZQePdFSJUDKggl0NBSdBw0gR994/9Hp7H7t1ye9zndvf1mLnZ/X6+n93vZ+E13+9+97v7/igiMBtsw1IPwKqTg2dJOHiWhINnSTh4loSDZ0n0GTxJF0j6iaRnJO2Q9Nms/RxJGyXtym7HFHj8kqzPLklLBvoFWHlSX5/jSRoHjIuINkmjgVbgOuAm4FBEfEXSHcCYiLi9x2PPAVqAeiCyx86MiF8P+CuxstLnHi8i9kdEW3b/deAZ4HzgWuDBrNuD5MLY01XAxog4lIVtIzBvIAZu5e2k3uNJmgy8H3gSOC8i9kMunMC5eR5yPrCv23J71mZVbkSxHSWdCXwbuC0iXpNU1MPytOU9tktqBBoBRo0aNXPatGnFDs2GkNbW1lci4j199SsqeJJGkgtdc0R8J2t+WdK4iNifvQ88kOeh7cDsbssTgE35thERTUATQH19fbS0tBQzNBtiJO0tpl8xZ7UCvgk8ExFf67ZqHdB1lroE+F6ehz8KzJU0JjvrnZu1WZUr5j3e5cDHgI9I2pr9zQe+AsyRtAuYky0jqV7SAwARcQj4EvBU9vfFrM2qXJ8fp6TgQ235ktQaEfV99fOVC0vCwbMkHDxLwsGzJBw8S8LBsyQcPEvCwbMkHDxLwsGzJBw8S8LBsyQcPEvCwbMkHDxLwsGzJBw8S8LBsyQcPEvCwbMkHDxLwsGzJBw8S8LBsyQcPEuiz6I9klYCC4EDEVGbtT0ETM26nA38JiLq8jx2D/A68CZwrJhfmFt1KKZa1CrgPmB1V0NE/HXXfUlfBQ738vgrI+KV/g7QKlOfwYuIn2YFGU+QVZL6K+AjAzssq3Slvsf7E+DliNhVYH0Aj0lqzQovmgEnURG0gEXAml7WXx4RHZLOBTZK2hkRP83XsXtF0IkTJ5Y4LBvq+r3HkzQC+AvgoUJ9IqIjuz0APAxc2kvfpoioj4j697ynz0qmVuZKOdT+GbAzItrzrZQ0KpueAEmjyFUD3V7C9qyCFFOKdg2wBZgqqV3SJ7JVN9LjMCtpvKT12eJ5wM8kbQN+DvwgIjYM3NCtnBVzVruoQPtNedo6gPnZ/eeBGSWOzyqUr1xYEg6eJeHgWRIOniXh4FkSDp4l4eBZEg6eJeHgWRIOniXh4FkSDp4l4eCVo+ZmmDwZh
g3L3TY3px7RSSv1G8g22JqbobEROjtzy3v35pYBGhrSjeskeY9XbpYtezt0XTo7c+1lxMErNy++eHLtQ5SDV24K/RCqzH4g5eCVm3vugZqad7bV1OTay4iDV24aGqCpCSZNAil329RUVicW4LPa8tTQUHZB68l7PEvCwbMkHDxLwsGzJBw8S6KYEhYrJR2QtL1b292SfiVpa/Y3v8Bj50l6VtJuSXcM5MCtvBWzx1sFzMvT/vWIqMv+1vdcKWk48A3gauBiYJGki0sZrFWOPoOX1bM71I/nvhTYHRHPR8TvgG8B1/bjeawClfIe7zOSfpEdisfkWX8+sK/bcnvWZtbv4N0P/BFQB+wHvpqnj/K0RaEnlNQoqUVSy8GDB/s5LCsX/QpeRLwcEW9GxFvAv5G/0mc7cEG35QlARy/P6YqgVaRfwZM0rtvin5O/0udTwBRJF0o6jVwhx3X92Z5VnmImWFkDzAbGSmoHvgDMllRH7tC5B/hU1nc88EBEzI+IY5I+AzwKDAdWRsSOU/IqrOwoouDbrmTq6+ujpaUl9TCsHyS1FjODk69cWBIOniXh4FkSDp4l4eBZEg6eJeHgWRIOniXh4FkSDp4l4eBZEg6eJeHgWRIOniXh4FkSDp4l4eBZEg6eJeHgWRIOniXh4FkSDp4l4eBZEg6eJeHgWRL9rQj6z5J2ZmXKHpZ0doHH7pH0dFY11KUB7Lj+VgTdCNRGxCXAL4E7e3n8lVnV0D7LGlj16FdF0Ih4LCKOZYtPkCtBZla0gXiPdzPwwwLrAnhMUqukxgHYllWIkuYyk7QMOAYUmpv88ojokHQusFHSzmwPmu+5GoFGgIllNgWmnbx+7/EkLQEWAg1RoNZZRHRktweAh8lfObSrryuCVpH+VgSdB9wOXBMRnQX6jJI0uus+MJf8lUOtChXzccoaYAswVVK7pE8A9wGjyR0+t0pakfUdL6lrzovzgJ9J2gb8HPhBRGw4Ja/Cyk6f7/EiYlGe5m8W6NsBzM/uPw/MKGl0VrF85cKScPAsCQfPknDwLAkHz5Jw8CwJB8+ScPAsCQfPknDwLAkHz5Jw8CwJB8+ScPAsCQfPknDwLAkHz5Jw8CwJB8+ScPAsCQfPknDwLAkHz5Jw8CwJB8+SKCp4BaqCniNpo6Rd2e2YAo9dkvXZlRX6MSt6j7eKE6uC3gH8KCKmAD/Klt9B0jnAF4BZ5CpFfaFQQK26FBW8fFVBgWuBB7P7DwLX5XnoVcDGiDgUEb8mV8K2Z4CtCpXyHu+8iNgPkN2em6fP+cC+bsvtWZtVuVN9cqE8bXmLOEpqlNQiqeXgwYOneFiWWinBe1nSOIDs9kCePu3ABd2WJwAd+Z7MFUGrSynBWwd0naUuAb6Xp8+jwFxJY7KTirlZm1W5Yj9OyVcV9CvAHEm7gDnZMpLqJT0AEBGHgC8BT2V/X8zarMqpQN3spOrr66OlxRMBlSNJrcVMpuMrF5aEg2dJOHiWhINnSTh4loSDN1iam2HyZBg2LHfbXGj6t+pQ0iR6VqTmZmhshM5s9q29e3PLAA0N6caVkPd4g2HZsrdD16WzM9depRy8wfDiiyfXXgUcvMFQaP7dKp6X18EbDPfcAzU172yrqcm1VykHbzA0NEBTE0yaBFLutqmpak8swGe1g6ehoaqD1pP3eJaEg2dJOHiWhINnSTh4loSDZ0k4eJaEg2dJOHiWhINnSTh4loSDZ0n0O3iSpkra2u3vNUm39egzW9Lhbn3uKn3IVgn6/e2UiHgWqAOQNBz4FfBwnq6bI2Jhf7djlWmgDrUfBZ6LiL0D9HxW4QYqeDcCawqs+5CkbZJ+KGn6AG3PylzJwZN0GnAN8N95VrcBkyJiBvCvwHd7eR5XBK0iA7HHuxpoi4iXe66IiNci4rfZ/fXASElj8z2JK4JWl4EI3iIKHGYlvVeSsvuXZtt7dQC2aWWupN9cSKohVw30U93algJExArgBuDTko4BbwA3xlCsBGmDr
qTgRUQn8O4ebSu63b8PuK+UbVhl8pULS8LBsyQcPEvCwbMkHDxLwsGzJBw8S8LBMwBWrFjB6tWrAVi1ahUdHXnnOhwwrhZV4d58802GDx/eZ7+lS5cev79q1Spqa2sZP378KRuX93iD7MiRIyxYsIAZM2ZQW1vLQw89BMCGDRuYNm0aV1xxBbfeeisLF+a+O3v33Xdz7733Hn98bW0te/bsAeC6665j5syZTJ8+naampuN9zjzzTO666y5mzZrFli1baG1t5cMf/jAzZ87kqquuYv/+/SeMq2s7a9eupaWlhYaGBurq6njjjTdOyb+DgzfINmzYwPjx49m2bRvbt29n3rx5HD16lE9+8pN8//vfZ/Pmzbz00ktFPdfKlStpbW2lpaWF5cuX8+qrue9fHDlyhNraWp588klmzZrFLbfcwtq1a2ltbeXmm29mWS9Fv2+44Qbq6+tpbm5m69atnHHGGQPyunty8AbZ+973Ph5//HFuv/12Nm/ezFlnncXOnTu58MILmTJlCpJYvHhxUc+1fPlyZsyYwWWXXca+ffvYtWsXAMOHD+f6668H4Nlnn2X79u3MmTOHuro6vvzlL9Pe3n7KXl+x/B5vkF100UW0trayfv167rzzTubOncs111xD9u2xE4wYMYK33nrr+PLRo0cB2LRpE48//jhbtmyhpqaG2bNnH193+umnH39fFxFMnz6dLVu2nOJXdnK8xxtkHR0d1NTUsHjxYj73uc/R1tbGtGnTeOGFF3juuecAWLPm7a83Tp48mba2NgDa2tp44YUXADh8+DBjxoyhpqaGnTt38sQTT+Td3tSpUzl48ODx4P3+979nx44dvY5x9OjRvP766yW/1t54jzfInn76aT7/+c8zbNgwRo4cyf3338/pp59OU1MTCxYsYOzYsVxxxRVs374dgOuvv57Vq1dTV1fHBz/4QS666CIA5s2bx4oVK7jkkkuYOnUql112Wd7tnXbaaaxdu5Zbb72Vw4cPc+zYMW677TamTy/885ebbrqJpUuXcsYZZ7Bly5ZT8j7PM3QPQZs2beLee+/lkUceST2Uk+YZum1I86F2CJo9ezazZ89OPYxTyns8S8LBsyQcPEvCwbMkHDxLwsGzJBw8S2IgqkXtkfR0VvHzhMsNylkuabekX0j6QKnbtPI3UB8gXxkRrxRYdzUwJfubBdyf3VoVG4xD7bXA6sh5Ajhb0rhB2K4NYQMRvAAek9QqqTHP+vOBfd2W27M2q2IDcai9PCI6JJ0LbJS0MyJ+2m19vm84nvCVmCy0jQATJ04cgGHZUFbyHi8iOrLbA+Sqvl/ao0s7cEG35QnACb+dc0XQ6lJS8CSNkjS66z4wF9jeo9s64G+ys9vLgMMRceLPnKyqlHqoPQ94OPu9wAjgvyJiQ4+qoOuB+cBuoBP4eInbtApQakXQ54EZedq7VwUN4G9L2Y5VHl+5sCQcPEvCwbMkHDxLwsGzJBy8vjQ3w+TJMGxY7ra5OfWIKoJ/3tib5mZobITOztzy3r25ZYCGhnTjqgDe4/Vm2bK3Q9elszPXbiVx8Hrz4osn125Fc/B6U+hbMv72TMkcvN7ccw/U1LyzraYm124lcfB609AATU0waRJIudumJp9YDACf1falocFBOwW8x7MkHDxLwsGzJBw8S8LBsyQcPEvCwbMkHDxLwsGzJBw8S8LBsyQcPEui38GTdIGkn0h6RtIOSZ/N02e2pMNZtdCtku4qbbhWKUr5dsox4O8joi0r3NMqaWNE/G+PfpsjYmEJ27EK1O89XkTsj4i27P7rwDO44KIVaUDe40maDLwfeDLP6g9J2ibph5IKT5JqVaXkL4JKOhP4NnBbRLzWY3UbMCkifitpPvBdckW48z2PK4JWkVILM44kF7rmiPhOz/UR8VpE/Da7vx4YKWlsvudyRdDqUspZrYBvAs9ExNcK9Hlv1g9Jl2bbe7W/27TKUcqh9nLgY8DTkrZmbf8ITITjxRlvAD4t6
RjwBnBjDMW56G3Q9Tt4EfEz8ld0797nPuC+/m7DKpevXFgSDp4l4eBZEg6eJeHgWRIOniXh4FkSDp4l4eBZEg6eJeHgWRIOniVRmcHzpChDXuWVovWkKGWh8vZ4nhSlLFRe8DwpSlmovOB5UpSyUHnB86QoZaHygudJUcpC5Z3VgidFKQOVt8ezsuDgWRIOniXh4FkSDp4lUWrRnnmSnpW0W9Idedb/gaSHsvVPZuXMzEoq2jMc+AZwNXAxsEjSxT26fQL4dUT8MfB14J/6uz2rLKXs8S4FdkfE8xHxO+BbwLU9+lwLPJjdXwt8tKt6lFW3UoJ3PrCv23I7J5aiPd4nIo4Bh4F3l7BNqxClXLnIt+fqWYKsmD65jt0qggL/J2l7CWM7FcYCr6QeRA9DcUxTi+lUSvDagQu6LU8AOgr0aZc0AjgLOJTvySKiCWgCkNQSEfUljG3AeUzFkdRSTL9SDrVPAVMkXSjpNOBGYF2PPuuAJdn9G4AfuzCjQWmFGY9J+gzwKDAcWBkROyR9EWiJiHXkStX+h6Td5PZ0Nw7EoK38lfTtlKyg9voebXd1u38U+Mt+PHVTKeM6RTym4hQ1JvnIZyn4kpklMaSC19cluATj6XOiwFQkDZf0P5IeST2WLpLOlrRW0s7s3+xDBfsOlUNtdgnul8Acch/DPAUsyjMp32COaRwwrvtEgcB1KcfURdLfAfXAu4bKJIWSHiQ3aeID2ScdNRHxm3x9h9Ier5hLcINqqE4UKGkCsAB4IPVYukh6F/Cn5D7JICJ+Vyh0MLSCV8wluGT6mChwsP0L8A/AW6kH0s0fAgeBf8/eAjwgaVShzkMpeEVfXhtsfUwUONhjWQgciIjWlOPIYwTwAeD+iHg/cAQo+D59KAWvmEtwg66viQITuBy4RtIecm9HPiLpP9MOCcj9/7VHRNcRYS25IOY1lIJXzCW4QVXMRIGDLSLujIgJETGZ3L/RjyNiceJhEREvAfskdX1J4KNAwZOwIfO72kKX4BIPK+9EgdkVGzvRLUBztuN4Hvh4oY5D5uMUqy5D6VBrVcTBsyQcPEvCwbMkHDxLwsGzJBw8S8LBsyT+H2CR15so5DDBAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(2,5)) # 2 inches wide x 5 inches tall\n", + "plt.plot([1, 2, 3, 4], [1, 4, 9, 16], 'ro') # red-o\n", + "plt.axis([0, 6, 0, 20]) # [xmin, xmax, ymin, ymax]\n", + "plt.annotate('square it', (3,6))\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 4. Bar chart with four bars" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD8CAYAAABXe05zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAADBdJREFUeJzt3X2MZXV9x/H3p6yUJ2tLmZoKxKGRaKxEoFNixZIWSKPFh9qaAEZT7MPWpvUpKKFpozZtaktpY1OrZotWiRSaok0NGEobRQQaYHhcccEYoIhoHWvSamuh4Ld/3LNhGGZ3Du7cufOdfb+SCXPP/c3Mdw5333P2zD13U1VIkvr4vlkPIEl6agy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1IzhlqRmtk3jkx5xxBE1Pz8/jU8tSVvSLbfc8o2qmhuzdirhnp+fZ3FxcRqfWpK2pCT/Nnatp0okqRnDLUnNGG5JasZwS1IzhluSmjHcktSM4ZakZgy3JDVjuCWpmalcObkvQmY9wkwV/uPNkvbOI25JasZwS1IzhluSmjHcktSM4ZakZgy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1IzhlqRmDLckNTMq3EneluSuJJ9PcmmSg6Y9mCRpdWuGO8mRwJuBhap6AXAAcNa0B5MkrW7sqZJtwMFJtgGHAA9NbyRJ0t6sGe6q+gpwIfAA8FXgP6vq6mkPJkla3ZhTJT8EvAo4BngWcGiS162ybnuSxSSLS0tL6z+pJAkYd6rkdOC+qlqqqv8DPgG8eOWiqtpRVQtVtTA3N7fec0qSBmPC/QDwoiSHJAlwGrBrumNJkvZkzDnuG4HLgVuBncPH7JjyXJKkPdg2ZlFVvQt415RnkSSN4JWTktSM4ZakZgy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1IzhlqRmDLckNWO4JakZwy1JzRhuSWrGcEtSM4Zbkpox3JLUjOGWpGYMtyQ1Y7glqRnDLUnNGG5JasZwS1IzhluSmjHcktSM4ZakZgy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1IzhlqRmDLckNWO4JakZwy1JzRhuSWpmVLiT/GCSy5PcnWRXkp+a9mCSpNVtG7nuL4Crquo1SQ4EDpniTJKkvVgz3El+ADgFOAegqh4BHpnuWJKkPRlzquTHgCXgb5LcluSiJIdOeS5J0h6MCfc24ETgA1V1AvDfwPkrFyXZnmQxyeLS0tI6jylJ2m1MuB8EHqyqG4fblzMJ+RNU1Y6qWqiqhbm5ufWcUZK0zJrnuKvqa0m+nOS5VXUPcBrwhemPJqmbnVf+2axHmKnjzjh3Q77O2GeVvAm4ZHhGyb3AG6Y3kiRpb0aFu6puBxamPIskaQSvn
JSkZgy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1IzhlqRmDLckNWO4JakZwy1JzRhuSWpm7Mu6qotk1hPMVtWsJ5CmziNuSWrGcEtSM4Zbkpox3JLUjOGWpGYMtyQ1Y7glqRnDLUnNGG5JasZwS1IzhluSmjHcktSM4ZakZgy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1IzhlqRmDLckNWO4JakZwy1JzYwOd5IDktyW5IppDiRJ2runcsT9FmDXtAaRJI0zKtxJjgLOAC6a7jiSpLWMPeJ+L3Ae8N0pziJJGmHNcCd5OfD1qrpljXXbkywmWVxaWlq3ASVJTzTmiPtk4JVJ7gcuA05N8rGVi6pqR1UtVNXC3NzcOo8pSdptzXBX1e9U1VFVNQ+cBXy6ql439ckkSavyedyS1My2p7K4qq4BrpnKJJKkUTzilqRmDLckNWO4JakZwy1JzTylX05KW12SWY8wU1U16xE0gkfcktSM4ZakZgy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1IzhlqRmDLckNWO4JakZwy1JzRhuSWrGcEtSM4Zbkpox3JLUjOGWpGYMtyQ1Y7glqRnDLUnNGG5JasZwS1IzhluSmjHcktSM4ZakZgy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1Mya4U5ydJLPJNmV5K4kb9mIwSRJq9s2Ys2jwLlVdWuSpwO3JPnnqvrClGeTJK1izSPuqvpqVd06vP8tYBdw5LQHkySt7imd404yD5wA3DiNYSRJaxsd7iSHAR8H3lpV/7XK/duTLCZZXFpaWs8ZJUnLjAp3kqcxifYlVfWJ1dZU1Y6qWqiqhbm5ufWcUZK0zJhnlQT4ELCrqv58+iNJkvZmzBH3ycDrgVOT3D68/fyU55Ik7cGaTwesquuAbMAskqQRvHJSkpox3JLUjOGWpGYMtyQ1Y7glqRnDLUnNGG5JasZwS1IzhluSmjHcktSM4ZakZgy3JDVjuCWpGcMtSc0YbklqxnBLUjOGW5KaMdyS1IzhlqRmDLckNWO4JakZwy1JzRhuSWrGcEtSM4Zbkpox3JLUjOGWpGYMtyQ1Y7glqRnDLUnNGG5JasZwS1IzhluSmjHcktSM4ZakZgy3JDVjuCWpGcMtSc2MCneSlya5J8mXkpw/7aEkSXu2ZriTHAD8FfAy4PnA2UmeP+3BJEmrG3PEfRLwpaq6t6oeAS4DXjXdsSRJezIm3EcCX152+8FhmyRpBraNWJNVttWTFiXbge3DzW8nuWdfBpuhI4BvzOqLZ9Xd3cpM9x9x/+2LuP/20dv35YOfPXbhmHA/CBy97PZRwEMrF1XVDmDH2C+8WSVZrKqFWc/Rlftv37j/9s3+sv/GnCq5GTg2yTFJDgTOAj453bEkSXuy5hF3VT2a5LeBfwIOAD5cVXdNfTJJ0qrGnCqhqj4FfGrKs2wW7U/3zJj7b9+4//bNfrH/UvWk3zNKkjYxL3mXpGb2i3AneXWSSvK8EWvvT3LEKttvGP47n+S105hzM0ry7RW3z0nyvlnNs1Ws3K9aXZLHkty+7G0+yc8kuWLWs83SfhFu4GzgOibPiPmeVNWLh3fngf0m3NKMfaeqjl/2dv+sB9oMtny4kxwGnAz8KkO4h5/Y1yS5PMndSS7JiisPkhyc5Kokvz7c3n2E9MfATw8//d+2gd/KppPkFUluTHJbkn9J8sxh+7uTfHjYx/cmefOw/dAkVya5I8nnk5w52+9gtjLxp8O+2Ll8fyR5R5Kbk9yZ5PdnOedmluSkJDcMj8Ebkjx32H5AkguH/XpnkjcN208b1u4cHqPfP9vv4Hsz6lklzf0CcFVVfTHJN5OcOGw/AfhxJhcTXc8k7tcN9x3G5DVZLq6qi1d8vvOBt1fVy6c/+qZwcJLbl90+nMefx38d8KKqqiS/BpwHnDvc9zzgZ4GnA/ck+QDwUuChqjoDIMkzNuIb2MR+E
TgeeCGTK/5uTnItcBxwLJPXCQrwySSnVNW1M5t0dpY//u6rqlevuP9u4JThacunA38E/BKTq7iPAU4Y7js8yUHAR4DThh5cDPwm8N4N+U7W0f4Q7rN5/H/MZcPtK4GbqupBgOGBMc/j4f5H4IKqumRjR92UvlNVx+++keQcYPeVaUcBf5fkR4EDgfuWfdyVVfUw8HCSrwPPBHYCFyb5E+CKqvrcRnwDm9hLgEur6jHg35N8FvhJ4BTg54DbhnWHMQn5/hjuJzz+VvEM4KNJjmXyUhxPG7afDnywqh4FqKpvJnkhk/h/cVjzUeC3aBjuLX2qJMkPA6cCFyW5H3gHcCaTo5iHly19jCf+ELseeNnK0yd6kr8E3ldVxwG/ARy07L4n7d/hD8xPMAn4e5K8c8Mm3Zz29PgK8J5l53WfU1Uf2sjBGvkD4DNV9QLgFTz+GAxPfk2lLfPneUuHG3gNk9Mdz66q+ao6mslR4UvW+Lh3Av8BvH+V+77F5K//mhztfGV4/5fXWpzkWcD/VNXHgAuBE9f4kK3uWuDM4XzsHJMj7ZuYXKX8K8PvZ0hyZJIfmeGcm9nyx+A5y7ZfDbwxyTaAJIczOa0yn+Q5w5rXA5/doDnX1VYP99nAP6zY9nHGPSvkrcBBSS5Ysf1O4NHhF2z79S8ngXcDf5/kc4x7RbbjgJuGU1O/C/zhFGfbtIaYPMzksXkncAfwaeC8qvpaVV0N/C3wr0l2ApfjwcKeXMDkb2/XM3lJjt0uAh4A7kxyB/Daqvpf4A1MHrM7ge8CH9zogdeDV05KG2w41/rXVXXSrGdRT1v9iFvaVJK8EbgU+L1Zz6K+POKWpGY84pakZgy3JDVjuCWpGcMtSc0YbklqxnBLUjP/D8ogdWIQeXCjAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.clf() # clear figure\n", + "x = np.arange(4)\n", + "y = [8.8, 5.2, 3.6, 5.9]\n", + "plt.xticks(x, ('Ankit', 'Hans', 'Joe', 'Flaco'))\n", + "# plt.bar(x, y)\n", + "# plt.bar(x, y, color='y')\n", + "plt.bar(x, y, color=['lime', 'r', 'k', 'tan'])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 5. Two sets of 10 random dots plotted" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAGENJREFUeJzt3X9w1fWd7/HnO6DSqGsXYWeLERK7bJE6lR8njJRKOa1a9F6hncFTNfY6Ox0zDdbV3e1QLUvqhM3sENhu1ymbmtvrda4cy5xNtRdd760z9lhG2tochGFNgZEhgCGtsMy1VzdStLz3j5PEJARykpyc7zmf83rMOCffTz755s0ZefH5fs7n8/2auyMiImGpiLoAERHJP4W7iEiAFO4iIgFSuIuIBEjhLiISIIW7iEiAFO4iIgEaNdzN7AkzO2Fmr5/n+2Zmj5nZITPbZ2aL8l+miIiMRS4j9yeBlRf4/q3A3L7/6oHWiZclIiITMXW0Du6+08yqL9BlNfC/PLvV9Zdm9lEz+5i7/+ZC550xY4ZXV1/otCIiMtzu3bv/3d1njtZv1HDPwVXAm4OOu/vaLhju1dXVZDKZPPx6EZHyYWZHc+mXjw9UbYS2EW9YY2b1ZpYxs8zJkyfz8KtFRGQk+Qj3buDqQcdVQM9IHd29zd1j7h6bOXPUqwoRERmnfIT7DuC/9a2auQH43Wjz7SIiMrlGnXM3sx8CK4AZZtYNfBu4CMDdvw+8ANwGHAJ6gb8YbzHvv/8+3d3dnD59erynKHnTpk2jqqqKiy66KOpSRKSE5bJa5q5Rvu/A/fkopru7m8svv5zq6mrMRprKD5u7c+rUKbq7u6mpqYm6HBEpYUW1Q/X06dNceeWVZRnsAGbGlVdeWdZXLhKOll0tpLvSQ9rSXWladrVEVFF5KapwB8o22PuV+59fwlE7q5bEttWkb/hTqKggfcOfkti2mtpZtVGXVhbysc5dROQc8Z/3kHr6DInb36GhElpjb5F6+hLi1/SAZh0nXdGN3KM2ZcoUFixYwHXXXcftt9/O22+/Paaff/TRR9myZcuI32tra2PevHnMmzePJUuW8Morr+SjZJHitH498QO/pyEDGz8LDRmIH/g9rF8fdWVlobTDPZmE6mqoqMi+JpMTPuVHPvIR9u7dy+uvv8706dPZunXrhM8J8Pzzz/P444/zyiuvcODAAb7//e9z991389vf/jYv5xcpOseOka6G1hhs+Fn2NV2dbZfJV7rhnkxCfT0cPQru2df6+rwEfL+lS5dy/PjxgePNmzdTW1vLpz71Kb797W8PtDc3N/OJT3yCm266iYMHD454rk2bNrF582ZmzJgBwKJFi7j33nvz9o+HSLFJL/k
TEndA6l+gKZ19TdyRbZfJV7rhvn499PYObevtzdsl3x/+8AdeeuklVq1aBcCLL77IG2+8wa9+9Sv27t3L7t272blzJ7t372b79u3s2bOHZ555ho6OjhHP19nZyeLFi4e0xWIxOjs781KvSLHpuCdO6rlLiB/JHsePQOq5S+i4Jx5lWWWjdD9QPd+l3QQv+d577z0WLFjAkSNHWLx4MTfffDOQDfcXX3yRhQsXAvDuu+/yxhtv8M477/ClL32JyspKgIF/DHLh7lodI8Fa9/Ufwh8nswOuY8dg9mzif9tMvK4u6tLKQumO3GfPHlt7jvrn3I8ePcqZM2cGpk3cnUceeYS9e/eyd+9eDh06xFe/+lUgt+WL8+fPZ/fu3UPaXnvtNebPnz+hekWKWl0dHDkCZ89mXxXsBVO64d7cDH2j5QGVldn2PLjiiit47LHH2LJlC++//z5f+MIXeOKJJ3j33XcBOH78OCdOnGD58uU8++yzvPfee7zzzjs899xzI55v3bp1fPOb3+TUqVMA7N27lyeffJK1a9fmpV4RkcFKd1qmfwQw6JKP5ua8jgwWLlzI9ddfz/bt2/nKV77C/v37Wbp0KQCXXXYZ27ZtY9GiRXz5y19mwYIFzJkzhxtvvHHEc61atYrjx4/z6U9/GjPj8ssvZ9u2bXzsYx/LW70iIv0se2uYwovFYj78YR379+/n2muvjaSeYqL3QUTOx8x2u3tstH6lOy0jIiLnpXCXiZuEzWQiMjGlO+cuxaF/M1n/noP+zWSglREiEdLIXSZmkjeTieRFGV5dauQuEzNJm8lE8qZMry41cpeJmaTNZCJ5U6ZXlwr3Yd566y3uvvturrnmGhYvXszSpUt59tlnJ3TOVatW8dRTTw0c33fffWzevHmipRaHSd5MJjJhZXp1WbLhPhmP8HJ3vvjFL7J8+XIOHz48cFOw7u7uc/p+8MEHOZ/3scceo7Gxkbfffpuf//znvPrqqzz00EPjrrOo1NVBWxvMmQNm2de2tqAvd6XElOnVZcmGe+2sWhLtiYGAT3elSbQnJvQIr5/+9KdcfPHFfO1rXxtomzNnDg888AAATz75JHfccQe33347t9xyCy+//DIrVqxgzZo1zJs3j7q6OkbaFFZdXU19fT3r1q1j7dq1fO973+Oiiy4ad51FR/cPkWJWpleXJfuBarwmTmpNikR7goZYA62ZVlJrUsRrxn870c7OThYtWnTBPr/4xS/Yt28f06dP5+WXX2bPnj10dnYya9Ysli1bxq5du/jMZz5zzs994xvf4OMf/zg33ngjy5cvH3eNIjJGBbhVSTEq2ZE7ZAO+IdbAxp0baYg1TCjYR3L//fdz/fXXU1v74dXAzTffzPTp0weOlyxZQlVVFRUVFQO3Ch7Jvn37cHcOHDjA2bNn81qniIyiDK8uSzrc011pWjOtbFi+gdZM6zlz8GP1yU9+ktdee23geOvWrbz00kucPHlyoO3SSy8d8jOXXHLJwNdTpkwZcS7+7NmzrF27lqeeeoq5c+fS2to6oTpFREZTsuHeP8eeWpOiKd40MEUzkYD/3Oc+x+nTp4eEb+/wJVTj8PjjjzN37lxWrFjBd77zHVpaWob8gyEikm8lG+4dPR1D5tj75+A7ekZ+zF0uzIwf//jH/OxnP6OmpoYlS5Zw7733smnTpnGf88SJE2zatIktW7YAMGvWLB588EHWrVs37nNKESvDnZBSnHTL3yKk96FEDd8JCdlVGVoaKnmkW/6KFFqZ7oSU4qRwF8mXMt0JKcWp6MI9qmmiYlHuf/6SVqY7IaU4FVW4T5s2jVOnTpVtwLk7p06dYtq0aVGXIuNRpjshpTgV1Q7Vqqoquru7y3qZ4LRp06iqqoq6DBmPMt0JKcWpqFbLiIjIhWm1jIhIGVO4i4gESOEuIhIghbuISIAU7jIhk/FELBGZOIW7TMhkPBFLRCYup3A3s5VmdtDMDpnZwyN8f7aZpc1sj5ntM7Pb8l2
oRojFafATsRrTjQO3Yc73g1NEZGxGDXczmwJsBW4F5gN3mdn8Yd3+Fki5+0LgTuCf812oRojFa7KfiCUiY5fLyH0JcMjdD7v7GWA7sHpYHwf+qO/rK4Ce/JWYpRFi8cr3E7FEZOJyCfergDcHHXf3tQ32KHCPmXUDLwAP5KW6YTRCLD6T8UQsEZm4XMLdRmgbfs+Cu4An3b0KuA14yszOObeZ1ZtZxswy47l/jEaIxWcynoglIhM36r1lzGwp8Ki7f6Hv+BEAd//7QX06gZXu/mbf8WHgBnc/cb7zjvXeMoNHiPGa+DnHIiLlIJ/3lukA5ppZjZldTPYD0x3D+hwDPt/3i68FpgF5vbWjRogiIrnL6a6QfUsbvwtMAZ5w92YzawIy7r6jb/XMfwcuIztls87dX7zQOXVXSBGRsct15J7T/dzd/QWyH5QObmsc9PWvgWVjLVJERCaHdqiKiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4T4eySRUV0NFRfY1mYy6IhGRIaZGXUDJSSahvh56e7PHR49mjwHq6qKrS0RkEI3cx2r9+g+DvV9vb7ZdRKRIKNzH6tixsbWLiERA4T5GLbddQbp6aFu6OtsuIlIsFO5jVLt6LYkEAwGfroZEItsuIlIscgp3M1tpZgfN7JCZPXyePgkz+7WZdZrZ0/kts3jE72sm9WffInFnBY1xSNxZQerPvkX8vuaC1tGyq4V0V3pIW7orTcuuloLWISLFadRwN7MpwFbgVmA+cJeZzR/WZy7wCLDM3T8JPDQJtRaN+H3NNNyyno2fhYZb1hc82AFqZ9WSaE8MBHy6K02iPUHtrNqC1yIixSeXkfsS4JC7H3b3M8B2YPWwPvcBW939/wG4+4n8lllc0l1pWjOtbFi+gdZM6zkj6EKI18RJrUmRaE/QmG4k0Z4gtSZFvCZe8FpEpPjkEu5XAW8OOu7uaxvsz4E/N7NdZvZLM1uZrwKLTf8IObUmRVO8aSBgowr4hlgDG3dupCHWoGAXkQG5hLuN0ObDjqcCc4EVwF3AD8zso+ecyKzezDJmljl58uRYay0KHT0dQ0bI/SPojp6OgtdSDFcQIlKccgn3buDqQcdVQM8Iff63u7/v7l3AQbJhP4S7t7l7zN1jM2fOHG/NkVq3bN05I+R4TZx1y9YVtI5iuoIQkdEVehFELuHeAcw1sxozuxi4E9gxrM+PgTiAmc0gO01zOJ+FylDFdAUhIqMr9CIIcx8+wzJCJ7PbgO8CU4An3L3ZzJqAjLvvMDMD/gFYCfwBaHb37Rc6ZywW80wmM+E/gIhIqegP9IZYA62Z1nEtgjCz3e4eG7VfLuE+GRTukk8tu1qonVU75C9KuitNR09HwafMRC6kMd3Ixp0b2bB8A03xpjH/fK7hrh2qEgSt+5dSUMhFELrlrwRh8Lr/iVzyikyWwYsg4jVx4tXxSd2fopG7BEPr/qWYFXoRhObcJRj5+LBKpNhpzl3KSrorTWLbalI/mkLT5/+O1I+mkNi2Wuv+pWwp3CUIHf/aRurpM8RffQvcib/6Fqmnz9Dxr21RlyYSCU3LSBiqq7PPsx1uzhw4cqTQ1YhMGk3LSHnR4w9FhlC4Sxhmzx5bu0jgFO4ShuZmqKwc2lZZmW0XKUMKdwlDXR20tWXn2M2yr21t2XaRMqQdqhKOujqFuUgfjdxFRAJUWuGeTGaXvFVUZF+TyagrEhEpSqUzLZNMQn099PZmj48ezR6DLsVFRIYpnZH7+vUfBnu/3t5su4iIDFE64a5NKiIiOSudcNcmFRGRnJVOuGu
TiohIzkon3LVJRURKXQFX/JXOahnQJhURKV0FXvFXOiN3EZFSVuAVfwp3EQlay66Wc57Ile5K07KrpbCFFHjFn8JdRIJWO6uWRHtiIOD7n7VbO6u2sIUUeMWfwl1EghaviZNakyLRnqAx3UiiPRHNw9MLvOJP4S4iwYvXxGmINbBx50YaYg2FD3Yo+Io/hbuIBC/dlaY108qG5RtozbSeMwdfMHV12Wf6nj2bfZ3E1X8KdxEJWv8ce2pNiqZ408AUTWQBXyAKdxEJWkdPx5A59v45+I6ejogrm1zm7pH84lgs5plMJpLfLSJSqsxst7vHRuunkbuISIAU7iIiAVK4i4gESOEuIhIghbuISIAU7iIiAVK4i4gESOEuIhIghbuISIByCnczW2lmB83skJk9fIF+a8zMzWzU3VMiIjJ5Rg13M5sCbAVuBeYDd5nZ/BH6XQ78JfBqvosUEZGxyWXkvgQ45O6H3f0MsB1YPUK/jUALcDqP9YmIyDjkEu5XAW8OOu7uaxtgZguBq939+TzWJiIi45RLuNsIbQO3kjSzCuAfgb8Z9URm9WaWMbPMyZMnc69SRETGJJdw7wauHnRcBfQMOr4cuA542cyOADcAO0b6UNXd29w95u6xmTNnjr9qERG5oFzCvQOYa2Y1ZnYxcCewo/+b7v47d5/h7tXuXg38Eljl7rpZu4hIREYNd3f/APg68BNgP5By904zazKzVZNdoIiIjN3UXDq5+wvAC8PaGs/Td8XEyxIRkYnQDlURkQAp3EVEAqRwFxEJkMJdRCRACncRkQAp3EVEAqRwFxEJkMJdRCRACncRkQAp3EVEAqRwFwlRMgnV1VBRkX1NJqOuSAosp3vLiEgJSSahvh56e7PHR49mjwHq6qKrSwpKI3eR0Kxf/2Gw9+vtzbZL2VC4i4Tm2LGxtUuQFO4ioZk9e2ztEiSFu0hompuhsnJoW2Vltl3KhsJdJDR1ddDWBnPmgFn2ta1NH6aWGa2WEQlRXZ3CvMxp5C4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEiCFu4hIgBTuIiIBUriLiARI4S4iEqCcwt3MVprZQTM7ZGYPj/D9vzazX5vZPjN7yczm5L9UERHJ1ajhbmZTgK3ArcB84C4zmz+s2x4g5u6fAtqBlnwXKiIiuctl5L4EOOTuh939DLAdWD24g7un3b237/CXQFV+yxQRkbHIJdyvAt4cdNzd13Y+XwX+z0SKEhGRiZmaQx8boc1H7Gh2DxADPnue79cD9QCzZ8/OsUQRERmrXEbu3cDVg46rgJ7hnczsJmA9sMrdfz/Sidy9zd1j7h6bOXPmeOoVEZEc5BLuHcBcM6sxs4uBO4EdgzuY2ULgcbLBfiL/ZYqIyFiMGu7u/gHwdeAnwH4g5e6dZtZkZqv6um0GLgP+xcz2mtmO85xOREQKIJc5d9z9BeCFYW2Ng76+Kc91iYjIBGiHqohIgBTuIiIBUriL5EnLrhbSXekhbemuNC27tGFbCk/hXsqSSaiuhoqK7GsyGXVFZa12Vi2J9sRAwKe70iTaE9TOqo24MilHOX2gKkUomYT6eujtu+vD0aPZY4C6uujqKmPxmjipNSkS7QkaYg20ZlpJrUkRr4lHXZqUIY3cS9X69R8Ge7/e3my7RCZeE6ch1sDGnRtpiDUo2CUyCvdSdezY2NqlINJdaVozrWxYvoHWTOs5c/CFoLl/AYV76TrfvXl0z57I9M+xp9akaIo3DUzRFDrgNfcvoHAvXc3NUFk5tK2yMtsukejo6Rgyx94/B9/R01HQOgbP/TemGwf+wdEUUXkx9xFv8DjpYrGYZzKZSH53MJLJ7Bz7sWPZEXtzsz5MlQGN6UY27tzIhuUbaIo3RV2O5ImZ7Xb32Gj9NHIvZXV1cOQInD2bfVWwS59
imPuXaCncRQJTLHP/Ei2Fu0hgimXuX6KlOXcRkRKiOXcRkTKmcBcRCZDCXUQkQAp3EZEAKdxFRAIU2WoZMzsJHB3nj88A/j2P5ZQ6vR9D6f34kN6LoUJ4P+a4+8zROkUW7hNhZplclgKVC70fQ+n9+JDei6HK6f3QtIyISIAU7iIiASrVcG+LuoAio/djKL0fH9J7MVTZvB8lOecuIiIXVqojdxERuYCSC3czW2lmB83skJk9HHU9UTGzq80sbWb7zazTzB6MuqZiYGZTzGyPmT0fdS1RM7OPmlm7mR3o+/9kadQ1RcXM/qrv78nrZvZDM5sWdU2TraTC3cymAFuBW4H5wF1mNj/aqiLzAfA37n4tcANwfxm/F4M9COyPuogi8U/A/3X3ecD1lOn7YmZXAX8JxNz9OmAKcGe0VU2+kgp3YAlwyN0Pu/sZYDuwOuKaIuHuv3H31/q+fofsX9yroq0qWmZWBfwX4AdR1xI1M/sjYDnwPwDc/Yy7vx1tVZGaCnzEzKYClUBPxPVMulIL96uANwcdd1PmgQZgZtXAQuDVaCuJ3HeBdcDZqAspAtcAJ4H/2TdN9QMzuzTqoqLg7seBLcAx4DfA79z9xWirmnylFu42QltZL/cxs8uAHwEPufv/j7qeqJjZfwVOuPvuqGspElOBRUCruy8E/gMoy8+ozOyPyV7h1wCzgEvN7J5oq5p8pRbu3cDVg46rKIPLq/Mxs4vIBnvS3Z+Jup6ILQNWmdkRstN1nzOzbdGWFKluoNvd+6/m2smGfTm6Cehy95Pu/j7wDPDpiGuadKUW7h3AXDOrMbOLyX4osiPimiJhZkZ2PnW/u38n6nqi5u6PuHuVu1eT/f/ip+4e/OjsfNz9t8CbZvaJvqbPA7+OsKQoHQNuMLPKvr83n6cMPlyeGnUBY+HuH5jZ14GfkP3E+wl374y4rKgsA74C/JuZ7e1r+5a7vxBhTVJcHgCSfQOhw8BfRFxPJNz9VTNrB14ju8psD2WwU1U7VEVEAlRq0zIiIpIDhbuISIAU7iIiAVK4i4gESOEuIhIghbuISIAU7iIiAVK4i4gE6D8BREGazCMNnkAAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "d = {'Red O' : np.random.rand(10),\n", + " 'Grn X' : np.random.rand(10)}\n", + "df = pd.DataFrame(d)\n", + "df.plot(style=['ro','gx'])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 6. Time series - six months of random floats" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ts = pd.Series(np.random.randn(180), index=pd.date_range('1/1/2018', periods=180))\n", + "df = pd.DataFrame(np.random.randn(180, 3), index=ts.index, columns=list('ABC'))\n", + "df.cumsum().plot()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 7. Random dots in a scatter" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "N = 50\n", + "x = np.random.rand(N)\n", + "y = np.random.rand(N)\n", + "colors = np.random.rand(N)\n", + "sizes = (30 * np.random.rand(N))**2 # 0 to 15 point radii\n", + "plt.scatter(x, y, s=sizes, c=colors, alpha=0.5)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 8. Load csv file and show multiple chart types\n", + "Or use plt.figlegend() to show a legend outside the plot area" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " month avg_high avg_low record_high record_low avg_precipitation\n", + "0 Jan 58 42 74 22 2.95\n", + "1 Feb 61 45 78 26 3.02\n", + "2 Mar 65 48 84 25 2.34\n", + "3 Apr 67 50 92 28 1.02\n", + "4 May 71 53 98 35 0.48\n", + "5 Jun 75 56 107 41 0.11\n", + "6 Jul 77 58 105 44 0.00\n", + "7 Aug 77 59 102 43 0.03\n", + "8 Sep 77 57 103 40 0.17\n", + "9 Oct 73 54 96 34 0.81\n", + "10 Nov 64 48 84 30 1.70\n", + "11 Dec 58 42 73 21 2.56\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = pd.read_csv('Fremont_weather.txt')\n", + "print(df)\n", + "plt.bar(df['month'], df['record_high'], color='r')\n", + "plt.bar(df['month'], df['record_low'], color='c')\n", + "plt.plot(df['month'], df['avg_high'], color='k')\n", + "plt.plot(df['month'], df['avg_low'], color='b')\n", + "plt.legend()\n", + "plt.show() " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 9.1. Subplots, part 1\n", + "221 = top left subplot \n", + "222 = top right subplot \n", + "223 = bottom left subplot \n", + "224 = bottom right subplot " + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = plt.figure()\n", + "fig.suptitle('My SubPlots')\n", + "fig.add_subplot(221)\n", + "plt.plot([np.log(n) for n in range(1,10)])\n", + "fig.add_subplot(222, facecolor='y')\n", + "fig.add_subplot(223)\n", + "fig.add_subplot(224) \n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 9.2. Subplots, part 2" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, plots = plt.subplots(2, sharex=True)\n", + "fig.suptitle('Sharing X axis')\n", + "x = range(0,200,5)\n", + "y = [n**0.8 for n in x]\n", + "plots[0].plot(x, y, color='r')\n", + "plots[1].scatter(x, y)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "### 10. Save figure to image file" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(4,3), dpi=100)\n", + "plt.plot([245, 170, 148, 239, 161, 196, 112, 258])\n", + "plt.axis([0, 7, 0, 300])\n", + "plt.title('Flight Data')\n", + "plt.xlabel('Speed')\n", + "plt.savefig('Flights.png')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Matplotlib/pyplot.py b/Matplotlib/pyplot.py new file mode 100644 index 00000000..736716b6 --- /dev/null +++ b/Matplotlib/pyplot.py @@ -0,0 +1,90 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt + +# 1. simple plot with 4 numbers +plt.plot([1, 3, 2, 4]) +plt.show() + +# 2. points have x and y values; add title and axis labels +plt.plot([1, 2, 3, 4], [1, 4, 9, 16]) +plt.title('Test Plot', fontsize=8, color='g') +plt.xlabel('number n') +plt.ylabel('n^2') +plt.show() + +# 3. change figure size. plot red dots; set axis scales x: 0-6 and y: 0-20 +plt.figure(figsize=(1,5)) # 1 inch wide x 5 inches tall +plt.plot([1, 2, 3, 4], [1, 4, 9, 16], 'ro') # red-o +plt.axis([0, 6, 0, 20]) # [xmin, xmax, ymin, ymax] +plt.annotate('square it', (3,6)) +plt.show() + +# 4. bar chart with four bars +plt.clf() # clear figure +x = np.arange(4) +y = [8.8, 5.2, 3.6, 5.9] +plt.xticks(x, ('Ankit', 'Hans', 'Joe', 'Flaco')) +plt.bar(x, y) +# plt.bar(x, y, color='y') +# plt.bar(x, y, color=['lime', 'r', 'k', 'tan']) +plt.show() + +# 5. 
two sets of 10 random dots plotted +d = {'Red O' : np.random.rand(10), + 'Grn X' : np.random.rand(10)} +df = pd.DataFrame(d) +df.plot(style=['ro','gx']) +plt.show() + +# 6. time series - six months of random floats +ts = pd.Series(np.random.randn(180), index=pd.date_range('1/1/2018', periods=180)) +df = pd.DataFrame(np.random.randn(180, 3), index=ts.index, columns=list('ABC')) +df.cumsum().plot() +plt.show() + +# 7. random dots in a scatter +N = 50 +x = np.random.rand(N) +y = np.random.rand(N) +colors = np.random.rand(N) +sizes = (30 * np.random.rand(N))**2 # 0 to 15 point radii +plt.scatter(x, y, s=sizes, c=colors, alpha=0.5) +plt.show() + +# 8. load csv file and show multiple chart types +df = pd.read_csv('Fremont_weather.txt') +print(df) +plt.bar(df['month'], df['record_high'], color='r') +plt.bar(df['month'], df['record_low'], color='c') +plt.plot(df['month'], df['avg_high'], color='k') +plt.plot(df['month'], df['avg_low'], color='b') +plt.legend() # or plt.figlegend for legend outside the plot area +plt.show() + +# 9. subplots +fig = plt.figure() +fig.suptitle('My SubPlots') +fig.add_subplot(221) #top left +plt.plot([np.log(n) for n in range(1,10)]) +fig.add_subplot(222, facecolor='y') #top right +fig.add_subplot(223) #bottom left +fig.add_subplot(224) #bottom right +plt.show() + +fig, plots = plt.subplots(2, sharex=True) +fig.suptitle('Sharing X axis') +x = range(0,200,5) +y = [n**0.8 for n in x] +plots[0].plot(x, y, color='r') +plots[1].scatter(x, y) + +# 10. 
save figure to image file +plt.figure(figsize=(4,3), dpi=100) +plt.plot([245, 170, 148, 239, 161, 196, 112, 258]) +plt.axis([0, 7, 0, 300]) +plt.title('Flight Data') +plt.xlabel('Speed') +# plt.savefig('Flights.png') +plt.show() + diff --git a/NLTK/NLTK.ipynb b/NLTK/NLTK.ipynb new file mode 100644 index 00000000..7c65f6a8 --- /dev/null +++ b/NLTK/NLTK.ipynb @@ -0,0 +1,665 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python NLTK Natural Language Tool Kit\n", + "## Topics covered in this video\n", + "- Exploring the NLTK corpus \n", + "- Dictionary definitions \n", + "- Punctuation and stop words \n", + "- Stemming and lemmatization \n", + "- Sentence and word tokenizers \n", + "- Parts of speech tagging \n", + "- word2vec \n", + "- Clustering and classifying\n", + "\n", + "### NLTK Setup\n", + "First you need to install the nltk library with 'pip install nltk' or some equivalent shell command. \n", + "Then you need to download the nltk corpus by running \n", + "```python \n", + "import nltk \n", + "nltk.download()```\n", + "This will open the NLTK downloader dialog window where you should just click Download All. The corpus is a large and varied body of sample documents that you'll need for this video, including dictionaries and word lists like stop words. 
You can uninstall it later if you have a shortage of disk space with *pip uninstall nltk*.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "260819\n", + "19317\n", + "['[', 'Moby', 'Dick', 'by', 'Herman', 'Melville', '1851', ']', 'ETYMOLOGY', '.']\n", + "['[', 'Sense', 'and', 'Sensibility', 'by', 'Jane', 'Austen', '1811', ']', 'CHAPTER']\n" + ] + } + ], + "source": [ + "import nltk\n", + "from nltk.book import *\n", + "\n", + "print(type(text1))\n", + "print(len(text1))\n", + "print(len(set(text1)))\n", + "print(text1[:10])\n", + "print(text2[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['austen-emma.txt', 'austen-persuasion.txt', 'austen-sense.txt', 'bible-kjv.txt', 'blake-poems.txt', 'bryant-stories.txt', 'burgess-busterbrown.txt', 'carroll-alice.txt', 'chesterton-ball.txt', 'chesterton-brown.txt', 'chesterton-thursday.txt', 'edgeworth-parents.txt', 'melville-moby_dick.txt', 'milton-paradise.txt', 'shakespeare-caesar.txt', 'shakespeare-hamlet.txt', 'shakespeare-macbeth.txt', 'whitman-leaves.txt']\n", + "37360\n", + "3106\n", + "['What', 'say', 'you', '?']\n", + "950\n" + ] + } + ], + "source": [ + "from nltk.corpus import gutenberg\n", + "print(gutenberg.fileids())\n", + "hamlet = gutenberg.words('shakespeare-hamlet.txt')\n", + "print(len(hamlet))\n", + "hamlet_sentences = gutenberg.sents('shakespeare-hamlet.txt')\n", + "print(len(hamlet_sentences))\n", + "print(hamlet_sentences[1024])\n", + "print(len(gutenberg.paras('shakespeare-hamlet.txt')))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get the count of a word in a document, or the context of every occurrence of a word in a document." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "26\n", + "Displaying 7 of 7 matches:\n", + "r him ,\" said I , now flying into a passion again at this unaccountable farrago\n", + " employed in the celebration of the Passion of our Lord ; though in the Vision \n", + "ce all mortal interests to that one passion ; nevertheless it may have been tha\n", + "ing with the wildness of his ruling passion , yet were by no means incapable of\n", + "it , however promissory of life and passion in the end , it is above all things\n", + "o ' s lordly chest . So have I seen Passion and Vanity stamping the living magn\n", + " Guernseyman , flying into a sudden passion . \" Oh ! keep cool -- cool ? yes , \n", + "None\n", + "Displaying 5 of 5 matches:\n", + "one ,\" said Elinor , \" who has your passion for dead leaves .\" \" No ; my feelin\n", + "r daughters , without extending the passion to her ; and Elinor had the satisfa\n", + "r , if he was to be in the greatest passion !-- and Mr . Donavan thinks just th\n", + "edness I could have borne , but her passion -- her malice -- At all events it m\n", + "ling a sacrifice to an irresistible passion , as once she had fondly flattered \n", + "None\n" + ] + } + ], + "source": [ + "print(text1.count('horse'))\n", + "print(text1.concordance('passion'))\n", + "print(text2.concordance('passion'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**FreqDist and most_common** \n", + "We can use FreqDist to find the number of occurrences of each word in the text. \n", + "By getting len(vocab) we get the number of unique words in the text (including punctuation). \n", + "And we can get the most common words easily too." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "19317\n", + "[(',', 18713), ('the', 13721), ('.', 6862), ('of', 6536), ('and', 6024), ('a', 4569), ('to', 4542), (';', 4072), ('in', 3916), ('that', 2982), (\"'\", 2684), ('-', 2552), ('his', 2459), ('it', 2209), ('I', 2124), ('s', 1739), ('is', 1695), ('he', 1661), ('with', 1659), ('was', 1632)]\n" + ] + } + ], + "source": [ + "vocab = nltk.FreqDist(text1)\n", + "print(len(vocab))\n", + "print(vocab.most_common(20))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we got the 80 most common words, filtered only the ones with more than 3 characters, then sorted them descending by number of occurrences. \n", + "A better way is to first remove all the *stop words* (see below), then get the FreqDist." + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('that', 2982), ('with', 1659), ('this', 1280), ('from', 1052), ('whale', 906), ('have', 760), ('there', 715), ('were', 680), ('which', 640), ('like', 624), ('their', 612), ('they', 586), ('some', 578), ('then', 571), ('when', 553), ('upon', 538), ('into', 520), ('ship', 507), ('more', 501), ('Ahab', 501), ('them', 471), ('what', 442), ('would', 421), ('been', 415), ('other', 412), ('over', 403)]\n" + ] + } + ], + "source": [ + "mc = sorted([w for w in vocab.most_common(80) if len(w[0]) > 3], key=lambda x: x[1], reverse=True)\n", + "print(mc)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A dispersion plot shows you where in the document a word is used. You can pass in a list of words." + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "text1.dispersion_plot(['capture', 'whale', 'life', 'death', 'kill'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dictionary definitions\n", + "Use wordnet synsets to get word definitions and examples of usage. \n", + "The [0] is required because synsets returns a list, with an entry for each POS." + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "unmitigated.a.01 - not diminished or moderated in intensity or severity; sometimes used as an intensifier\n", + "['unmitigated suffering', 'an unmitigated horror', 'an unmitigated lie']\n" + ] + } + ], + "source": [ + "from nltk.corpus import wordnet as wn\n", + "w = wn.synsets(\"unmitigated\")[0]\n", + "print(w.name(), '-', w.definition())\n", + "print(w.examples())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Punctuation and Stop Words\n", + "Text analysis is often faster and easier if you can remove useless words. \n", + "NLTK provides a list of these stop words so it's easy to filter them out of your text prior to processing. \n", + "Here, 15% of our text is punctuation, and 40% is stop words. So we shrink the text by more than half by stripping out punctuation and stop words." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\n", + "['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', \"you're\", \"you've\", \"you'll\", \"you'd\", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', \"she's\", 'her', 'hers', 'herself', 'it', \"it's\", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', \"that'll\", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', \"don't\", 'should', \"should've\", 'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', \"aren't\", 'couldn', \"couldn't\", 'didn', \"didn't\", 'doesn', \"doesn't\", 'hadn', \"hadn't\", 'hasn', \"hasn't\", 'haven', \"haven't\", 'isn', \"isn't\", 'ma', 'mightn', \"mightn't\", 'mustn', \"mustn't\", 'needn', \"needn't\", 'shan', \"shan't\", 'shouldn', \"shouldn't\", 'wasn', \"wasn't\", 'weren', \"weren't\", 'won', \"won't\", 'wouldn', \"wouldn't\"]\n", + "260819\n", + "221767\n", + "122226\n" + ] + } + ], + "source": [ + "from string import punctuation\n", + "print(punctuation)\n", + "without_punct = [w for w in text1 if w not in punctuation] # this is called a list comprehension\n", + "\n", + "from nltk.corpus 
import stopwords\n", + "sw = stopwords.words('english')\n", + "print(sw)\n", + "without_sw = [w for w in without_punct if w not in sw] \n", + "\n", + "print(len(text1))\n", + "print(len(without_punct))\n", + "print(len(without_sw))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Stemming and Lemmatization\n", + "These term normalization algorithms strip the word endings off to reduce the number of root words for easier matching. \n", + "This is useful for search term matching. [NLTK stemming docs](https://www.nltk.org/api/nltk.stem.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "is is\n", + "are are\n", + "bought bought\n", + "buys buy\n", + "giving give\n", + "jumps jump\n", + "jumped jump\n", + "birds bird\n", + "do do\n", + "does doe\n", + "did did\n", + "doing do\n" + ] + } + ], + "source": [ + "from nltk.stem.porter import PorterStemmer\n", + "st = PorterStemmer()\n", + "words = ['is', 'are', 'bought', 'buys', 'giving', 'jumps', 'jumped', 'birds', 'do', 'does', 'did', 'doing']\n", + "for word in words:\n", + " print(word, st.stem(word))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**WordNet Lemmatizer** \n", + "The difference is that the result of stemming may not be an actual word, but lemmatization returns the root word. NLTK supports both. \n", + "You can also try the Lancaster or Snowball stemmers. The Snowball stemmer supports numerous languages: Arabic, Danish, Dutch, English, Finnish, French, German, Hungarian, Italian, Norwegian, Portuguese, Romanian, Russian, Spanish and Swedish." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "is is\n", + "are are\n", + "bought bought\n", + "buys buy\n", + "giving giving\n", + "jumps jump\n", + "jumped jumped\n", + "birds bird\n", + "do do\n", + "does doe\n", + "did did\n", + "doing doing\n" + ] + } + ], + "source": [ + "from nltk.stem import WordNetLemmatizer\n", + "wnl = WordNetLemmatizer()\n", + "words = ['is', 'are', 'bought', 'buys', 'giving', 'jumps', 'jumped', 'birds', 'do', 'does', 'did', 'doing']\n", + "for word in words:\n", + " print(word, wnl.lemmatize(word))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Sentence and Word Tokenizers\n", + "Sentence tokenizer breaks text down into a list of sentences. It's pretty good at handling punctuation and decimal numbers. \n", + "[Word tokenizer](https://www.nltk.org/api/nltk.tokenize.html) breaks a string down into a list of words and punctuation. \n", + "It is also easy to get parts of speech using nltk.pos_tag. There are different tagsets, depending on how much detail you want. I like universal." + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Hello.', 'I am Joe!', 'I like Python.', '263.5 is a big number.']\n", + "['The', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog', '.']\n" + ] + } + ], + "source": [ + "from nltk.tokenize import sent_tokenize, word_tokenize\n", + "s = 'Hello. I am Joe! I like Python. 263.5 is a big number.' # 4 sentences\n", + "print(sent_tokenize(s))\n", + "\n", + "w = word_tokenize('The quick brown fox jumps over the lazy dog.')\n", + "print(w)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parts of Speech Tagging\n", + "To break a block of text down into its parts of speech use pos_tag. 
\n", + "The default tagset uses 2 or 3 letter tokens that are hard for me to understand. [StackOverflow](https://stackoverflow.com/questions/15388831/what-are-all-possible-pos-tags-of-nltk) has a great decoder for the default POS tags. \n", + "The Universal tagset gives a more familiar looking tag (noun, verb, adj). \n", + "NLTK includes several other tagsets you can try." + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['The', 'quick', 'brown', 'fox', 'jumps', 'over', 'the', 'lazy', 'dog', '.']\n", + "[('The', 'DT'), ('quick', 'JJ'), ('brown', 'NN'), ('fox', 'NN'), ('jumps', 'VBZ'), ('over', 'IN'), ('the', 'DT'), ('lazy', 'JJ'), ('dog', 'NN'), ('.', '.')]\n", + "[('The', 'DET'), ('quick', 'ADJ'), ('brown', 'NOUN'), ('fox', 'NOUN'), ('jumps', 'VERB'), ('over', 'ADP'), ('the', 'DET'), ('lazy', 'ADJ'), ('dog', 'NOUN'), ('.', '.')]\n" + ] + } + ], + "source": [ + "w = word_tokenize('The quick brown fox jumps over the lazy dog.')\n", + "print(w)\n", + "print(nltk.pos_tag(w))\n", + "print(nltk.pos_tag(w, tagset='universal'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Word2Vec\n", + "[Word2Vec](https://radimrehurek.com/gensim/models/word2vec.html) uses neural networks to analyze words in a corpus by using the contexts of words. \n", + "It takes as its input a large corpus of text, and maps unique words to a vector space, such that \n", + "words that share common contexts in the corpus are located in close proximity to one another in the space. \n", + "Word2Vec does NOT look at word meanings, it only finds words that are used in combination with other words. So *frying* and *pan* may have a high similarity. \n", + "You can see here the context of one word (pain) for two different corpora. \n", + "This uses the popular gensim library, which is not part of NLTK." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('person', 0.9992861747741699), ('favourable', 0.998925507068634), ('meaning', 0.9987690448760986), ('effect', 0.9987439513206482), ('comfortable', 0.998741626739502), ('delay', 0.9987210035324097)]\n", + "[('even', 0.9980006217956543), ('moment', 0.9979783296585083), ('hence', 0.9979231357574463), ('without', 0.9979217052459717), ('separate', 0.9979064464569092), ('Now', 0.9979038238525391)]\n" + ] + } + ], + "source": [ + "from gensim.models import Word2Vec\n", + "emma_vec = Word2Vec(gutenberg.sents('austen-emma.txt'))\n", + "leaves_vec = Word2Vec(gutenberg.sents('whitman-leaves.txt'))\n", + "print(emma_vec.wv.most_similar('pain', topn=6))\n", + "print(leaves_vec.wv.most_similar('pain', topn=6))" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "30103\n", + "[('mercy', 0.9284727573394775),\n", + " ('liveth', 0.9062315821647644),\n", + " ('truth', 0.8941866159439087),\n", + " ('grace', 0.8938426375389099),\n", + " ('glory', 0.8936725854873657),\n", + " ('salvation', 0.8859732747077942),\n", + " ('hosts', 0.8839103579521179),\n", + " ('confession', 0.8837960958480835)]\n", + "[('making', 0.9873884916305542),\n", + " ('abundant', 0.9802387952804565),\n", + " ('realm', 0.98007732629776),\n", + " ('powers', 0.9798883199691772),\n", + " ('twice', 0.9775580763816833)]\n" + ] + } + ], + "source": [ + "from gensim.models import Word2Vec\n", + "from nltk.corpus import stopwords\n", + "from string import punctuation\n", + "import pprint as pp\n", + "\n", + "bible_sents = gutenberg.sents('bible-kjv.txt')\n", + "sw = stopwords.words('english')\n", + "bible = [[w.lower() for w in s if w not in punctuation and w not in sw] for s in bible_sents]\n", + "print(len(bible))\n", + "\n", + "bible_vec = 
Word2Vec(bible)\n", + "pp.pprint(bible_vec.wv.most_similar('god', topn=8))\n", + "pp.pprint(bible_vec.wv.most_similar('creation', topn=5))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### k-Means Clustering\n", + "[Clustering](http://www.nltk.org/api/nltk.cluster.html) groups similar items together. \n", + "The K-means clusterer starts with k arbitrarily chosen means (or centroids) then assigns each vector to the cluster with the closest mean. It then recalculates the means of each cluster as the centroid of its vector members. This process repeats until the cluster memberships stabilize. [NLTK docs on this example](https://www.nltk.org/_modules/nltk/cluster/kmeans.html) \n", + "This example clusters int vectors, which you can think of as points on a plane. But you could also use clustering to cluster similar documents by vocabulary/topic." + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "k-means trial 0\n", + "iteration\n", + "iteration\n", + "Clustered: [array([2, 1]), array([1, 3]), array([4, 7]), array([6, 7])]\n", + "As: [0, 0, 1, 1]\n", + "Means: [array([1.5, 2. ]), array([5., 7.])]\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from nltk.cluster import KMeansClusterer, euclidean_distance\n", + "\n", + "vectors = [np.array(f) for f in [[2, 1], [1, 3], [4, 7], [6, 7]]]\n", + "means = [[4, 3], [5, 5]]\n", + "\n", + "clusterer = KMeansClusterer(2, euclidean_distance, initial_means=means)\n", + "clusters = clusterer.cluster(vectors, True, trace=True)\n", + "\n", + "print('Clustered:', vectors)\n", + "print('As:', clusters)\n", + "print('Means:', clusterer.means())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**k-Means Clustering, Example-2** \n", + "In this example we cluster an array of 6 points into 2 clusters. 
\n", + "The initial centroids are randomly chosen by the clusterer, and it runs 10 randomized clustering trials (repeats=10) to regroup the clusters and recalculate centroids. " + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Clustered: [array([3, 3]), array([1, 2]), array([4, 2]), array([4, 0]), array([2, 3]), array([3, 1])]\n", + "As: [0, 0, 1, 1, 0, 1]\n", + "Means: [array([2. , 2.66666667]), array([3.66666667, 1. ])]\n", + "classify([2 2]): 0\n" + ] + } + ], + "source": [ + "vectors = [np.array(f) for f in [[3, 3], [1, 2], [4, 2], [4, 0], [2, 3], [3, 1]]]\n", + "\n", + "# test k-means using 2 means, euclidean distance, and 10 trial clustering repetitions with random seeds\n", + "clusterer = KMeansClusterer(2, euclidean_distance, repeats=10)\n", + "clusters = clusterer.cluster(vectors, True)\n", + "centroids = clusterer.means()\n", + "print('Clustered:', vectors)\n", + "print('As:', clusters)\n", + "print('Means:', centroids)\n", + "\n", + "# classify a new vector\n", + "vector = np.array([2,2])\n", + "print('classify(%s):' % vector, end=' ')\n", + "print(clusterer.classify(vector))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Plot a Chart of the Clusters in Example-2** \n", + "Make a Scatter Plot of the two clusters using matplotlib.pyplot. \n", + "We plot all the points in cluster-0 blue, and all the points in cluster-1 red. Then we plot the two centroids in orange. \n", + "I used list comprehensions to create new lists for all the x0, y0, x1 and y1 values." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAESZJREFUeJzt3V2IZGedx/Hvb158aSIGMg2GZHo6YG5UjMZiNuKyBEWIWUkuzEVkVo0oDe6KisLiOqBrYC680cWNGBoTTHZbjUQJY0iQQALqhUl6sknMiy6D2ZlMCKSNOjG0KBP/e1FnzKTtnqqeru7qfub7gaLOec4zdf5PPdO/PnPOqalUFZKktmwbdwGSpNEz3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkN2jGuHe/ataump6fHtXtJ2pIOHTr0m6qaHNRvbOE+PT3N/Pz8uHYvSVtSkiPD9PO0jCQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwaGe5LXJHkgySNJHk/y5WX6vDrJbUkOJ7k/yfR6FCtJGs4wR+5/At5dVZcAbwOuSHLZkj4fA35XVW8EvgZ8ZbRlajObm4Ppadi2rf88Nzfuis5uzodgiPvcq/89fC92qzu7x9Lv5rsa+Pdu+XbghiQpv8OveXNzMDMDi4v99SNH+usA+/aNr66zlfOhk4Y6555ke5KHgeeAe6rq/iVdLgCeBqiqE8Bx4LxRFqrNaf/+l4PkpMXFfrs2nvOhk4YK96p6qareBlwI7E3yljPZWZKZJPNJ5hcWFs7kJbTJHD26unatL+dDJ63qbpmq+j1wH3DFkk3PALsBkuwAXg88v8yfn62qXlX1JicH/tcI2gKmplbXrvXlfOikYe6WmUxybrf8WuC9wC+XdDsIfKRbvga41/PtZ4cDB2Bi4pVtExP9dm0850MnDXPkfj5wX5JHgQfpn3O/M8n1Sa7q+twEnJfkMPBZ4PPrU642m337YHYW9uyBpP88O+vFu3FxPnRSxnWA3ev1yv8VUpJWJ8mhquoN6ucnVCWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3rd1Tc3DHNHxnW//5qblxVySd9XaMuwBtcU/NwQMz8FL3rcyLR/rrABf5DRHSuHjkrrV5ZP/LwX7SS4v9dkljY7hrbRaPrq5d0oYw3LU2E1Ora5e0IQx3rc0lB2D7xCvbtk/02yWNjeGutbloH+ydhYk9QPrPe2e9mCqNmXfLaO0u2meYS5uMR+6S1CDDXZIaNDDck+xOcl+SJ5I8nuTTy/S5PMnxJA93jy+uT7mSpGEMc879BPC5qnooyeuAQ0nuqaonlvT7aVW9f/QlSpJWa+CRe1U9W1UPdct/AJ4ELljvwiRJZ25V59yTTANvB+5fZvM7kzyS5O4kbx5BbZKkMzT0rZBJzgF+AHymql5YsvkhYE9VvZjkSuAO4OJlXmMGmAGYmvITjJK0XoY6ck+yk36wz1XVD5dur6oXqurFbvkuYGeSXcv0m62qXlX1Jicn11i6JGklw9wtE+Am4Mmq+uoKfd7Q9SPJ3u51nx9loZKk4Q1zWuZdwIeAXyR5uGv7AjAFUFU3AtcAn0hyAvgjcG1V1TrUK0kawsBwr6qfARnQ5wbghlEVJUlaGz+hKkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatDAc
E+yO8l9SZ5I8niSTy/TJ0m+nuRwkkeTXLo+5cLcHExPw7Zt/ee5ufXakySNyBiCa8cQfU4An6uqh5K8DjiU5J6qeuKUPu8DLu4efwd8s3seqbk5mJmBxcX++pEj/XWAfftGvTdJGoExBdfAI/eqeraqHuqW/wA8CVywpNvVwK3V93Pg3CTnj7rY/ftffn9OWlzst0vSpjSm4FrVOfck08DbgfuXbLoAePqU9WP87S8AkswkmU8yv7CwsLpKgaNHV9cuSWM3puAaOtyTnAP8APhMVb1wJjurqtmq6lVVb3JyctV/fmpqde2SNHZjCq6hwj3JTvrBPldVP1ymyzPA7lPWL+zaRurAAZiYeGXbxES/XZI2pTEF1zB3ywS4CXiyqr66QreDwIe7u2YuA45X1bMjrBPoX3uYnYU9eyDpP8/OejFV0iY2puBKVZ2+Q/L3wE+BXwB/6Zq/AEwBVNWN3S+AG4ArgEXgo1U1f7rX7fV6NT9/2i6SpCWSHKqq3qB+A2+FrKqfARnQp4B/Gb48SdJ68hOqktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDVoYLgnuTnJc0keW2H75UmOJ3m4e3xx9GVKklZjxxB9vg3cANx6mj4/rar3j6QiSdKaDTxyr6qfAL/dgFokSSMyqnPu70zySJK7k7x5RK8pSTpDw5yWGeQhYE9VvZjkSuAO4OLlOiaZAWYApqamRrBrSdJy1nzkXlUvVNWL3fJdwM4ku1boO1tVvarqTU5OrnXXkqQVrDnck7whSbrlvd1rPr/W15UknbmBp2WSfBe4HNiV5BjwJWAnQFXdCFwDfCLJCeCPwLVVVetWsSRpoIHhXlUfHLD9Bvq3SkqSNgk/oSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQwHBPcnOS55I8tsL2JPl6ksNJHk1y6ejLlCStxjBH7t8GrjjN9vcBF3ePGeCbay9L0hmbm4Ppadi2rf88NzfuisbnqTm4Yxq+s63//NTZ817sGNShqn6SZPo0Xa4Gbq2qAn6e5Nwk51fVsyOqUdKw5uZgZgYWF/vrR4701wH27RtfXePw1Bw8MAMvde/F4pH+OsBF7b8XozjnfgHw9Cnrx7o2SRtt//6Xg/2kxcV++9nmkf0vB/tJLy32288CG3pBNclMkvkk8wsLCxu5a+nscPTo6tpbtrjCmFdqb8wowv0ZYPcp6xd2bX+jqmarqldVvcnJyRHsWtIrTE2trr1lEyuMeaX2xowi3A8CH+7umrkMOO75dmlMDhyAiYlXtk1M9NvPNpccgO1L3ovtE/32s8DAC6pJvgtcDuxKcgz4ErAToKpuBO4CrgQOA4vAR9erWEkDnLxoun9//1TM1FQ/2M+2i6nw8kXTR/b3T8VMTPWD/Sy4mAqQ/k0uG6/X69X8/PxY9i1JW1WSQ1XVG9TPT6hKUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaNFS4J7kiya+SHE7y+WW2X5dkIcnD3ePjoy9VkjSsHYM6JNkOfAN4L3AMeDDJwap6YknX26rqk+tQoyRplYY5ct8LHK6qX1fVn4HvAVevb1mSpLUYJtwvAJ4+Zf1Y17bUB5I8muT2JLuXe6EkM0nmk8wvLCycQbmSpGGM6oLqj4DpqnorcA9wy3Kdqmq2qnpV1Zucn
BzRriVJSw0T7s8Apx6JX9i1/VVVPV9Vf+pWvwW8YzTlSZLOxDDh/iBwcZKLkrwKuBY4eGqHJOefsnoV8OToSpQkrdbAu2Wq6kSSTwI/BrYDN1fV40muB+ar6iDwqSRXASeA3wLXrWPNkqQBUlVj2XGv16v5+fmx7FuStqokh6qqN6ifn1CVpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUFDhXuSK5L8KsnhJJ9fZvurk9zWbb8/yfSoC5UkDW9guCfZDnwDeB/wJuCDSd60pNvHgN9V1RuBrwFfGXWhkrRlzc3B9DRs29Z/nptb910Oc+S+FzhcVb+uqj8D3wOuXtLnauCWbvl24D1JMroyJWmLmpuDmRk4cgSq+s8zM+se8MOE+wXA06esH+valu1TVSeA48B5oyhQkra0/fthcfGVbYuL/fZ1tKEXVJPMJJlPMr+wsLCRu5ak8Th6dHXtIzJMuD8D7D5l/cKubdk+SXYArweeX/pCVTVbVb2q6k1OTp5ZxZK0lUxNra59RIYJ9weBi5NclORVwLXAwSV9DgIf6ZavAe6tqhpdmZK0RR04ABMTr2ybmOi3r6OB4d6dQ/8k8GPgSeD7VfV4kuuTXNV1uwk4L8lh4LPA39wuKUlnpX37YHYW9uyBpP88O9tvX0cZ1wF2r9er+fn5sexbkraqJIeqqjeon59QlaQGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0a262QSRaAI2t4iV3Ab0ZUzri1MhbHsbm0Mg5oZyyjGMeeqhr4Ef+xhftaJZkf5l7PraCVsTiOzaWVcUA7Y9nIcXhaRpIaZLhLUoO2crjPjruAEWplLI5jc2llHNDOWDZsHFv2nLskaWVb+chdkrSCTR/uSW5O8lySx1bYniRfT3I4yaNJLt3oGocxxDguT3I8ycPd44sbXeMwkuxOcl+SJ5I8nuTTy/TZ9HMy5Dg2/ZwkeU2SB5I80o3jy8v0eXWS27r5uD/J9MZXenpDjuO6JAunzMfHx1HrMJJsT/I/Se5cZtvGzEdVbeoH8A/ApcBjK2y/ErgbCHAZcP+4az7DcVwO3DnuOocYx/nApd3y64D/Bd601eZkyHFs+jnp3uNzuuWdwP3AZUv6/DNwY7d8LXDbuOs+w3FcB9ww7lqHHM9nge8s9/dno+Zj0x+5V9VPgN+epsvVwK3V93Pg3CTnb0x1wxtiHFtCVT1bVQ91y3+g/wUuS78wfdPPyZDj2PS69/jFbnVn91h6Ie1q4JZu+XbgPUmyQSUOZchxbAlJLgT+EfjWCl02ZD42fbgP4QLg6VPWj7EFf0g77+z+WXp3kjePu5hBun9Ovp3+UdapttScnGYcsAXmpDsF8DDwHHBPVa04H9X/ZrXjwHkbW+VgQ4wD4APdqb7bk+xeZvtm8B/AvwJ/WWH7hsxHC+Heiofof6z4EuA/gTvGXM9pJTkH+AHwmap6Ydz1nKkB49gSc1JVL1XV2+h/ef3eJG8Zd01nYohx/AiYrqq3Avfw8tHvppHk/cBzVXVo3LW0EO7PAKf+Br+wa9tSquqFk/8sraq7gJ1Jdo25rGUl2Uk/EOeq6ofLdNkSczJoHFtpTgCq6vfAfcAVSzb9dT6S7ABeDzy/sdUNb6VxVNXzVfWnbvVbwDs2urYhvAu4Ksn/Ad8D3p3kv5f02ZD5aCHcDwIf7u7QuAw4XlXPjruo1UryhpPn3ZLspT83m+4HsKvxJuDJqvrqCt02/ZwMM46tMCdJJpOc2y2/Fngv8Msl3Q4CH+mWrwHure5q3mYxzDiWXLe5iv51kk2lqv6tqi6sqmn6F0vvrap/WtJtQ+Zjx6hfcNSSfJf+XQu7khwDvkT/YgtVdSNwF/27Mw4Di8BHx1Pp6Q0xjmuATyQ5AfwRu
Haz/QB23gV8CPhFd34U4AvAFGypORlmHFthTs4Hbkmynf4vn+9X1Z1Jrgfmq+og/V9i/5XkMP2L+teOr9wVDTOOTyW5CjhBfxzXja3aVRrHfPgJVUlqUAunZSRJSxjuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ16P8BWkYAtNa0NncAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "\n", + "x0 = np.array([x[0] for idx, x in enumerate(vectors) if clusters[idx]==0])\n", + "y0 = np.array([x[1] for idx, x in enumerate(vectors) if clusters[idx]==0])\n", + "plt.scatter(x0,y0, color='blue')\n", + "x1 = np.array([x[0] for idx, x in enumerate(vectors) if clusters[idx]==1])\n", + "y1 = np.array([x[1] for idx, x in enumerate(vectors) if clusters[idx]==1])\n", + "plt.scatter(x1,y1, color='red')\n", + "\n", + "xc = np.array([x[0] for x in centroids])\n", + "yc = np.array([x[1] for x in centroids])\n", + "plt.scatter(xc,yc, color='orange')\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Numpy/Numpy commands.txt b/Numpy/Numpy commands.txt new file mode 100644 index 00000000..c76953ba --- /dev/null +++ b/Numpy/Numpy commands.txt @@ -0,0 +1,155 @@ +https://docs.scipy.org/doc/numpy-1.12.0/reference/ + +Numpy + install Numpy using pip + import numpy as np + ndarray - an N-dimensional array, which describes a collection of “items” of the same type + array(list) # constructor + asarray(a[, dtype, order]) # Convert the input to an array + Constants: + ndarray.shape tuple of array dimensions + ndarray.size number of elements in array + ndarray.itemsize size of one element + ndarray.dtype data type of elements + ndarray.flat 1D iterator over elements of array + Common Functions + np.tolist() + 
np.reshape(a, (3,2)) + np.swapaxes(axis1, axis2) + np.copy() + arange() + Statistics Functions: + np.sum(a, axis) + np.prod + np.min + np.max + np.mean + np.std standard deviation + np.var + np.sort(axis) + Other Functions: + String operations + logical operations - AND, OR, XOR, NOT, >, <, =, ... + trig functions + complex numbers (real + imaginary) + polynomials + 2D matrix operations + Fourier transforms +==================================================================================== +import numpy as np +x = [0,1,2,3,4,5] +a = np.array(x) + +index: a[2] + +slice: a[start:stop:step] + a[1:4:2] + a[3:] + a[:3] +a.shape +a.size +a.itemsize +a.dtype + +b = np.array([[1,2,3], [4,5,6]]) +b.swapaxes(0,1) + +a = np.arange(0,6) +a = np.arange(0,6).reshape(2,3) +======================================================================================== +import numpy as np + +pip install numpy +pip install numpy --upgrade + +import numpy as np + +a = np.array([2,3,4]) + +a = np.arange(1, 12, 2) # (from, to, step) + +a = np.linspace(1, 12, 6) # (first, last, num_elements) float data type + +a.reshape(3,2) +a = a.reshape(3,2) + +a.size + +a.shape + +a.dtype + +a.itemsize + +# this works: +b = np.array([(1.5,2,3), (4,5,6)]) + +# but this does not work: +b = np.array(1,2,3) # square brackets are required + +a < 4 # prints True/False + +a * 3 # multiplies each element by 3 +a *= 3 # saves result to a + +a = np.zeros((3,4)) + +a = np.ones((2,3)) + +a = np.array([2,3,4], dtype=np.int16) + +a = np.random.random((2,3)) + +np.set_printoptions(precision=2, suppress=True) # show 2 decimal places, suppress scientific notation + +a = np.random.randint(0,10,5) +a.sum() +a.min() +a.max() +a.mean() +a.var() # variance +a.std() # standard deviation + + +a.sum(axis=1) +a.min(axis=0) + +a.argmin() # index of min element +a.argmax() # index of max element +a.argsort() # returns array of indices that would put the array in sorted order +a.sort() # in place sort + +# indexing, slicing, 
iterating +a = np.arange(10)**2 +a[2] +a[2:5] + +for i in a: + print (i ** 2) +a[::-1] # reverses array + +for i in a.flat: + print (i) + + +a.transpose() + +a.ravel() # flattens to 1D + +# read in csv data file +data = np.loadtxt("data.txt", dtype=np.uint8, delimiter=",", skiprows=1 ) +# loadtxt does not handle missing values. to handle such exceptions use genfromtxt instead. + +data = np.loadtxt("data.txt", dtype=np.uint8, delimiter=",", skiprows=1, usecols=[0,1,2,3]) + +np.random.shuffle(a) + +a = np.random.random(5) + +np.random.choice(a) + +np.random.random_integers(5,10,2) # (low, high inclusive, size) + + + + diff --git a/Numpy/Numpy.pptx b/Numpy/Numpy.pptx new file mode 100644 index 00000000..329ca480 Binary files /dev/null and b/Numpy/Numpy.pptx differ diff --git a/Numpy/Python Numpy Intro.ipynb b/Numpy/Python Numpy Intro.ipynb new file mode 100644 index 00000000..637478b8 --- /dev/null +++ b/Numpy/Python Numpy Intro.ipynb @@ -0,0 +1,558 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Numpy Intro\n", + "An introduction to the [Python Numpy](http://www.numpy.org/) numerical python library. \n", + "The core data structure behind Numpy is the n-dimensional [Numpy Array](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html). It is 3x to 10x faster and more memory efficient than Python's lists because, similar to Java arrays, it uses contiguous blocks of memory, and all elements are the same data type so there is no type checking at runtime. The Numpy library also includes many built-in code-saving mathematical functions that can be performed on an entire array or any slice of an array with a single line of code (ie. no for loops). 
\n", + "Numpy n-dimensional arrays are also sometimes referred to as nd-arrays.\n", + "\n", + "**Install Numpy** using pip: pip install numpy\n", + "The convention for importing numpy is *import numpy as np*.\n", + "\n", + "import numpy as np\n", + "\n", + "### Creating a Numpy Array\n", + "There are MANY ways to instantiate a numpy array. I covered the most common ones below. [Docs here cover more constructors](https://docs.scipy.org/doc/numpy-1.13.0/reference/routines.array-creation.html).\n", + "- Pass in a list to the array() constructor\n", + "- Use the arange function, similar to the range function but used for Numpy arrays. Uses arguments, (start, stop+1, step).\n", + "- Use linspace to create an array of n equally spaced values. Uses arguments (start, stop, number of items).\n", + "- Create an array empty, full of ones or zeros, or full of any fill value. Uses argument (shape) in the form of a tuple. \n", + "\n", + "You can pass in dtype as an optional argument for any of these. This is especially useful if you want to limit memory usage for a very large array of small integers because int8 and int16 use much less space than the default int32." + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1 3 5 7 9 11]\n", + "[ 1 3 5 7 9 11]\n", + "[5. 5.25 5.5 5.75 6. 6.25 6.5 6.75 7. 7.25 7.5 7.75 8. ]\n", + "[[0. 0.]\n", + " [0. 0.]\n", + " [0. 0.]\n", + " [0. 
0.]]\n", + "[[1 1 1]\n", + " [1 1 1]]\n", + "[88 88 88 88 88 88]\n", + "[25 30 35 40]\n", + "[[ 1 3 5]\n", + " [ 7 9 11]]\n", + "[[0 0 0]\n", + " [0 0 0]]\n" + ] + } + ], + "source": [ + "a = np.array([1,3,5,7,9,11])\n", + "print(a)\n", + "\n", + "a = np.arange(1, 12, 2) # (start, stop, step)\n", + "print(a)\n", + "\n", + "a = np.linspace(5, 8, 13) # (start, stop, number of items)\n", + "print(a)\n", + "\n", + "a = np.zeros((4, 2))\n", + "print(a)\n", + "\n", + "a = np.ones((2, 3), dtype=np.int16)\n", + "print(a)\n", + "\n", + "a = np.full((6,), 88)\n", + "print(a)\n", + "\n", + "a = np.fromstring('25 30 35 40', dtype=np.int, sep=' ')\n", + "print(a)\n", + "\n", + "a = np.array([[1,3,5],[7,9,11]])\n", + "print(a)\n", + "\n", + "b = np.zeros_like(a) # _like gives you a new array in the same shape as the argument.\n", + "print(b)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Numpy Array Attributes\n", + "Get size (number of items), shape (dimensions), itemsize(bytes of memory for each item), and dtype (numpy data type). \n", + "See how many bytes of memory space the whole array uses from the product of size and itemsize. See [complete list of attributes and methods](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "6\n", + "(2, 3)\n", + "2\n", + "4\n", + "int32\n", + "24\n" + ] + } + ], + "source": [ + "print(a.size)\n", + "print(a.shape)\n", + "print(a.ndim)\n", + "print(a.itemsize)\n", + "print(a.dtype)\n", + "print(a.nbytes) # same as a.size * a.itemsize" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Indexing and Slicing\n", + "Use square brackets to get any item of an array by index. 
Multi-dimensional arrays can use multiple square brackets.\n", + "\n", + "There are three arguments for slicing arrays, all are optional: [start:stop:step]. \n", + " If start is left blank it defaults to 0. If stop is left blank it defaults to the end of the array. Step defaults to 1." + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 1 3 5]\n", + " [ 7 9 11]]\n", + "[ 7 9 11]\n", + "5\n", + "[]\n", + "[[1 3 5]]\n", + "[[ 7 9 11]]\n", + "[[3]\n", + " [9]]\n" + ] + } + ], + "source": [ + "print(a)\n", + "print(a[1])\n", + "print(a[0][2])\n", + "print(b[2:4])\n", + "\n", + "print(a[:1])\n", + "print(a[1:3:2])\n", + "print(a[:, 1:2]) # all elements on dimension 0, only element 1 on dimension 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reshape, Swap Axes, Flatten\n", + "See full list of [array manipulation routines](https://docs.scipy.org/doc/numpy/reference/routines.array-manipulation.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[-9 -8 -7]\n", + " [-6 -5 -4]]\n", + "[[-9 -6]\n", + " [-8 -5]\n", + " [-7 -4]]\n", + "[-9 -6 -8 -5 -7 -4]\n" + ] + } + ], + "source": [ + "c = np.arange(-9, -3,).reshape(2,3)\n", + "print(c)\n", + "\n", + "c = c.swapaxes(0,1)\n", + "print(c)\n", + "\n", + "c = c.flatten()\n", + "print(c)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Use dtype to Save Space\n", + "Default data types (int32 and float64) are memory hogs. If you don't need the higher precision you can save a lot of memory space and improve speed of operations by using smaller data types. For large data sets this makes a big difference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "int32 \n", + "400\n", + "int8 \n", + "100\n" + ] + } + ], + "source": [ + "d = np.arange(0,100)\n", + "print(d.dtype, type(d[1]))\n", + "print(d.nbytes)\n", + "\n", + "d = np.arange(0,100, dtype='int8')\n", + "print(d.dtype, type(d[1]))\n", + "print(d.nbytes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### UpCasting, Rounding, Print Formatting\n", + "The data type of all items is upcast to that of the most precise element. " + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "float64\n", + "[[1.57 2. 3. ]\n", + " [4. 5. 6. ]]\n", + "[[1.57 2. 3. ]\n", + " [4. 5. 6. ]]\n" + ] + } + ], + "source": [ + "e = np.array([(1.566666,2,3), (4,5,6)])\n", + "print(e.dtype)\n", + "\n", + "e = e.round(4)\n", + "print(e)\n", + "\n", + "np.set_printoptions(precision=2, suppress=True) # show 2 decimal places, suppress scientific notation\n", + "print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Numpy Data Types Available\n", + "uint is unsigned int, for non-negative numbers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'complex': [, ],\n", + " 'float': [,\n", + " ,\n", + " ],\n", + " 'int': [,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " 'others': [,\n", + " ,\n", + " ,\n", + " ,\n", + " ],\n", + " 'uint': [,\n", + " ,\n", + " ,\n", + " ,\n", + " ]}\n" + ] + } + ], + "source": [ + "import pprint as pp\n", + "pp.pprint(np.sctypes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reading and Writing to Files\n", + "Use [loadtxt](https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html#numpy.loadtxt) or [genfromtxt](https://docs.scipy.org/doc/numpy/reference/generated/numpy.genfromtxt.html#numpy.genfromtxt) to load an entire file into an array at once. Genfromtxt is more fault tolerant. \n", + "Use [savetxt](https://docs.scipy.org/doc/numpy/reference/generated/numpy.savetxt.html#numpy.savetxt) to write an array to file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[9 3 8 7 6 1 0 4 2 5]\n", + " [1 7 4 9 2 6 8 3 5 0]\n", + " [4 8 3 9 5 7 2 6 0 1]\n", + " [1 7 4 2 5 9 6 8 0 3]\n", + " [0 7 5 2 8 6 3 4 1 9]\n", + " [5 9 1 4 7 0 3 6 8 2]]\n", + "int32\n" + ] + } + ], + "source": [ + "f = np.loadtxt('data.txt', skiprows=1, delimiter=',', dtype=np.int32)\n", + "print(f)\n", + "print(f.dtype)\n", + "\n", + "np.savetxt('data2.txt', f, delimiter=';', fmt='%d', header='a;b;c;d;e;f;g;h;i;j', comments='')" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[9 3 8 7 6 1 0 4 2 5]\n", + " [1 7 4 9 2 6 8 3 5 0]\n", + " [4 8 3 9 5 7 2 6 0 1]\n", + " [1 7 4 2 5 9 6 8 0 3]\n", + " [0 7 5 2 8 6 3 4 1 9]\n", + " [5 9 1 4 7 0 3 6 8 2]]\n" + ] + } + ], + "source": [ + "g = np.genfromtxt('data.txt', skip_header=1, delimiter=',', dtype=np.int32)\n", + "print(g)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mathematical Functions\n", + "Numpy has an extensive list of [math and scientific functions](https://docs.scipy.org/doc/numpy/reference/routines.html). \n", + "The best part is that you don't have to iterate. You can apply an operation to the entire array or a slice of an array at once." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ True False True True True False False False False True]\n", + " [False True False True False True True False True False]\n", + " [False True False True True True False True False False]\n", + " [False True False False True True True True False False]\n", + " [False True True False True True False False False True]\n", + " [ True True False False True False False True True False]]\n", + "[[80 8 63 48 35 0 -1 15 3 24]\n", + " [ 0 48 15 80 3 35 63 8 24 -1]\n", + " [15 63 8 80 24 48 3 35 -1 0]\n", + " [ 0 48 15 3 24 80 35 63 -1 8]\n", + " [-1 48 24 3 63 35 8 15 0 80]\n", + " [24 80 0 15 48 -1 8 35 63 3]]\n" + ] + } + ], + "source": [ + "print(g > 4)\n", + "print(g ** 2 - 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0\n", + "9\n", + "270\n", + "4.5\n", + "8.25\n", + "2.8722813232690143\n", + "[45 45 45 45 45 45]\n", + "[0 3 1 2 2 0 0 3 0 0]\n", + "6\n", + "0\n", + "[[6 5 8 1 7 9 4 3 2 0]\n", + " [9 0 4 7 2 8 5 1 6 3]\n", + " [8 9 6 2 0 4 7 5 1 3]\n", + " [8 0 3 9 2 4 6 1 7 5]\n", + " [0 8 3 6 7 2 5 1 4 9]\n", + " [5 2 9 6 3 0 7 4 8 1]]\n" + ] + } + ], + "source": [ + "print(g.min())\n", + "print(g.max())\n", + "print(g.sum())\n", + "print(g.mean())\n", + "print(g.var()) # variance\n", + "print(g.std()) # standard deviation\n", + "\n", + "print(g.sum(axis=1))\n", + "print(g.min(axis=0))\n", + "\n", + "print(g.argmin()) # index of min element\n", + "print(g.argmax()) # index of max element\n", + "print(g.argsort()) # returns array of indices that would put the array in sorted order" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Column Operations\n", + "Apply functions only to specific columns by slicing, or create a new array from the columns you want, then 
work on them. \n", + "But beware that creating a new pointer to the same data can screw up your data if you're not careful." + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[8]\n", + " [4]\n", + " [3]\n", + " [4]\n", + " [5]\n", + " [1]]\n", + "8\n", + "298.607881119482\n", + "[[ 9 3 8 70000 6 1 0 4 2 5]\n", + " [ 1 7 4 90000 2 6 8 3 5 0]\n", + " [ 4 8 3 90000 5 7 2 6 0 1]\n", + " [ 1 7 4 20000 5 9 6 8 0 3]\n", + " [ 0 7 5 20000 8 6 3 4 1 9]\n", + " [ 5 9 1 40000 7 0 3 6 8 2]]\n" + ] + } + ], + "source": [ + "print(g[:, 2:3])\n", + "print(g[:, 2:3].max())\n", + "\n", + "col3 = g[:, 3:4] # not a copy, just a pointer to a slice of g\n", + "print(col3.std())\n", + "\n", + "col3 *= 100 # Beware: this is applied to g data\n", + "print(g)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Numpy Random Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "np.set_printoptions(precision=5, suppress=True) # show 5 decimal places, suppress scientific notation\n", + "h = np.random.random(6)\n", + "print(h)\n", + "\n", + "h = np.random.randint(10, 99, 8) # (low, high exclusive, size)\n", + "print(h)\n", + "\n", + "np.random.shuffle(h) # in-place shuffle\n", + "print(h)\n", + "\n", + "print(np.random.choice(h))\n", + "\n", + "h.sort() # in-place sort\n", + "print(h)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, 
+ "nbformat_minor": 2 +} diff --git a/Numpy/data.txt b/Numpy/data.txt new file mode 100644 index 00000000..d3e350d6 --- /dev/null +++ b/Numpy/data.txt @@ -0,0 +1,7 @@ +a,b,c,d,e,f,g,h,i,j +9,3,8,7,6,1,0,4,2,5 +1,7,4,9,2,6,8,3,5,0 +4,8,3,9,5,7,2,6,0,1 +1,7,4,2,5,9,6,8,0,3 +0,7,5,2,8,6,3,4,1,9 +5,9,1,4,7,0,3,6,8,2 diff --git a/Object Oriented Programming/Python OOP.pptx b/Object Oriented Programming/Python OOP.pptx new file mode 100644 index 00000000..5fcc108d Binary files /dev/null and b/Object Oriented Programming/Python OOP.pptx differ diff --git a/Object Oriented Programming/building our first class.py b/Object Oriented Programming/building our first class.py new file mode 100644 index 00000000..51411d5e --- /dev/null +++ b/Object Oriented Programming/building our first class.py @@ -0,0 +1,20 @@ +#Today we will learn how to create a class and other attributes of class +#Below is how a class is defined +class Student: + pass + +#Below is how objects are created. Here Varun and larry are two objects of class Student +Varun = Student() +larry = Student() + +# After creating the objects we can set attributes on them and read them back +Varun.name = "Harry" +Varun.std = 12 +Varun.section = 1 +larry.std = 9 +larry.subjects = ["hindi", "physics"] +print(Varun.section, larry.subjects) + + + + diff --git a/Object Oriented Programming/classes.py b/Object Oriented Programming/classes.py new file mode 100644 index 00000000..b5585312 --- /dev/null +++ b/Object Oriented Programming/classes.py @@ -0,0 +1,70 @@ +# from shape_class import * + +class Shape: + def __init__(self, color=None): + self.color = color + + def get_color(self): + return self.color + + def __str__(self): + return self.get_color() + ' Shape' + +class Rectangle(Shape): + def __init__(self, color, length, width): + super().__init__(color) + self.length = length + self.width = width + + def get_area(self): + return self.length * self.width + + def get_perimeter(self): + return 2 * (self.length + self.width) + 
+ def __str__(self): + return self.get_color() + ' ' + str(self.length) + 'x' + str(self.width) + ' ' + type(self).__name__ + +from math import pi +class Circle(Shape): + def __init__(self, color, radius): + super().__init__(color) + self.radius = radius + + def get_area(self): + return pi * self.radius ** 2 + + def get_perimeter(self): + return 2 * pi * self.radius + +def print_shape_data(self): + print('Shape: ', type(self).__name__) + print('Color: ', self.get_color()) + print('Area: ', self.get_area()) + print('Perimeter:', self.get_perimeter()) + +shape = Shape('red') +print('shape is', shape.get_color()) + +rect = Rectangle('blue', 6, 4) +print('rect is', rect.get_color(), ' with area:', rect.get_area(), ' and perimeter:', rect.get_perimeter()) + +circ = Circle('green', 5) +print('circ is', circ.get_color(), ' with area:', circ.get_area(), ' and perimeter:', circ.get_perimeter()) + +print('rect is a', type(rect).__name__) +print('circ is a', type(circ).__name__, '\n') + + +my_new_shape = Rectangle('yellow', 17, 9) +print_shape_data(my_new_shape) + +print(type(my_new_shape)) +print(my_new_shape) + + + + + + + diff --git a/Object Oriented Programming/shape_class.py b/Object Oriented Programming/shape_class.py new file mode 100644 index 00000000..1708e4dd --- /dev/null +++ b/Object Oriented Programming/shape_class.py @@ -0,0 +1,9 @@ +class Shape: + def __init__(self, color=None): + self.color = color + + def get_color(self): + return self.color + + def __str__(self): + return self.get_color() + ' Shape' diff --git a/Pandas/Admission_Predict_Ver1.1.csv b/Pandas/Admission_Predict_Ver1.1.csv new file mode 100644 index 00000000..ff4d3f2a --- /dev/null +++ b/Pandas/Admission_Predict_Ver1.1.csv @@ -0,0 +1,501 @@ +Serial No.,GRE Score,TOEFL Score,University Rating,SOP,LOR,CGPA,Research,Chance of Admit +1,337,118,4,4.5,4.5,9.65,1,0.92 +2,324,107,4,4,4.5,8.87,1,0.76 +3,316,104,3,3,3.5,8,1,0.72 +4,322,110,3,3.5,2.5,8.67,1,0.8 +5,314,103,2,2,3,8.21,0,0.65 
+6,330,115,5,4.5,3,9.34,1,0.9 +7,321,109,3,3,4,8.2,1,0.75 +8,308,101,2,3,4,7.9,0,0.68 +9,302,102,1,2,1.5,8,0,0.5 +10,323,108,3,3.5,3,8.6,0,0.45 +11,325,106,3,3.5,4,8.4,1,0.52 +12,327,111,4,4,4.5,9,1,0.84 +13,328,112,4,4,4.5,9.1,1,0.78 +14,307,109,3,4,3,8,1,0.62 +15,311,104,3,3.5,2,8.2,1,0.61 +16,314,105,3,3.5,2.5,8.3,0,0.54 +17,317,107,3,4,3,8.7,0,0.66 +18,319,106,3,4,3,8,1,0.65 +19,318,110,3,4,3,8.8,0,0.63 +20,303,102,3,3.5,3,8.5,0,0.62 +21,312,107,3,3,2,7.9,1,0.64 +22,325,114,4,3,2,8.4,0,0.7 +23,328,116,5,5,5,9.5,1,0.94 +24,334,119,5,5,4.5,9.7,1,0.95 +25,336,119,5,4,3.5,9.8,1,0.97 +26,340,120,5,4.5,4.5,9.6,1,0.94 +27,322,109,5,4.5,3.5,8.8,0,0.76 +28,298,98,2,1.5,2.5,7.5,1,0.44 +29,295,93,1,2,2,7.2,0,0.46 +30,310,99,2,1.5,2,7.3,0,0.54 +31,300,97,2,3,3,8.1,1,0.65 +32,327,103,3,4,4,8.3,1,0.74 +33,338,118,4,3,4.5,9.4,1,0.91 +34,340,114,5,4,4,9.6,1,0.9 +35,331,112,5,4,5,9.8,1,0.94 +36,320,110,5,5,5,9.2,1,0.88 +37,299,106,2,4,4,8.4,0,0.64 +38,300,105,1,1,2,7.8,0,0.58 +39,304,105,1,3,1.5,7.5,0,0.52 +40,307,108,2,4,3.5,7.7,0,0.48 +41,308,110,3,3.5,3,8,1,0.46 +42,316,105,2,2.5,2.5,8.2,1,0.49 +43,313,107,2,2.5,2,8.5,1,0.53 +44,332,117,4,4.5,4,9.1,0,0.87 +45,326,113,5,4.5,4,9.4,1,0.91 +46,322,110,5,5,4,9.1,1,0.88 +47,329,114,5,4,5,9.3,1,0.86 +48,339,119,5,4.5,4,9.7,0,0.89 +49,321,110,3,3.5,5,8.85,1,0.82 +50,327,111,4,3,4,8.4,1,0.78 +51,313,98,3,2.5,4.5,8.3,1,0.76 +52,312,100,2,1.5,3.5,7.9,1,0.56 +53,334,116,4,4,3,8,1,0.78 +54,324,112,4,4,2.5,8.1,1,0.72 +55,322,110,3,3,3.5,8,0,0.7 +56,320,103,3,3,3,7.7,0,0.64 +57,316,102,3,2,3,7.4,0,0.64 +58,298,99,2,4,2,7.6,0,0.46 +59,300,99,1,3,2,6.8,1,0.36 +60,311,104,2,2,2,8.3,0,0.42 +61,309,100,2,3,3,8.1,0,0.48 +62,307,101,3,4,3,8.2,0,0.47 +63,304,105,2,3,3,8.2,1,0.54 +64,315,107,2,4,3,8.5,1,0.56 +65,325,111,3,3,3.5,8.7,0,0.52 +66,325,112,4,3.5,3.5,8.92,0,0.55 +67,327,114,3,3,3,9.02,0,0.61 +68,316,107,2,3.5,3.5,8.64,1,0.57 +69,318,109,3,3.5,4,9.22,1,0.68 +70,328,115,4,4.5,4,9.16,1,0.78 +71,332,118,5,5,5,9.64,1,0.94 
+72,336,112,5,5,5,9.76,1,0.96 +73,321,111,5,5,5,9.45,1,0.93 +74,314,108,4,4.5,4,9.04,1,0.84 +75,314,106,3,3,5,8.9,0,0.74 +76,329,114,2,2,4,8.56,1,0.72 +77,327,112,3,3,3,8.72,1,0.74 +78,301,99,2,3,2,8.22,0,0.64 +79,296,95,2,3,2,7.54,1,0.44 +80,294,93,1,1.5,2,7.36,0,0.46 +81,312,105,3,2,3,8.02,1,0.5 +82,340,120,4,5,5,9.5,1,0.96 +83,320,110,5,5,4.5,9.22,1,0.92 +84,322,115,5,4,4.5,9.36,1,0.92 +85,340,115,5,4.5,4.5,9.45,1,0.94 +86,319,103,4,4.5,3.5,8.66,0,0.76 +87,315,106,3,4.5,3.5,8.42,0,0.72 +88,317,107,2,3.5,3,8.28,0,0.66 +89,314,108,3,4.5,3.5,8.14,0,0.64 +90,316,109,4,4.5,3.5,8.76,1,0.74 +91,318,106,2,4,4,7.92,1,0.64 +92,299,97,3,5,3.5,7.66,0,0.38 +93,298,98,2,4,3,8.03,0,0.34 +94,301,97,2,3,3,7.88,1,0.44 +95,303,99,3,2,2.5,7.66,0,0.36 +96,304,100,4,1.5,2.5,7.84,0,0.42 +97,306,100,2,3,3,8,0,0.48 +98,331,120,3,4,4,8.96,1,0.86 +99,332,119,4,5,4.5,9.24,1,0.9 +100,323,113,3,4,4,8.88,1,0.79 +101,322,107,3,3.5,3.5,8.46,1,0.71 +102,312,105,2,2.5,3,8.12,0,0.64 +103,314,106,2,4,3.5,8.25,0,0.62 +104,317,104,2,4.5,4,8.47,0,0.57 +105,326,112,3,3.5,3,9.05,1,0.74 +106,316,110,3,4,4.5,8.78,1,0.69 +107,329,111,4,4.5,4.5,9.18,1,0.87 +108,338,117,4,3.5,4.5,9.46,1,0.91 +109,331,116,5,5,5,9.38,1,0.93 +110,304,103,5,5,4,8.64,0,0.68 +111,305,108,5,3,3,8.48,0,0.61 +112,321,109,4,4,4,8.68,1,0.69 +113,301,107,3,3.5,3.5,8.34,1,0.62 +114,320,110,2,4,3.5,8.56,0,0.72 +115,311,105,3,3.5,3,8.45,1,0.59 +116,310,106,4,4.5,4.5,9.04,1,0.66 +117,299,102,3,4,3.5,8.62,0,0.56 +118,290,104,4,2,2.5,7.46,0,0.45 +119,296,99,2,3,3.5,7.28,0,0.47 +120,327,104,5,3,3.5,8.84,1,0.71 +121,335,117,5,5,5,9.56,1,0.94 +122,334,119,5,4.5,4.5,9.48,1,0.94 +123,310,106,4,1.5,2.5,8.36,0,0.57 +124,308,108,3,3.5,3.5,8.22,0,0.61 +125,301,106,4,2.5,3,8.47,0,0.57 +126,300,100,3,2,3,8.66,1,0.64 +127,323,113,3,4,3,9.32,1,0.85 +128,319,112,3,2.5,2,8.71,1,0.78 +129,326,112,3,3.5,3,9.1,1,0.84 +130,333,118,5,5,5,9.35,1,0.92 +131,339,114,5,4,4.5,9.76,1,0.96 +132,303,105,5,5,4.5,8.65,0,0.77 +133,309,105,5,3.5,3.5,8.56,0,0.71 
+134,323,112,5,4,4.5,8.78,0,0.79 +135,333,113,5,4,4,9.28,1,0.89 +136,314,109,4,3.5,4,8.77,1,0.82 +137,312,103,3,5,4,8.45,0,0.76 +138,316,100,2,1.5,3,8.16,1,0.71 +139,326,116,2,4.5,3,9.08,1,0.8 +140,318,109,1,3.5,3.5,9.12,0,0.78 +141,329,110,2,4,3,9.15,1,0.84 +142,332,118,2,4.5,3.5,9.36,1,0.9 +143,331,115,5,4,3.5,9.44,1,0.92 +144,340,120,4,4.5,4,9.92,1,0.97 +145,325,112,2,3,3.5,8.96,1,0.8 +146,320,113,2,2,2.5,8.64,1,0.81 +147,315,105,3,2,2.5,8.48,0,0.75 +148,326,114,3,3,3,9.11,1,0.83 +149,339,116,4,4,3.5,9.8,1,0.96 +150,311,106,2,3.5,3,8.26,1,0.79 +151,334,114,4,4,4,9.43,1,0.93 +152,332,116,5,5,5,9.28,1,0.94 +153,321,112,5,5,5,9.06,1,0.86 +154,324,105,3,3,4,8.75,0,0.79 +155,326,108,3,3,3.5,8.89,0,0.8 +156,312,109,3,3,3,8.69,0,0.77 +157,315,105,3,2,2.5,8.34,0,0.7 +158,309,104,2,2,2.5,8.26,0,0.65 +159,306,106,2,2,2.5,8.14,0,0.61 +160,297,100,1,1.5,2,7.9,0,0.52 +161,315,103,1,1.5,2,7.86,0,0.57 +162,298,99,1,1.5,3,7.46,0,0.53 +163,318,109,3,3,3,8.5,0,0.67 +164,317,105,3,3.5,3,8.56,0,0.68 +165,329,111,4,4.5,4,9.01,1,0.81 +166,322,110,5,4.5,4,8.97,0,0.78 +167,302,102,3,3.5,5,8.33,0,0.65 +168,313,102,3,2,3,8.27,0,0.64 +169,293,97,2,2,4,7.8,1,0.64 +170,311,99,2,2.5,3,7.98,0,0.65 +171,312,101,2,2.5,3.5,8.04,1,0.68 +172,334,117,5,4,4.5,9.07,1,0.89 +173,322,110,4,4,5,9.13,1,0.86 +174,323,113,4,4,4.5,9.23,1,0.89 +175,321,111,4,4,4,8.97,1,0.87 +176,320,111,4,4.5,3.5,8.87,1,0.85 +177,329,119,4,4.5,4.5,9.16,1,0.9 +178,319,110,3,3.5,3.5,9.04,0,0.82 +179,309,108,3,2.5,3,8.12,0,0.72 +180,307,102,3,3,3,8.27,0,0.73 +181,300,104,3,3.5,3,8.16,0,0.71 +182,305,107,2,2.5,2.5,8.42,0,0.71 +183,299,100,2,3,3.5,7.88,0,0.68 +184,314,110,3,4,4,8.8,0,0.75 +185,316,106,2,2.5,4,8.32,0,0.72 +186,327,113,4,4.5,4.5,9.11,1,0.89 +187,317,107,3,3.5,3,8.68,1,0.84 +188,335,118,5,4.5,3.5,9.44,1,0.93 +189,331,115,5,4.5,3.5,9.36,1,0.93 +190,324,112,5,5,5,9.08,1,0.88 +191,324,111,5,4.5,4,9.16,1,0.9 +192,323,110,5,4,5,8.98,1,0.87 +193,322,114,5,4.5,4,8.94,1,0.86 +194,336,118,5,4.5,5,9.53,1,0.94 
+195,316,109,3,3.5,3,8.76,0,0.77 +196,307,107,2,3,3.5,8.52,1,0.78 +197,306,105,2,3,2.5,8.26,0,0.73 +198,310,106,2,3.5,2.5,8.33,0,0.73 +199,311,104,3,4.5,4.5,8.43,0,0.7 +200,313,107,3,4,4.5,8.69,0,0.72 +201,317,103,3,2.5,3,8.54,1,0.73 +202,315,110,2,3.5,3,8.46,1,0.72 +203,340,120,5,4.5,4.5,9.91,1,0.97 +204,334,120,5,4,5,9.87,1,0.97 +205,298,105,3,3.5,4,8.54,0,0.69 +206,295,99,2,2.5,3,7.65,0,0.57 +207,315,99,2,3.5,3,7.89,0,0.63 +208,310,102,3,3.5,4,8.02,1,0.66 +209,305,106,2,3,3,8.16,0,0.64 +210,301,104,3,3.5,4,8.12,1,0.68 +211,325,108,4,4.5,4,9.06,1,0.79 +212,328,110,4,5,4,9.14,1,0.82 +213,338,120,4,5,5,9.66,1,0.95 +214,333,119,5,5,4.5,9.78,1,0.96 +215,331,117,4,4.5,5,9.42,1,0.94 +216,330,116,5,5,4.5,9.36,1,0.93 +217,322,112,4,4.5,4.5,9.26,1,0.91 +218,321,109,4,4,4,9.13,1,0.85 +219,324,110,4,3,3.5,8.97,1,0.84 +220,312,104,3,3.5,3.5,8.42,0,0.74 +221,313,103,3,4,4,8.75,0,0.76 +222,316,110,3,3.5,4,8.56,0,0.75 +223,324,113,4,4.5,4,8.79,0,0.76 +224,308,109,2,3,4,8.45,0,0.71 +225,305,105,2,3,2,8.23,0,0.67 +226,296,99,2,2.5,2.5,8.03,0,0.61 +227,306,110,2,3.5,4,8.45,0,0.63 +228,312,110,2,3.5,3,8.53,0,0.64 +229,318,112,3,4,3.5,8.67,0,0.71 +230,324,111,4,3,3,9.01,1,0.82 +231,313,104,3,4,4.5,8.65,0,0.73 +232,319,106,3,3.5,2.5,8.33,1,0.74 +233,312,107,2,2.5,3.5,8.27,0,0.69 +234,304,100,2,2.5,3.5,8.07,0,0.64 +235,330,113,5,5,4,9.31,1,0.91 +236,326,111,5,4.5,4,9.23,1,0.88 +237,325,112,4,4,4.5,9.17,1,0.85 +238,329,114,5,4.5,5,9.19,1,0.86 +239,310,104,3,2,3.5,8.37,0,0.7 +240,299,100,1,1.5,2,7.89,0,0.59 +241,296,101,1,2.5,3,7.68,0,0.6 +242,317,103,2,2.5,2,8.15,0,0.65 +243,324,115,3,3.5,3,8.76,1,0.7 +244,325,114,3,3.5,3,9.04,1,0.76 +245,314,107,2,2.5,4,8.56,0,0.63 +246,328,110,4,4,2.5,9.02,1,0.81 +247,316,105,3,3,3.5,8.73,0,0.72 +248,311,104,2,2.5,3.5,8.48,0,0.71 +249,324,110,3,3.5,4,8.87,1,0.8 +250,321,111,3,3.5,4,8.83,1,0.77 +251,320,104,3,3,2.5,8.57,1,0.74 +252,316,99,2,2.5,3,9,0,0.7 +253,318,100,2,2.5,3.5,8.54,1,0.71 +254,335,115,4,4.5,4.5,9.68,1,0.93 
+255,321,114,4,4,5,9.12,0,0.85 +256,307,110,4,4,4.5,8.37,0,0.79 +257,309,99,3,4,4,8.56,0,0.76 +258,324,100,3,4,5,8.64,1,0.78 +259,326,102,4,5,5,8.76,1,0.77 +260,331,119,4,5,4.5,9.34,1,0.9 +261,327,108,5,5,3.5,9.13,1,0.87 +262,312,104,3,3.5,4,8.09,0,0.71 +263,308,103,2,2.5,4,8.36,1,0.7 +264,324,111,3,2.5,1.5,8.79,1,0.7 +265,325,110,2,3,2.5,8.76,1,0.75 +266,313,102,3,2.5,2.5,8.68,0,0.71 +267,312,105,2,2,2.5,8.45,0,0.72 +268,314,107,3,3,3.5,8.17,1,0.73 +269,327,113,4,4.5,5,9.14,0,0.83 +270,308,108,4,4.5,5,8.34,0,0.77 +271,306,105,2,2.5,3,8.22,1,0.72 +272,299,96,2,1.5,2,7.86,0,0.54 +273,294,95,1,1.5,1.5,7.64,0,0.49 +274,312,99,1,1,1.5,8.01,1,0.52 +275,315,100,1,2,2.5,7.95,0,0.58 +276,322,110,3,3.5,3,8.96,1,0.78 +277,329,113,5,5,4.5,9.45,1,0.89 +278,320,101,2,2.5,3,8.62,0,0.7 +279,308,103,2,3,3.5,8.49,0,0.66 +280,304,102,2,3,4,8.73,0,0.67 +281,311,102,3,4.5,4,8.64,1,0.68 +282,317,110,3,4,4.5,9.11,1,0.8 +283,312,106,3,4,3.5,8.79,1,0.81 +284,321,111,3,2.5,3,8.9,1,0.8 +285,340,112,4,5,4.5,9.66,1,0.94 +286,331,116,5,4,4,9.26,1,0.93 +287,336,118,5,4.5,4,9.19,1,0.92 +288,324,114,5,5,4.5,9.08,1,0.89 +289,314,104,4,5,5,9.02,0,0.82 +290,313,109,3,4,3.5,9,0,0.79 +291,307,105,2,2.5,3,7.65,0,0.58 +292,300,102,2,1.5,2,7.87,0,0.56 +293,302,99,2,1,2,7.97,0,0.56 +294,312,98,1,3.5,3,8.18,1,0.64 +295,316,101,2,2.5,2,8.32,1,0.61 +296,317,100,2,3,2.5,8.57,0,0.68 +297,310,107,3,3.5,3.5,8.67,0,0.76 +298,320,120,3,4,4.5,9.11,0,0.86 +299,330,114,3,4.5,4.5,9.24,1,0.9 +300,305,112,3,3,3.5,8.65,0,0.71 +301,309,106,2,2.5,2.5,8,0,0.62 +302,319,108,2,2.5,3,8.76,0,0.66 +303,322,105,2,3,3,8.45,1,0.65 +304,323,107,3,3.5,3.5,8.55,1,0.73 +305,313,106,2,2.5,2,8.43,0,0.62 +306,321,109,3,3.5,3.5,8.8,1,0.74 +307,323,110,3,4,3.5,9.1,1,0.79 +308,325,112,4,4,4,9,1,0.8 +309,312,108,3,3.5,3,8.53,0,0.69 +310,308,110,4,3.5,3,8.6,0,0.7 +311,320,104,3,3,3.5,8.74,1,0.76 +312,328,108,4,4.5,4,9.18,1,0.84 +313,311,107,4,4.5,4.5,9,1,0.78 +314,301,100,3,3.5,3,8.04,0,0.67 +315,305,105,2,3,4,8.13,0,0.66 
+316,308,104,2,2.5,3,8.07,0,0.65 +317,298,101,2,1.5,2,7.86,0,0.54 +318,300,99,1,1,2.5,8.01,0,0.58 +319,324,111,3,2.5,2,8.8,1,0.79 +320,327,113,4,3.5,3,8.69,1,0.8 +321,317,106,3,4,3.5,8.5,1,0.75 +322,323,104,3,4,4,8.44,1,0.73 +323,314,107,2,2.5,4,8.27,0,0.72 +324,305,102,2,2,2.5,8.18,0,0.62 +325,315,104,3,3,2.5,8.33,0,0.67 +326,326,116,3,3.5,4,9.14,1,0.81 +327,299,100,3,2,2,8.02,0,0.63 +328,295,101,2,2.5,2,7.86,0,0.69 +329,324,112,4,4,3.5,8.77,1,0.8 +330,297,96,2,2.5,1.5,7.89,0,0.43 +331,327,113,3,3.5,3,8.66,1,0.8 +332,311,105,2,3,2,8.12,1,0.73 +333,308,106,3,3.5,2.5,8.21,1,0.75 +334,319,108,3,3,3.5,8.54,1,0.71 +335,312,107,4,4.5,4,8.65,1,0.73 +336,325,111,4,4,4.5,9.11,1,0.83 +337,319,110,3,3,2.5,8.79,0,0.72 +338,332,118,5,5,5,9.47,1,0.94 +339,323,108,5,4,4,8.74,1,0.81 +340,324,107,5,3.5,4,8.66,1,0.81 +341,312,107,3,3,3,8.46,1,0.75 +342,326,110,3,3.5,3.5,8.76,1,0.79 +343,308,106,3,3,3,8.24,0,0.58 +344,305,103,2,2.5,3.5,8.13,0,0.59 +345,295,96,2,1.5,2,7.34,0,0.47 +346,316,98,1,1.5,2,7.43,0,0.49 +347,304,97,2,1.5,2,7.64,0,0.47 +348,299,94,1,1,1,7.34,0,0.42 +349,302,99,1,2,2,7.25,0,0.57 +350,313,101,3,2.5,3,8.04,0,0.62 +351,318,107,3,3,3.5,8.27,1,0.74 +352,325,110,4,3.5,4,8.67,1,0.73 +353,303,100,2,3,3.5,8.06,1,0.64 +354,300,102,3,3.5,2.5,8.17,0,0.63 +355,297,98,2,2.5,3,7.67,0,0.59 +356,317,106,2,2,3.5,8.12,0,0.73 +357,327,109,3,3.5,4,8.77,1,0.79 +358,301,104,2,3.5,3.5,7.89,1,0.68 +359,314,105,2,2.5,2,7.64,0,0.7 +360,321,107,2,2,1.5,8.44,0,0.81 +361,322,110,3,4,5,8.64,1,0.85 +362,334,116,4,4,3.5,9.54,1,0.93 +363,338,115,5,4.5,5,9.23,1,0.91 +364,306,103,2,2.5,3,8.36,0,0.69 +365,313,102,3,3.5,4,8.9,1,0.77 +366,330,114,4,4.5,3,9.17,1,0.86 +367,320,104,3,3.5,4.5,8.34,1,0.74 +368,311,98,1,1,2.5,7.46,0,0.57 +369,298,92,1,2,2,7.88,0,0.51 +370,301,98,1,2,3,8.03,1,0.67 +371,310,103,2,2.5,2.5,8.24,0,0.72 +372,324,110,3,3.5,3,9.22,1,0.89 +373,336,119,4,4.5,4,9.62,1,0.95 +374,321,109,3,3,3,8.54,1,0.79 +375,315,105,2,2,2.5,7.65,0,0.39 +376,304,101,2,2,2.5,7.66,0,0.38 
+377,297,96,2,2.5,2,7.43,0,0.34 +378,290,100,1,1.5,2,7.56,0,0.47 +379,303,98,1,2,2.5,7.65,0,0.56 +380,311,99,1,2.5,3,8.43,1,0.71 +381,322,104,3,3.5,4,8.84,1,0.78 +382,319,105,3,3,3.5,8.67,1,0.73 +383,324,110,4,4.5,4,9.15,1,0.82 +384,300,100,3,3,3.5,8.26,0,0.62 +385,340,113,4,5,5,9.74,1,0.96 +386,335,117,5,5,5,9.82,1,0.96 +387,302,101,2,2.5,3.5,7.96,0,0.46 +388,307,105,2,2,3.5,8.1,0,0.53 +389,296,97,2,1.5,2,7.8,0,0.49 +390,320,108,3,3.5,4,8.44,1,0.76 +391,314,102,2,2,2.5,8.24,0,0.64 +392,318,106,3,2,3,8.65,0,0.71 +393,326,112,4,4,3.5,9.12,1,0.84 +394,317,104,2,3,3,8.76,0,0.77 +395,329,111,4,4.5,4,9.23,1,0.89 +396,324,110,3,3.5,3.5,9.04,1,0.82 +397,325,107,3,3,3.5,9.11,1,0.84 +398,330,116,4,5,4.5,9.45,1,0.91 +399,312,103,3,3.5,4,8.78,0,0.67 +400,333,117,4,5,4,9.66,1,0.95 +401,304,100,2,3.5,3,8.22,0,0.63 +402,315,105,2,3,3,8.34,0,0.66 +403,324,109,3,3.5,3,8.94,1,0.78 +404,330,116,4,4,3.5,9.23,1,0.91 +405,311,101,3,2,2.5,7.64,1,0.62 +406,302,99,3,2.5,3,7.45,0,0.52 +407,322,103,4,3,2.5,8.02,1,0.61 +408,298,100,3,2.5,4,7.95,1,0.58 +409,297,101,3,2,4,7.67,1,0.57 +410,300,98,1,2,2.5,8.02,0,0.61 +411,301,96,1,3,4,7.56,0,0.54 +412,313,94,2,2.5,1.5,8.13,0,0.56 +413,314,102,4,2.5,2,7.88,1,0.59 +414,317,101,3,3,2,7.94,1,0.49 +415,321,110,4,3.5,4,8.35,1,0.72 +416,327,106,4,4,4.5,8.75,1,0.76 +417,315,104,3,4,2.5,8.1,0,0.65 +418,316,103,3,3.5,2,7.68,0,0.52 +419,309,111,2,2.5,4,8.03,0,0.6 +420,308,102,2,2,3.5,7.98,1,0.58 +421,299,100,3,2,3,7.42,0,0.42 +422,321,112,3,3,4.5,8.95,1,0.77 +423,322,112,4,3.5,2.5,9.02,1,0.73 +424,334,119,5,4.5,5,9.54,1,0.94 +425,325,114,5,4,5,9.46,1,0.91 +426,323,111,5,4,5,9.86,1,0.92 +427,312,106,3,3,5,8.57,0,0.71 +428,310,101,3,3.5,5,8.65,1,0.71 +429,316,103,2,2,4.5,8.74,0,0.69 +430,340,115,5,5,4.5,9.06,1,0.95 +431,311,104,3,4,3.5,8.13,1,0.74 +432,320,112,2,3.5,3.5,8.78,1,0.73 +433,324,112,4,4.5,4,9.22,1,0.86 +434,316,111,4,4,5,8.54,0,0.71 +435,306,103,3,3.5,3,8.21,0,0.64 +436,309,105,2,2.5,4,7.68,0,0.55 +437,310,110,1,1.5,4,7.23,1,0.58 
+438,317,106,1,1.5,3.5,7.65,1,0.61 +439,318,110,1,2.5,3.5,8.54,1,0.67 +440,312,105,2,1.5,3,8.46,0,0.66 +441,305,104,2,2.5,1.5,7.79,0,0.53 +442,332,112,1,1.5,3,8.66,1,0.79 +443,331,116,4,4.5,4.5,9.44,1,0.92 +444,321,114,5,4.5,4.5,9.16,1,0.87 +445,324,113,5,4,5,9.25,1,0.92 +446,328,116,5,4.5,5,9.08,1,0.91 +447,327,118,4,5,5,9.67,1,0.93 +448,320,108,3,3.5,5,8.97,1,0.84 +449,312,109,2,2.5,4,9.02,0,0.8 +450,315,101,3,3.5,4.5,9.13,0,0.79 +451,320,112,4,3,4.5,8.86,1,0.82 +452,324,113,4,4.5,4.5,9.25,1,0.89 +453,328,116,4,5,3.5,9.6,1,0.93 +454,319,103,3,2.5,4,8.76,1,0.73 +455,310,105,2,3,3.5,8.01,0,0.71 +456,305,102,2,1.5,2.5,7.64,0,0.59 +457,299,100,2,2,2,7.88,0,0.51 +458,295,99,1,2,1.5,7.57,0,0.37 +459,312,100,1,3,3,8.53,1,0.69 +460,329,113,4,4,3.5,9.36,1,0.89 +461,319,105,4,4,4.5,8.66,1,0.77 +462,301,102,3,2.5,2,8.13,1,0.68 +463,307,105,4,3,3,7.94,0,0.62 +464,304,107,3,3.5,3,7.86,0,0.57 +465,298,97,2,2,3,7.21,0,0.45 +466,305,96,4,3,4.5,8.26,0,0.54 +467,314,99,4,3.5,4.5,8.73,1,0.71 +468,318,101,5,3.5,5,8.78,1,0.78 +469,323,110,4,4,5,8.88,1,0.81 +470,326,114,4,4,3.5,9.16,1,0.86 +471,320,110,5,4,4,9.27,1,0.87 +472,311,103,3,2,4,8.09,0,0.64 +473,327,116,4,4,4.5,9.48,1,0.9 +474,316,102,2,4,3.5,8.15,0,0.67 +475,308,105,4,3,2.5,7.95,1,0.67 +476,300,101,3,3.5,2.5,7.88,0,0.59 +477,304,104,3,2.5,2,8.12,0,0.62 +478,309,105,4,3.5,2,8.18,0,0.65 +479,318,103,3,4,4.5,8.49,1,0.71 +480,325,110,4,4.5,4,8.96,1,0.79 +481,321,102,3,3.5,4,9.01,1,0.8 +482,323,107,4,3,2.5,8.48,1,0.78 +483,328,113,4,4,2.5,8.77,1,0.83 +484,304,103,5,5,3,7.92,0,0.71 +485,317,106,3,3.5,3,7.89,1,0.73 +486,311,101,2,2.5,3.5,8.34,1,0.7 +487,319,102,3,2.5,2.5,8.37,0,0.68 +488,327,115,4,3.5,4,9.14,0,0.79 +489,322,112,3,3,4,8.62,1,0.76 +490,302,110,3,4,4.5,8.5,0,0.65 +491,307,105,2,2.5,4.5,8.12,1,0.67 +492,297,99,4,3,3.5,7.81,0,0.54 +493,298,101,4,2.5,4.5,7.69,1,0.53 +494,300,95,2,3,1.5,8.22,1,0.62 +495,301,99,3,2.5,2,8.45,1,0.68 +496,332,108,5,4.5,4,9.02,1,0.87 +497,337,117,5,5,5,9.87,1,0.96 
+498,330,120,5,4.5,5,9.56,1,0.93 +499,312,103,4,4,5,8.43,0,0.73 +500,327,113,4,4.5,4.5,9.04,0,0.84 \ No newline at end of file diff --git a/Pandas/Fremont_weather.txt b/Pandas/Fremont_weather.txt new file mode 100644 index 00000000..e776cdc9 --- /dev/null +++ b/Pandas/Fremont_weather.txt @@ -0,0 +1,13 @@ +month,avg_high,avg_low,record_high,record_low,avg_precipitation +Jan,58,42,74,22,2.95 +Feb,61,45,78,26,3.02 +Mar,65,48,84,25,2.34 +Apr,67,50,92,28,1.02 +May,71,53,98,35,0.48 +Jun,75,56,107,41,0.11 +Jul,77,58,105,44,0.0 +Aug,77,59,102,43,0.03 +Sep,77,57,103,40,0.17 +Oct,73,54,96,34,0.81 +Nov,64,48,84,30,1.7 +Dec,58,42,73,21,2.56 \ No newline at end of file diff --git a/Pandas/Pandas - Change Column Names.ipynb b/Pandas/Pandas - Change Column Names.ipynb new file mode 100644 index 00000000..aa84e79c --- /dev/null +++ b/Pandas/Pandas - Change Column Names.ipynb @@ -0,0 +1,364 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas - Change Column Names" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDLabel
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
..................
1456.73.05.22.3Iris-virginica
1466.32.55.01.9Iris-virginica
1476.53.05.22.0Iris-virginica
1486.23.45.42.3Iris-virginica
1495.93.05.11.8Iris-virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " A B C D Label\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 Iris-virginica\n", + "146 6.3 2.5 5.0 1.9 Iris-virginica\n", + "147 6.5 3.0 5.2 2.0 Iris-virginica\n", + "148 6.2 3.4 5.4 2.3 Iris-virginica\n", + "149 5.9 3.0 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('iris.data', names=['A','B','C','D','Label'])\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "axis=1 tells Pandas it's column names. \n", + "inplace=True tells Pandas to save the changes to our DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
aabbCDLabel
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
..................
1456.73.05.22.3Iris-virginica
1466.32.55.01.9Iris-virginica
1476.53.05.22.0Iris-virginica
1486.23.45.42.3Iris-virginica
1495.93.05.11.8Iris-virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " aa bb C D Label\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 Iris-virginica\n", + "146 6.3 2.5 5.0 1.9 Iris-virginica\n", + "147 6.5 3.0 5.2 2.0 Iris-virginica\n", + "148 6.2 3.4 5.4 2.3 Iris-virginica\n", + "149 5.9 3.0 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.rename({'A':'aa', 'B':'bb'}, axis=1, inplace=True)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pandas/Pandas - Delete Columns from DataFrame.ipynb b/Pandas/Pandas - Delete Columns from DataFrame.ipynb new file mode 100644 index 00000000..28293203 --- /dev/null +++ b/Pandas/Pandas - Delete Columns from DataFrame.ipynb @@ -0,0 +1,757 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas - Delete Columns from a DataFrame" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDLabel
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
..................
1456.73.05.22.3Iris-virginica
1466.32.55.01.9Iris-virginica
1476.53.05.22.0Iris-virginica
1486.23.45.42.3Iris-virginica
1495.93.05.11.8Iris-virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " A B C D Label\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 Iris-virginica\n", + "146 6.3 2.5 5.0 1.9 Iris-virginica\n", + "147 6.5 3.0 5.2 2.0 Iris-virginica\n", + "148 6.2 3.4 5.4 2.3 Iris-virginica\n", + "149 5.9 3.0 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('iris.data', names=['A','B','C','D','Label'])\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 1) to drop a single column\n, and saving it permanently ", + "df.drop('col_name', axis=1,inplace =True) \n", + "To save changes you must either set df = df.drop(), or add inplace=True." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ACDLabel
05.11.40.2Iris-setosa
14.91.40.2Iris-setosa
24.71.30.2Iris-setosa
34.61.50.2Iris-setosa
45.01.40.2Iris-setosa
...............
1456.75.22.3Iris-virginica
1466.35.01.9Iris-virginica
1476.55.22.0Iris-virginica
1486.25.42.3Iris-virginica
1495.95.11.8Iris-virginica
\n", + "

150 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " A C D Label\n", + "0 5.1 1.4 0.2 Iris-setosa\n", + "1 4.9 1.4 0.2 Iris-setosa\n", + "2 4.7 1.3 0.2 Iris-setosa\n", + "3 4.6 1.5 0.2 Iris-setosa\n", + "4 5.0 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ...\n", + "145 6.7 5.2 2.3 Iris-virginica\n", + "146 6.3 5.0 1.9 Iris-virginica\n", + "147 6.5 5.2 2.0 Iris-virginica\n", + "148 6.2 5.4 2.3 Iris-virginica\n", + "149 5.9 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 4 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop('B', axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDLabel
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
..................
1456.73.05.22.3Iris-virginica
1466.32.55.01.9Iris-virginica
1476.53.05.22.0Iris-virginica
1486.23.45.42.3Iris-virginica
1495.93.05.11.8Iris-virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " A B C D Label\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 Iris-virginica\n", + "146 6.3 2.5 5.0 1.9 Iris-virginica\n", + "147 6.5 3.0 5.2 2.0 Iris-virginica\n", + "148 6.2 3.4 5.4 2.3 Iris-virginica\n", + "149 5.9 3.0 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2) to drop multiple axes by name\n", + "df.drop(['col_name1', 'col_name2'], axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
BCD
03.51.40.2
13.01.40.2
23.21.30.2
33.11.50.2
43.61.40.2
............
1453.05.22.3
1462.55.01.9
1473.05.22.0
1483.45.42.3
1493.05.11.8
\n", + "

150 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " B C D\n", + "0 3.5 1.4 0.2\n", + "1 3.0 1.4 0.2\n", + "2 3.2 1.3 0.2\n", + "3 3.1 1.5 0.2\n", + "4 3.6 1.4 0.2\n", + ".. ... ... ...\n", + "145 3.0 5.2 2.3\n", + "146 2.5 5.0 1.9\n", + "147 3.0 5.2 2.0\n", + "148 3.4 5.4 2.3\n", + "149 3.0 5.1 1.8\n", + "\n", + "[150 rows x 3 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop(['A','Label'], axis=1, inplace=True)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3) to drop multiple axes by numerical column index\n", + "df.drop(df.columns[[idx1, idx2]], axis=1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
C
01.4
11.4
21.3
31.5
41.4
......
1455.2
1465.0
1475.2
1485.4
1495.1
\n", + "

150 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " C\n", + "0 1.4\n", + "1 1.4\n", + "2 1.3\n", + "3 1.5\n", + "4 1.4\n", + ".. ...\n", + "145 5.2\n", + "146 5.0\n", + "147 5.2\n", + "148 5.4\n", + "149 5.1\n", + "\n", + "[150 rows x 1 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.drop(df.columns[[0, 2]], axis=1, inplace=True)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pandas/Pandas - Delete NaN Rows from DataFrame.ipynb b/Pandas/Pandas - Delete NaN Rows from DataFrame.ipynb new file mode 100644 index 00000000..79d4b0af --- /dev/null +++ b/Pandas/Pandas - Delete NaN Rows from DataFrame.ipynb @@ -0,0 +1,655 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas - Delete NaN Rows from DataFrame" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "[StackOverflow Thread](https://stackoverflow.com/questions/13413590/how-to-drop-rows-of-pandas-dataframe-whose-value-in-a-certain-column-is-nan)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
0-1.358720-0.4939670.258351
10.3383470.498426-0.050406
20.821865-0.376314-1.504425
3-0.5518760.6945950.540055
4-0.493108-0.5702550.160218
5-0.705264-1.9409840.321970
60.2571330.642613-0.416996
7-1.391762-1.689991-1.804575
81.459479-0.731590-0.061360
91.314401-1.666592-1.778455
10-0.353425-0.654705-0.155042
11-0.898696-0.7568020.539142
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 -1.358720 -0.493967 0.258351\n", + "1 0.338347 0.498426 -0.050406\n", + "2 0.821865 -0.376314 -1.504425\n", + "3 -0.551876 0.694595 0.540055\n", + "4 -0.493108 -0.570255 0.160218\n", + "5 -0.705264 -1.940984 0.321970\n", + "6 0.257133 0.642613 -0.416996\n", + "7 -1.391762 -1.689991 -1.804575\n", + "8 1.459479 -0.731590 -0.061360\n", + "9 1.314401 -1.666592 -1.778455\n", + "10 -0.353425 -0.654705 -0.155042\n", + "11 -0.898696 -0.756802 0.539142" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(np.random.randn(12,3))\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
0NaNNaNNaN
10.3383470.498426-0.050406
2NaN-0.376314-1.504425
3-0.551876NaN0.540055
4NaN-0.570255NaN
5-0.705264-1.9409840.321970
6NaNNaN-0.416996
7-1.391762-1.689991-1.804575
8NaN-0.731590NaN
91.314401NaN-1.778455
10NaN-0.654705-0.155042
11-0.898696-0.7568020.539142
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "0 NaN NaN NaN\n", + "1 0.338347 0.498426 -0.050406\n", + "2 NaN -0.376314 -1.504425\n", + "3 -0.551876 NaN 0.540055\n", + "4 NaN -0.570255 NaN\n", + "5 -0.705264 -1.940984 0.321970\n", + "6 NaN NaN -0.416996\n", + "7 -1.391762 -1.689991 -1.804575\n", + "8 NaN -0.731590 NaN\n", + "9 1.314401 NaN -1.778455\n", + "10 NaN -0.654705 -0.155042\n", + "11 -0.898696 -0.756802 0.539142" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.iloc[::2,0] = np.nan\n", + "df.iloc[::3,1] = np.nan\n", + "df.iloc[::4,2] = np.nan\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**- Drop all rows that have *any* NaN values**" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
10.3383470.498426-0.050406
5-0.705264-1.9409840.321970
7-1.391762-1.689991-1.804575
11-0.898696-0.7568020.539142
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "1 0.338347 0.498426 -0.050406\n", + "5 -0.705264 -1.940984 0.321970\n", + "7 -1.391762 -1.689991 -1.804575\n", + "11 -0.898696 -0.756802 0.539142" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dropna()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**- drop only if *all* columns are NaN**" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
10.3383470.498426-0.050406
2NaN-0.376314-1.504425
3-0.551876NaN0.540055
4NaN-0.570255NaN
5-0.705264-1.9409840.321970
6NaNNaN-0.416996
7-1.391762-1.689991-1.804575
8NaN-0.731590NaN
91.314401NaN-1.778455
10NaN-0.654705-0.155042
11-0.898696-0.7568020.539142
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "1 0.338347 0.498426 -0.050406\n", + "2 NaN -0.376314 -1.504425\n", + "3 -0.551876 NaN 0.540055\n", + "4 NaN -0.570255 NaN\n", + "5 -0.705264 -1.940984 0.321970\n", + "6 NaN NaN -0.416996\n", + "7 -1.391762 -1.689991 -1.804575\n", + "8 NaN -0.731590 NaN\n", + "9 1.314401 NaN -1.778455\n", + "10 NaN -0.654705 -0.155042\n", + "11 -0.898696 -0.756802 0.539142" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dropna(how='all')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**- Drop only if NaN in a specific column**" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
012
10.3383470.498426-0.050406
2NaN-0.376314-1.504425
3-0.551876NaN0.540055
5-0.705264-1.9409840.321970
6NaNNaN-0.416996
7-1.391762-1.689991-1.804575
91.314401NaN-1.778455
10NaN-0.654705-0.155042
11-0.898696-0.7568020.539142
\n", + "
" + ], + "text/plain": [ + " 0 1 2\n", + "1 0.338347 0.498426 -0.050406\n", + "2 NaN -0.376314 -1.504425\n", + "3 -0.551876 NaN 0.540055\n", + "5 -0.705264 -1.940984 0.321970\n", + "6 NaN NaN -0.416996\n", + "7 -1.391762 -1.689991 -1.804575\n", + "9 1.314401 NaN -1.778455\n", + "10 NaN -0.654705 -0.155042\n", + "11 -0.898696 -0.756802 0.539142" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dropna(subset=[2])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pandas/Pandas - Iterate Rows of a DataFrame.ipynb b/Pandas/Pandas - Iterate Rows of a DataFrame.ipynb new file mode 100644 index 00000000..e911a4be --- /dev/null +++ b/Pandas/Pandas - Iterate Rows of a DataFrame.ipynb @@ -0,0 +1,723 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas - Iterate Rows of a DataFrame " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01234
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
..................
1456.73.05.22.3Iris-virginica
1466.32.55.01.9Iris-virginica
1476.53.05.22.0Iris-virginica
1486.23.45.42.3Iris-virginica
1495.93.05.11.8Iris-virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 Iris-virginica\n", + "146 6.3 2.5 5.0 1.9 Iris-virginica\n", + "147 6.5 3.0 5.2 2.0 Iris-virginica\n", + "148 6.2 3.4 5.4 2.3 Iris-virginica\n", + "149 5.9 3.0 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('iris.data', header=None)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get a List of one Column\n", + "Use a list comprehension." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[1.4,\n", + " 1.4,\n", + " 1.3,\n", + " 1.5,\n", + " 1.4,\n", + " 1.7,\n", + " 1.4,\n", + " 1.5,\n", + " 1.4,\n", + " 1.5,\n", + " 1.5,\n", + " 1.6,\n", + " 1.4,\n", + " 1.1,\n", + " 1.2,\n", + " 1.5,\n", + " 1.3,\n", + " 1.4,\n", + " 1.7,\n", + " 1.5,\n", + " 1.7,\n", + " 1.5,\n", + " 1.0,\n", + " 1.7,\n", + " 1.9,\n", + " 1.6,\n", + " 1.6,\n", + " 1.5,\n", + " 1.4,\n", + " 1.6,\n", + " 1.6,\n", + " 1.5,\n", + " 1.5,\n", + " 1.4,\n", + " 1.5,\n", + " 1.2,\n", + " 1.3,\n", + " 1.5,\n", + " 1.3,\n", + " 1.5,\n", + " 1.3,\n", + " 1.3,\n", + " 1.3,\n", + " 1.6,\n", + " 1.9,\n", + " 1.4,\n", + " 1.6,\n", + " 1.4,\n", + " 1.5,\n", + " 1.4,\n", + " 4.7,\n", + " 4.5,\n", + " 4.9,\n", + " 4.0,\n", + " 4.6,\n", + " 4.5,\n", + " 4.7,\n", + " 3.3,\n", + " 4.6,\n", + " 3.9,\n", + " 3.5,\n", + " 4.2,\n", + " 4.0,\n", + " 4.7,\n", + " 3.6,\n", + " 4.4,\n", + " 4.5,\n", + " 4.1,\n", + " 4.5,\n", + " 3.9,\n", + " 4.8,\n", + " 4.0,\n", + " 4.9,\n", + " 4.7,\n", + " 4.3,\n", + " 4.4,\n", + " 4.8,\n", + " 5.0,\n", + " 4.5,\n", + " 3.5,\n", + " 3.8,\n", + " 
3.7,\n", + " 3.9,\n", + " 5.1,\n", + " 4.5,\n", + " 4.5,\n", + " 4.7,\n", + " 4.4,\n", + " 4.1,\n", + " 4.0,\n", + " 4.4,\n", + " 4.6,\n", + " 4.0,\n", + " 3.3,\n", + " 4.2,\n", + " 4.2,\n", + " 4.2,\n", + " 4.3,\n", + " 3.0,\n", + " 4.1,\n", + " 6.0,\n", + " 5.1,\n", + " 5.9,\n", + " 5.6,\n", + " 5.8,\n", + " 6.6,\n", + " 4.5,\n", + " 6.3,\n", + " 5.8,\n", + " 6.1,\n", + " 5.1,\n", + " 5.3,\n", + " 5.5,\n", + " 5.0,\n", + " 5.1,\n", + " 5.3,\n", + " 5.5,\n", + " 6.7,\n", + " 6.9,\n", + " 5.0,\n", + " 5.7,\n", + " 4.9,\n", + " 6.7,\n", + " 4.9,\n", + " 5.7,\n", + " 6.0,\n", + " 4.8,\n", + " 4.9,\n", + " 5.6,\n", + " 5.8,\n", + " 6.1,\n", + " 6.4,\n", + " 5.6,\n", + " 5.1,\n", + " 5.6,\n", + " 6.1,\n", + " 5.6,\n", + " 5.5,\n", + " 4.8,\n", + " 5.4,\n", + " 5.6,\n", + " 5.1,\n", + " 5.1,\n", + " 5.9,\n", + " 5.7,\n", + " 5.2,\n", + " 5.0,\n", + " 5.2,\n", + " 5.4,\n", + " 5.1]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "col2 = [x for x in df[2]]\n", + "col2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Iterate Rows with Index and each Row as a List\n", + "**DO NOT** try to change data in the df this way, but it is convenient for iterating. \n", + "Itertuples is supposed to be much faster than Iterrows for large datasets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa\n", + "5 5.4 3.9 1.7 0.4 Iris-setosa\n", + "6 4.6 3.4 1.4 0.3 Iris-setosa\n", + "7 5.0 3.4 1.5 0.2 Iris-setosa\n", + "8 4.4 2.9 1.4 0.2 Iris-setosa\n", + "9 4.9 3.1 1.5 0.1 Iris-setosa\n", + "10 5.4 3.7 1.5 0.2 Iris-setosa\n", + "11 4.8 3.4 1.6 0.2 Iris-setosa\n", + "12 4.8 3.0 1.4 0.1 Iris-setosa\n", + "13 4.3 3.0 1.1 0.1 Iris-setosa\n", + "14 5.8 4.0 1.2 0.2 Iris-setosa\n", + "15 5.7 4.4 1.5 0.4 Iris-setosa\n", + "16 5.4 3.9 1.3 0.4 Iris-setosa\n", + "17 5.1 3.5 1.4 0.3 Iris-setosa\n", + "18 5.7 3.8 1.7 0.3 Iris-setosa\n", + "19 5.1 3.8 1.5 0.3 Iris-setosa\n", + "20 5.4 3.4 1.7 0.2 Iris-setosa\n", + "21 5.1 3.7 1.5 0.4 Iris-setosa\n", + "22 4.6 3.6 1.0 0.2 Iris-setosa\n", + "23 5.1 3.3 1.7 0.5 Iris-setosa\n", + "24 4.8 3.4 1.9 0.2 Iris-setosa\n", + "25 5.0 3.0 1.6 0.2 Iris-setosa\n", + "26 5.0 3.4 1.6 0.4 Iris-setosa\n", + "27 5.2 3.5 1.5 0.2 Iris-setosa\n", + "28 5.2 3.4 1.4 0.2 Iris-setosa\n", + "29 4.7 3.2 1.6 0.2 Iris-setosa\n", + "30 4.8 3.1 1.6 0.2 Iris-setosa\n", + "31 5.4 3.4 1.5 0.4 Iris-setosa\n", + "32 5.2 4.1 1.5 0.1 Iris-setosa\n", + "33 5.5 4.2 1.4 0.2 Iris-setosa\n", + "34 4.9 3.1 1.5 0.1 Iris-setosa\n", + "35 5.0 3.2 1.2 0.2 Iris-setosa\n", + "36 5.5 3.5 1.3 0.2 Iris-setosa\n", + "37 4.9 3.1 1.5 0.1 Iris-setosa\n", + "38 4.4 3.0 1.3 0.2 Iris-setosa\n", + "39 5.1 3.4 1.5 0.2 Iris-setosa\n", + "40 5.0 3.5 1.3 0.3 Iris-setosa\n", + "41 4.5 2.3 1.3 0.3 Iris-setosa\n", + "42 4.4 3.2 1.3 0.2 Iris-setosa\n", + "43 5.0 3.5 1.6 0.6 Iris-setosa\n", + "44 5.1 3.8 1.9 0.4 Iris-setosa\n", + "45 4.8 3.0 1.4 0.3 Iris-setosa\n", + "46 5.1 3.8 1.6 0.2 Iris-setosa\n", + "47 4.6 3.2 1.4 0.2 Iris-setosa\n", + "48 5.3 3.7 1.5 0.2 
Iris-setosa\n", + "49 5.0 3.3 1.4 0.2 Iris-setosa\n", + "50 7.0 3.2 4.7 1.4 Iris-versicolor\n", + "51 6.4 3.2 4.5 1.5 Iris-versicolor\n", + "52 6.9 3.1 4.9 1.5 Iris-versicolor\n", + "53 5.5 2.3 4.0 1.3 Iris-versicolor\n", + "54 6.5 2.8 4.6 1.5 Iris-versicolor\n", + "55 5.7 2.8 4.5 1.3 Iris-versicolor\n", + "56 6.3 3.3 4.7 1.6 Iris-versicolor\n", + "57 4.9 2.4 3.3 1.0 Iris-versicolor\n", + "58 6.6 2.9 4.6 1.3 Iris-versicolor\n", + "59 5.2 2.7 3.9 1.4 Iris-versicolor\n", + "60 5.0 2.0 3.5 1.0 Iris-versicolor\n", + "61 5.9 3.0 4.2 1.5 Iris-versicolor\n", + "62 6.0 2.2 4.0 1.0 Iris-versicolor\n", + "63 6.1 2.9 4.7 1.4 Iris-versicolor\n", + "64 5.6 2.9 3.6 1.3 Iris-versicolor\n", + "65 6.7 3.1 4.4 1.4 Iris-versicolor\n", + "66 5.6 3.0 4.5 1.5 Iris-versicolor\n", + "67 5.8 2.7 4.1 1.0 Iris-versicolor\n", + "68 6.2 2.2 4.5 1.5 Iris-versicolor\n", + "69 5.6 2.5 3.9 1.1 Iris-versicolor\n", + "70 5.9 3.2 4.8 1.8 Iris-versicolor\n", + "71 6.1 2.8 4.0 1.3 Iris-versicolor\n", + "72 6.3 2.5 4.9 1.5 Iris-versicolor\n", + "73 6.1 2.8 4.7 1.2 Iris-versicolor\n", + "74 6.4 2.9 4.3 1.3 Iris-versicolor\n", + "75 6.6 3.0 4.4 1.4 Iris-versicolor\n", + "76 6.8 2.8 4.8 1.4 Iris-versicolor\n", + "77 6.7 3.0 5.0 1.7 Iris-versicolor\n", + "78 6.0 2.9 4.5 1.5 Iris-versicolor\n", + "79 5.7 2.6 3.5 1.0 Iris-versicolor\n", + "80 5.5 2.4 3.8 1.1 Iris-versicolor\n", + "81 5.5 2.4 3.7 1.0 Iris-versicolor\n", + "82 5.8 2.7 3.9 1.2 Iris-versicolor\n", + "83 6.0 2.7 5.1 1.6 Iris-versicolor\n", + "84 5.4 3.0 4.5 1.5 Iris-versicolor\n", + "85 6.0 3.4 4.5 1.6 Iris-versicolor\n", + "86 6.7 3.1 4.7 1.5 Iris-versicolor\n", + "87 6.3 2.3 4.4 1.3 Iris-versicolor\n", + "88 5.6 3.0 4.1 1.3 Iris-versicolor\n", + "89 5.5 2.5 4.0 1.3 Iris-versicolor\n", + "90 5.5 2.6 4.4 1.2 Iris-versicolor\n", + "91 6.1 3.0 4.6 1.4 Iris-versicolor\n", + "92 5.8 2.6 4.0 1.2 Iris-versicolor\n", + "93 5.0 2.3 3.3 1.0 Iris-versicolor\n", + "94 5.6 2.7 4.2 1.3 Iris-versicolor\n", + "95 5.7 3.0 4.2 1.2 Iris-versicolor\n", + "96 5.7 2.9 
4.2 1.3 Iris-versicolor\n", + "97 6.2 2.9 4.3 1.3 Iris-versicolor\n", + "98 5.1 2.5 3.0 1.1 Iris-versicolor\n", + "99 5.7 2.8 4.1 1.3 Iris-versicolor\n", + "100 6.3 3.3 6.0 2.5 Iris-virginica\n", + "101 5.8 2.7 5.1 1.9 Iris-virginica\n", + "102 7.1 3.0 5.9 2.1 Iris-virginica\n", + "103 6.3 2.9 5.6 1.8 Iris-virginica\n", + "104 6.5 3.0 5.8 2.2 Iris-virginica\n", + "105 7.6 3.0 6.6 2.1 Iris-virginica\n", + "106 4.9 2.5 4.5 1.7 Iris-virginica\n", + "107 7.3 2.9 6.3 1.8 Iris-virginica\n", + "108 6.7 2.5 5.8 1.8 Iris-virginica\n", + "109 7.2 3.6 6.1 2.5 Iris-virginica\n", + "110 6.5 3.2 5.1 2.0 Iris-virginica\n", + "111 6.4 2.7 5.3 1.9 Iris-virginica\n", + "112 6.8 3.0 5.5 2.1 Iris-virginica\n", + "113 5.7 2.5 5.0 2.0 Iris-virginica\n", + "114 5.8 2.8 5.1 2.4 Iris-virginica\n", + "115 6.4 3.2 5.3 2.3 Iris-virginica\n", + "116 6.5 3.0 5.5 1.8 Iris-virginica\n", + "117 7.7 3.8 6.7 2.2 Iris-virginica\n", + "118 7.7 2.6 6.9 2.3 Iris-virginica\n", + "119 6.0 2.2 5.0 1.5 Iris-virginica\n", + "120 6.9 3.2 5.7 2.3 Iris-virginica\n", + "121 5.6 2.8 4.9 2.0 Iris-virginica\n", + "122 7.7 2.8 6.7 2.0 Iris-virginica\n", + "123 6.3 2.7 4.9 1.8 Iris-virginica\n", + "124 6.7 3.3 5.7 2.1 Iris-virginica\n", + "125 7.2 3.2 6.0 1.8 Iris-virginica\n", + "126 6.2 2.8 4.8 1.8 Iris-virginica\n", + "127 6.1 3.0 4.9 1.8 Iris-virginica\n", + "128 6.4 2.8 5.6 2.1 Iris-virginica\n", + "129 7.2 3.0 5.8 1.6 Iris-virginica\n", + "130 7.4 2.8 6.1 1.9 Iris-virginica\n", + "131 7.9 3.8 6.4 2.0 Iris-virginica\n", + "132 6.4 2.8 5.6 2.2 Iris-virginica\n", + "133 6.3 2.8 5.1 1.5 Iris-virginica\n", + "134 6.1 2.6 5.6 1.4 Iris-virginica\n", + "135 7.7 3.0 6.1 2.3 Iris-virginica\n", + "136 6.3 3.4 5.6 2.4 Iris-virginica\n", + "137 6.4 3.1 5.5 1.8 Iris-virginica\n", + "138 6.0 3.0 4.8 1.8 Iris-virginica\n", + "139 6.9 3.1 5.4 2.1 Iris-virginica\n", + "140 6.7 3.1 5.6 2.4 Iris-virginica\n", + "141 6.9 3.1 5.1 2.3 Iris-virginica\n", + "142 5.8 2.7 5.1 1.9 Iris-virginica\n", + "143 6.8 3.2 5.9 2.3 
Iris-virginica\n", + "144 6.7 3.3 5.7 2.5 Iris-virginica\n", + "145 6.7 3.0 5.2 2.3 Iris-virginica\n", + "146 6.3 2.5 5.0 1.9 Iris-virginica\n", + "147 6.5 3.0 5.2 2.0 Iris-virginica\n", + "148 6.2 3.4 5.4 2.3 Iris-virginica\n", + "149 5.9 3.0 5.1 1.8 Iris-virginica\n" + ] + } + ], + "source": [ + "for i, row in df.iterrows():\n", + " print(i, row[0], row[1], row[2], row[3], row[4])" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa\n", + "5 5.4 3.9 1.7 0.4 Iris-setosa\n", + "6 4.6 3.4 1.4 0.3 Iris-setosa\n", + "7 5.0 3.4 1.5 0.2 Iris-setosa\n", + "8 4.4 2.9 1.4 0.2 Iris-setosa\n", + "9 4.9 3.1 1.5 0.1 Iris-setosa\n", + "10 5.4 3.7 1.5 0.2 Iris-setosa\n", + "11 4.8 3.4 1.6 0.2 Iris-setosa\n", + "12 4.8 3.0 1.4 0.1 Iris-setosa\n", + "13 4.3 3.0 1.1 0.1 Iris-setosa\n", + "14 5.8 4.0 1.2 0.2 Iris-setosa\n", + "15 5.7 4.4 1.5 0.4 Iris-setosa\n", + "16 5.4 3.9 1.3 0.4 Iris-setosa\n", + "17 5.1 3.5 1.4 0.3 Iris-setosa\n", + "18 5.7 3.8 1.7 0.3 Iris-setosa\n", + "19 5.1 3.8 1.5 0.3 Iris-setosa\n", + "20 5.4 3.4 1.7 0.2 Iris-setosa\n", + "21 5.1 3.7 1.5 0.4 Iris-setosa\n", + "22 4.6 3.6 1.0 0.2 Iris-setosa\n", + "23 5.1 3.3 1.7 0.5 Iris-setosa\n", + "24 4.8 3.4 1.9 0.2 Iris-setosa\n", + "25 5.0 3.0 1.6 0.2 Iris-setosa\n", + "26 5.0 3.4 1.6 0.4 Iris-setosa\n", + "27 5.2 3.5 1.5 0.2 Iris-setosa\n", + "28 5.2 3.4 1.4 0.2 Iris-setosa\n", + "29 4.7 3.2 1.6 0.2 Iris-setosa\n", + "30 4.8 3.1 1.6 0.2 Iris-setosa\n", + "31 5.4 3.4 1.5 0.4 Iris-setosa\n", + "32 5.2 4.1 1.5 0.1 Iris-setosa\n", + "33 5.5 4.2 1.4 0.2 Iris-setosa\n", + "34 4.9 3.1 1.5 0.1 Iris-setosa\n", + "35 5.0 3.2 1.2 0.2 Iris-setosa\n", + "36 5.5 3.5 1.3 0.2 Iris-setosa\n", + "37 4.9 3.1 1.5 0.1 Iris-setosa\n", + "38 4.4 3.0 1.3 0.2 
Iris-setosa\n", + "39 5.1 3.4 1.5 0.2 Iris-setosa\n", + "40 5.0 3.5 1.3 0.3 Iris-setosa\n", + "41 4.5 2.3 1.3 0.3 Iris-setosa\n", + "42 4.4 3.2 1.3 0.2 Iris-setosa\n", + "43 5.0 3.5 1.6 0.6 Iris-setosa\n", + "44 5.1 3.8 1.9 0.4 Iris-setosa\n", + "45 4.8 3.0 1.4 0.3 Iris-setosa\n", + "46 5.1 3.8 1.6 0.2 Iris-setosa\n", + "47 4.6 3.2 1.4 0.2 Iris-setosa\n", + "48 5.3 3.7 1.5 0.2 Iris-setosa\n", + "49 5.0 3.3 1.4 0.2 Iris-setosa\n", + "50 7.0 3.2 4.7 1.4 Iris-versicolor\n", + "51 6.4 3.2 4.5 1.5 Iris-versicolor\n", + "52 6.9 3.1 4.9 1.5 Iris-versicolor\n", + "53 5.5 2.3 4.0 1.3 Iris-versicolor\n", + "54 6.5 2.8 4.6 1.5 Iris-versicolor\n", + "55 5.7 2.8 4.5 1.3 Iris-versicolor\n", + "56 6.3 3.3 4.7 1.6 Iris-versicolor\n", + "57 4.9 2.4 3.3 1.0 Iris-versicolor\n", + "58 6.6 2.9 4.6 1.3 Iris-versicolor\n", + "59 5.2 2.7 3.9 1.4 Iris-versicolor\n", + "60 5.0 2.0 3.5 1.0 Iris-versicolor\n", + "61 5.9 3.0 4.2 1.5 Iris-versicolor\n", + "62 6.0 2.2 4.0 1.0 Iris-versicolor\n", + "63 6.1 2.9 4.7 1.4 Iris-versicolor\n", + "64 5.6 2.9 3.6 1.3 Iris-versicolor\n", + "65 6.7 3.1 4.4 1.4 Iris-versicolor\n", + "66 5.6 3.0 4.5 1.5 Iris-versicolor\n", + "67 5.8 2.7 4.1 1.0 Iris-versicolor\n", + "68 6.2 2.2 4.5 1.5 Iris-versicolor\n", + "69 5.6 2.5 3.9 1.1 Iris-versicolor\n", + "70 5.9 3.2 4.8 1.8 Iris-versicolor\n", + "71 6.1 2.8 4.0 1.3 Iris-versicolor\n", + "72 6.3 2.5 4.9 1.5 Iris-versicolor\n", + "73 6.1 2.8 4.7 1.2 Iris-versicolor\n", + "74 6.4 2.9 4.3 1.3 Iris-versicolor\n", + "75 6.6 3.0 4.4 1.4 Iris-versicolor\n", + "76 6.8 2.8 4.8 1.4 Iris-versicolor\n", + "77 6.7 3.0 5.0 1.7 Iris-versicolor\n", + "78 6.0 2.9 4.5 1.5 Iris-versicolor\n", + "79 5.7 2.6 3.5 1.0 Iris-versicolor\n", + "80 5.5 2.4 3.8 1.1 Iris-versicolor\n", + "81 5.5 2.4 3.7 1.0 Iris-versicolor\n", + "82 5.8 2.7 3.9 1.2 Iris-versicolor\n", + "83 6.0 2.7 5.1 1.6 Iris-versicolor\n", + "84 5.4 3.0 4.5 1.5 Iris-versicolor\n", + "85 6.0 3.4 4.5 1.6 Iris-versicolor\n", + "86 6.7 3.1 4.7 1.5 Iris-versicolor\n", + "87 6.3 
2.3 4.4 1.3 Iris-versicolor\n", + "88 5.6 3.0 4.1 1.3 Iris-versicolor\n", + "89 5.5 2.5 4.0 1.3 Iris-versicolor\n", + "90 5.5 2.6 4.4 1.2 Iris-versicolor\n", + "91 6.1 3.0 4.6 1.4 Iris-versicolor\n", + "92 5.8 2.6 4.0 1.2 Iris-versicolor\n", + "93 5.0 2.3 3.3 1.0 Iris-versicolor\n", + "94 5.6 2.7 4.2 1.3 Iris-versicolor\n", + "95 5.7 3.0 4.2 1.2 Iris-versicolor\n", + "96 5.7 2.9 4.2 1.3 Iris-versicolor\n", + "97 6.2 2.9 4.3 1.3 Iris-versicolor\n", + "98 5.1 2.5 3.0 1.1 Iris-versicolor\n", + "99 5.7 2.8 4.1 1.3 Iris-versicolor\n", + "100 6.3 3.3 6.0 2.5 Iris-virginica\n", + "101 5.8 2.7 5.1 1.9 Iris-virginica\n", + "102 7.1 3.0 5.9 2.1 Iris-virginica\n", + "103 6.3 2.9 5.6 1.8 Iris-virginica\n", + "104 6.5 3.0 5.8 2.2 Iris-virginica\n", + "105 7.6 3.0 6.6 2.1 Iris-virginica\n", + "106 4.9 2.5 4.5 1.7 Iris-virginica\n", + "107 7.3 2.9 6.3 1.8 Iris-virginica\n", + "108 6.7 2.5 5.8 1.8 Iris-virginica\n", + "109 7.2 3.6 6.1 2.5 Iris-virginica\n", + "110 6.5 3.2 5.1 2.0 Iris-virginica\n", + "111 6.4 2.7 5.3 1.9 Iris-virginica\n", + "112 6.8 3.0 5.5 2.1 Iris-virginica\n", + "113 5.7 2.5 5.0 2.0 Iris-virginica\n", + "114 5.8 2.8 5.1 2.4 Iris-virginica\n", + "115 6.4 3.2 5.3 2.3 Iris-virginica\n", + "116 6.5 3.0 5.5 1.8 Iris-virginica\n", + "117 7.7 3.8 6.7 2.2 Iris-virginica\n", + "118 7.7 2.6 6.9 2.3 Iris-virginica\n", + "119 6.0 2.2 5.0 1.5 Iris-virginica\n", + "120 6.9 3.2 5.7 2.3 Iris-virginica\n", + "121 5.6 2.8 4.9 2.0 Iris-virginica\n", + "122 7.7 2.8 6.7 2.0 Iris-virginica\n", + "123 6.3 2.7 4.9 1.8 Iris-virginica\n", + "124 6.7 3.3 5.7 2.1 Iris-virginica\n", + "125 7.2 3.2 6.0 1.8 Iris-virginica\n", + "126 6.2 2.8 4.8 1.8 Iris-virginica\n", + "127 6.1 3.0 4.9 1.8 Iris-virginica\n", + "128 6.4 2.8 5.6 2.1 Iris-virginica\n", + "129 7.2 3.0 5.8 1.6 Iris-virginica\n", + "130 7.4 2.8 6.1 1.9 Iris-virginica\n", + "131 7.9 3.8 6.4 2.0 Iris-virginica\n", + "132 6.4 2.8 5.6 2.2 Iris-virginica\n", + "133 6.3 2.8 5.1 1.5 Iris-virginica\n", + "134 6.1 2.6 5.6 1.4 
Iris-virginica\n", + "135 7.7 3.0 6.1 2.3 Iris-virginica\n", + "136 6.3 3.4 5.6 2.4 Iris-virginica\n", + "137 6.4 3.1 5.5 1.8 Iris-virginica\n", + "138 6.0 3.0 4.8 1.8 Iris-virginica\n", + "139 6.9 3.1 5.4 2.1 Iris-virginica\n", + "140 6.7 3.1 5.6 2.4 Iris-virginica\n", + "141 6.9 3.1 5.1 2.3 Iris-virginica\n", + "142 5.8 2.7 5.1 1.9 Iris-virginica\n", + "143 6.8 3.2 5.9 2.3 Iris-virginica\n", + "144 6.7 3.3 5.7 2.5 Iris-virginica\n", + "145 6.7 3.0 5.2 2.3 Iris-virginica\n", + "146 6.3 2.5 5.0 1.9 Iris-virginica\n", + "147 6.5 3.0 5.2 2.0 Iris-virginica\n", + "148 6.2 3.4 5.4 2.3 Iris-virginica\n", + "149 5.9 3.0 5.1 1.8 Iris-virginica\n" + ] + } + ], + "source": [ + "for row in df.itertuples(name=None): \n", + " print(row[0], row[1], row[2], row[3], row[4], row[5])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pandas/Pandas Reorder Columns in DataFrame.ipynb b/Pandas/Pandas Reorder Columns in DataFrame.ipynb new file mode 100644 index 00000000..0f030051 --- /dev/null +++ b/Pandas/Pandas Reorder Columns in DataFrame.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas - Reorder Columns\n", + "Swap two columns, or change the order of columns" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ABCDLabel
05.13.51.40.2Iris-setosa
14.93.01.40.2Iris-setosa
24.73.21.30.2Iris-setosa
34.63.11.50.2Iris-setosa
45.03.61.40.2Iris-setosa
..................
1456.73.05.22.3Iris-virginica
1466.32.55.01.9Iris-virginica
1476.53.05.22.0Iris-virginica
1486.23.45.42.3Iris-virginica
1495.93.05.11.8Iris-virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " A B C D Label\n", + "0 5.1 3.5 1.4 0.2 Iris-setosa\n", + "1 4.9 3.0 1.4 0.2 Iris-setosa\n", + "2 4.7 3.2 1.3 0.2 Iris-setosa\n", + "3 4.6 3.1 1.5 0.2 Iris-setosa\n", + "4 5.0 3.6 1.4 0.2 Iris-setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 3.0 5.2 2.3 Iris-virginica\n", + "146 6.3 2.5 5.0 1.9 Iris-virginica\n", + "147 6.5 3.0 5.2 2.0 Iris-virginica\n", + "148 6.2 3.4 5.4 2.3 Iris-virginica\n", + "149 5.9 3.0 5.1 1.8 Iris-virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('iris.data', names=['A','B','C','D','Label'])\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1) get a list of the column names." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['A', 'B', 'C', 'D', 'Label']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titles = list(df.columns)\n", + "titles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "2) Swap or move whatever columns you want in the list." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['A', 'C', 'B', 'D', 'Label']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "titles[1], titles[2] = titles[2], titles[1]\n", + "titles" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "3) Reassign the columns in the DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ACBDLabel
05.11.43.50.2Iris-setosa
14.91.43.00.2Iris-setosa
24.71.33.20.2Iris-setosa
34.61.53.10.2Iris-setosa
45.01.43.60.2Iris-setosa
..................
1456.75.23.02.3Iris-virginica
1466.35.02.51.9Iris-virginica
1476.55.23.02.0Iris-virginica
1486.25.43.42.3Iris-virginica
1495.95.13.01.8Iris-virginica
\n", + "

150 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " A C B D Label\n", + "0 5.1 1.4 3.5 0.2 Iris-setosa\n", + "1 4.9 1.4 3.0 0.2 Iris-setosa\n", + "2 4.7 1.3 3.2 0.2 Iris-setosa\n", + "3 4.6 1.5 3.1 0.2 Iris-setosa\n", + "4 5.0 1.4 3.6 0.2 Iris-setosa\n", + ".. ... ... ... ... ...\n", + "145 6.7 5.2 3.0 2.3 Iris-virginica\n", + "146 6.3 5.0 2.5 1.9 Iris-virginica\n", + "147 6.5 5.2 3.0 2.0 Iris-virginica\n", + "148 6.2 5.4 3.4 2.3 Iris-virginica\n", + "149 5.9 5.1 3.0 1.8 Iris-virginica\n", + "\n", + "[150 rows x 5 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df[titles]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pandas/Pandas and Matplotlib Tackle Graduate Admissions Data.ipynb b/Pandas/Pandas and Matplotlib Tackle Graduate Admissions Data.ipynb new file mode 100644 index 00000000..414ccaf5 --- /dev/null +++ b/Pandas/Pandas and Matplotlib Tackle Graduate Admissions Data.ipynb @@ -0,0 +1,620 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Pandas and Matplotlib Tackle Graduate Admissions Data\n", + "Source: Kaggle, https://www.kaggle.com/mohansacharya/graduate-admissions/ \n", + " Mohan S Acharya, Asfia Armaan, Aneeta S Antony : A Comparison of Regression Models for Prediction of Graduate Admissions, IEEE International Conference on Computational Intelligence in Data Science 2019 \n", + " \n", + "### 1. 
Load in Data\n", + "Import numpy, pandas and matplotlib libraries, and load data into a Pandas dataframe. \n", + "Print data shape and summary info of data." + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(500, 9)\n", + "\n", + "RangeIndex: 500 entries, 0 to 499\n", + "Data columns (total 9 columns):\n", + "Serial No. 500 non-null int64\n", + "GRE Score 500 non-null int64\n", + "TOEFL Score 500 non-null int64\n", + "University Rating 500 non-null int64\n", + "SOP 500 non-null float64\n", + "LOR 500 non-null float64\n", + "CGPA 500 non-null float64\n", + "Research 500 non-null int64\n", + "Chance of Admit 500 non-null float64\n", + "dtypes: float64(4), int64(5)\n", + "memory usage: 35.2 KB\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "df = pd.read_csv('Admission_Predict_Ver1.1.csv')\n", + "print(df.shape)\n", + "df.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 2. Clean up Columns\n", + "Delete the first column (serial num) since Pandas already assigns an id to each row. \n", + "Rename the columns to simpler names. Print first 5 rows." + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gretoeflratingsoplorgparesearchchance
033711844.54.59.6510.92
132410744.04.58.8710.76
231610433.03.58.0010.72
332211033.52.58.6710.80
431410322.03.08.2100.65
\n", + "
" + ], + "text/plain": [ + " gre toefl rating sop lor gpa research chance\n", + "0 337 118 4 4.5 4.5 9.65 1 0.92\n", + "1 324 107 4 4.0 4.5 8.87 1 0.76\n", + "2 316 104 3 3.0 3.5 8.00 1 0.72\n", + "3 322 110 3 3.5 2.5 8.67 1 0.80\n", + "4 314 103 2 2.0 3.0 8.21 0 0.65" + ] + }, + "execution_count": 99, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = df.drop(['Serial No.'], axis=1)\n", + "new_names = {'GRE Score':'gre', 'TOEFL Score':'toefl', 'University Rating':'rating', 'SOP':'sop', 'LOR':'lor', 'CGPA':'gpa', 'Research':'research', 'Chance of Admit ':'chance'}\n", + "df.rename(columns=new_names, inplace=True)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3. Scope out Data\n", + "Show boxplots to see high-level distribution of main columns. \n", + "Use pandas.describe() to see high-level distribution of data." + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gretoeflratingsoplorgparesearchchance
count500.000000500.000000500.000000500.000000500.00000500.000000500.000000500.00000
mean316.472000107.1920003.1140003.3740003.484008.5764400.5600000.72174
std11.2951486.0818681.1435120.9910040.925450.6048130.4968840.14114
min290.00000092.0000001.0000001.0000001.000006.8000000.0000000.34000
25%308.000000103.0000002.0000002.5000003.000008.1275000.0000000.63000
50%317.000000107.0000003.0000003.5000003.500008.5600001.0000000.72000
75%325.000000112.0000004.0000004.0000004.000009.0400001.0000000.82000
max340.000000120.0000005.0000005.0000005.000009.9200001.0000000.97000
\n", + "
" + ], + "text/plain": [ + " gre toefl rating sop lor gpa \\\n", + "count 500.000000 500.000000 500.000000 500.000000 500.00000 500.000000 \n", + "mean 316.472000 107.192000 3.114000 3.374000 3.48400 8.576440 \n", + "std 11.295148 6.081868 1.143512 0.991004 0.92545 0.604813 \n", + "min 290.000000 92.000000 1.000000 1.000000 1.00000 6.800000 \n", + "25% 308.000000 103.000000 2.000000 2.500000 3.00000 8.127500 \n", + "50% 317.000000 107.000000 3.000000 3.500000 3.50000 8.560000 \n", + "75% 325.000000 112.000000 4.000000 4.000000 4.00000 9.040000 \n", + "max 340.000000 120.000000 5.000000 5.000000 5.00000 9.920000 \n", + "\n", + " research chance \n", + "count 500.000000 500.00000 \n", + "mean 0.560000 0.72174 \n", + "std 0.496884 0.14114 \n", + "min 0.000000 0.34000 \n", + "25% 0.000000 0.63000 \n", + "50% 1.000000 0.72000 \n", + "75% 1.000000 0.82000 \n", + "max 1.000000 0.97000 " + ] + }, + "execution_count": 100, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plt.figure(1, figsize=(14,4))\n", + "for i in range(1,6):\n", + " plt.subplot(1,5,i)\n", + " plt.boxplot(df[df.columns[i]])\n", + " plt.title(df.columns[i])\n", + "plt.show()\n", + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 4. Detailed Data Distribution\n", + "Plot histograms for main columns to show detailed distribution of data" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df[['gre','toefl','rating','gpa','research','chance']].hist(figsize=(14, 9),bins=16,linewidth='1',edgecolor='k',grid=False)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 5. Show Correlation with Chance of Acceptance\n", + "Calculate correlation between each data column and Chance of Acceptance. \n", + "Here we can see GPA, GRE score and TOEFL score are the most important features because they have the best correlation with acceptance. \n", + "Research appears to be the least important feature." + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAEh5JREFUeJzt3XmUZGV9xvHvI4iobAen9egMOERH48R4NI7ELQYjMYAG4nGDuIRonJNEXOIWjAYCOXFP9CRidIwGNAqCRh11DLiAJAo6jQg6IGaCIC0aRkXcooj+8se9LWXT3VXdU90NL9/POX36Lm/V/VXVfZ9661bVrVQVkqS23GalC5AkjZ/hLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWrQriu14VWrVtXatWtXavOSdIt0wQUXfKuqJoa1W7FwX7t2LZOTkyu1eUm6RUpy5SjtPCwjSQ0y3CWpQYa7JDXIcJekBhnuktSgoeGe5O1JrknypTnWJ8k/Jtme5OIkvzH+MiVJCzHKyP1k4JB51h8KrOv/NgL/vPNlSZJ2xtBwr6pzge/M0+QI4B3VOR/YJ8ldx1WgJGnhxnHMfTVw1cD8VL9MkrRCxvEN1cyybNZf3U6yke7QDfvvv/8YNi1JC3fCCSes6PaPP/74Jd/GOEbuU8B+A/NrgKtna1hVm6pqQ1VtmJgYemoESdIijSPcNwNP7z8182Dguqr6xhiuV5K0SEMPyyQ5FTgIWJVkCjgeuC1AVb0Z2AIcBmwHfgT88VIVK0kazdBwr6qjhqwv4Nljq0iStNP8hqokNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUoKG/oSrp5uuEE05Y0e0ff/zxK7p9zc2RuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaNFK4JzkkyWVJtic5dpb1+yc5O8mFSS5Octj4S5UkjWpouCfZBTgJOBRYDxyVZP2MZi8HTq+qBwBHAm8ad6GSpNGNcsrfA4HtVXU5QJLTgCOASwbaFLBXP703cPU4i5zJ05xK0vxGCffVwFUD81PAb
85o8zfAWUmeA9wROHgs1UmSFmWUY+6ZZVnNmD8KOLmq1gCHAe9McpPrTrIxyWSSyR07diy8WknSSEYJ9ylgv4H5Ndz0sMszgdMBquo8YHdg1cwrqqpNVbWhqjZMTEwsrmJJ0lCjHJbZCqxLcgDwdbo3TP9wRpuvAY8CTk5yH7pwv1UOzX0/QNLNwdCRe1XdABwDnAlcSvepmG1JTkxyeN/shcCzklwEnAocXVUzD91IkpbJSD+QXVVbgC0zlh03MH0J8LDxliZJWiy/oSpJDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUEj/cye2uCPd0u3Hoa7bhZ84pHGy8MyktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yM+5S0P4GXzdEjlyl6QGGe6S1CDDXZIaZLhLUoMMd0lq0EjhnuSQJJcl2Z7k2DnaPCnJJUm2JXn3eMuUJC3E0I9CJtkFOAn4XWAK2Jpkc1VdMtBmHfBS4GFVdW2SOy9VwZKk4UYZuR8IbK+qy6vqeuA04IgZbZ4FnFRV1wJU1TXjLVOStBCjhPtq4KqB+al+2aB7AfdK8ukk5yc5ZFwFSpIWbpRvqGaWZTXL9awDDgLWAP+Z5L5V9d1fuqJkI7ARYP/9919wsZKk0Ywycp8C9huYXwNcPUubD1bVT6vqq8BldGH/S6pqU1VtqKoNExMTi61ZkjTEKOG+FViX5IAkuwFHAptntPkA8EiAJKvoDtNcPs5CJUmjGxruVXUDcAxwJnApcHpVbUtyYpLD+2ZnAt9OcglwNvDiqvr2UhUtSZrfSGeFrKotwJYZy44bmC7gBf2fJGmF+Q1VSWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CB/IFvSkvCHxVeWI3dJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJatBI4Z7kkCSXJdme5Nh52j0hSSXZML4SJUkLNTTck+wCnAQcCqwHjkqyfpZ2ewLPBT477iIlSQszysj9QGB7VV1eVdcDpwFHzNLub4HXAD8eY32SpEUYJdxXA1cNzE/1y34hyQOA/arqw2OsTZK0SKOEe2ZZVr9YmdwGeD3wwqFXlGxMMplkcseOHaNXKUlakFHCfQrYb2B+DXD1wPyewH2Bc5JcATwY2Dzbm6pVtamqNlTVhomJicVXLUma1yjhvhVYl+SAJLsBRwKbp1dW1XVVtaqq1lbVWuB84PCqmlySiiVJQw0N96q6ATgGOBO4FDi9qrYlOTHJ4UtdoCRp4XYdpVFVbQG2zFh23BxtD9r5siRJO8NvqEpSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoNGCvckhyS5LMn2JMfOsv4FSS5JcnGSTyS5+/hLlSSNami4J9kFOAk4FFgPHJVk/YxmFwIbqup+wHuB14y7UEnS6EYZuR8IbK+qy6vqeuA04IjBBlV1dlX9qJ89H1gz3jIlSQsxSrivBq4amJ/ql83lmcBHZ1uRZGOSySSTO3bsGL1KSdKCjBLumWVZzdoweSqwAXjtbOuralNVbaiqDRMTE6NXKUlakF1HaDMF7Dcwvwa4emajJAcDLwN+u6p+Mp7yJEmLMcrIfSuwLskBSXYDjgQ2DzZI8gDgLcDhVXXN+MuUJC3E0HCvqhuAY
4AzgUuB06tqW5ITkxzeN3stsAdwRpIvJNk8x9VJkpbBKIdlqKotwJYZy44bmD54zHVJknaC31CVpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNchwl6QGGe6S1CDDXZIaZLhLUoMMd0lqkOEuSQ0y3CWpQYa7JDXIcJekBhnuktQgw12SGmS4S1KDDHdJapDhLkkNMtwlqUGGuyQ1yHCXpAYZ7pLUIMNdkhpkuEtSgwx3SWqQ4S5JDTLcJalBhrskNWikcE9ySJLLkmxPcuws62+X5D39+s8mWTvuQiVJoxsa7kl2AU4CDgXWA0clWT+j2TOBa6vqnsDrgVePu1BJ0uhGGbkfCGyvqsur6nrgNOCIGW2OAE7pp98LPCpJxlemJGkhRgn31cBVA/NT/bJZ21TVDcB1wJ3GUaAkaeFSVfM3SJ4I/F5V/Uk//zTgwKp6zkCbbX2bqX7+f/o2355xXRuBjf3svYHLxnVDFmgV8K0V2vYw1rY41rY41rY4K1nb3atqYlijXUe4oilgv4H5NcDVc7SZSrIrsDfwnZlXVFWbgE0jbHNJJZmsqg0rXcdsrG1xrG1xrG1xbs61TRvlsMxWYF2SA5LsBhwJbJ7RZjPwR/30E4BP1rCXBJKkJTN05F5VNyQ5BjgT2AV4e1VtS3IiMFlVm4G3Ae9Msp1uxH7kUhYtSZrfKIdlqKotwJYZy44bmP4x8MTxlrakVvzQ0DysbXGsbXGsbXFuzrUBI7yhKkm65fH0A5LUIMN9GSTZJ8mf78Tln5vk0iTvSnJ0kjeOs755tvv8JHcYmN+SZJ/l2PYtUZIfrHQNulGSc5KM9ImWJCcnecJS17ScbrXh3n9kc7nsAyw63PvLHlZVTxlTPb+Qzlz7wfOBX4R7VR1WVd8ddw23Rv1pPZo0ZJ8a97aWsx/fojQb7kn+OsmXk3wsyalJXtQ/k78iyaeA5yWZSPK+JFv7v4ctUTmvAu6R5AtJXtv/fSnJF5M8eaDmF/d1XJzkhH7Zm4FfATYn+YtxFJNkbf9K4E3A54G3JZlMsm1gu88F7gacneTsftkVSVYNXP6t/WXOSnL7vs2D+vrPm76dY6r5jkk+kuSi/r57cpJHJbmwvx/fnuR2A3W+Osnn+r97jqOGBdSa2R7jJAclOTvJu4EvLlMtc/WDNyT5TF/jgX3bA/tlF/b/772A7czcp57W7wOfT3JGkj36dq9Kckm/j7yuXzZrP5yrnnSvXs9I8iHgrH7ZS/r7+qIkrxoo7Yn9PvCVJL81UO/T+xouSvLOfvEj+u1cnn4Un2SPJJ/ob8cXkxwx4/bO1gfumeTj/XV/Psk9+uU36d9Lqqqa+wM2AF8Abg/sCfw38CLgHOBNA+3eDTy8n94fuHSJ6lkLfKmffjzwMbqPld4F+BpwV+DRdO/Ah+5J98PAI/rLXAGs6qePBt44hnp+Djy4n9+3/79Lfx/db+Z2B+f7y98A3L9ffjrw1H76S8BD++lXTd/uMdyHjwfeOjC/N90pL+7Vz78DeP5AnS/rp58OfHiZ9rsfDHmMDwJ+CBxwM+gHb+3bPGJg39wL2LWfPhh432L2qX4fORe4Y7/uL4HjgH3pvpU+/UGOffr/s/bDuerp+8DUwH57KPAZ4A4z9udzgL/vpw8DPt5P/1pfx3Sf2hc4GTiDru+tpzufFnSfKNyrn14FbKfro/P1gc8Cj+und6d79Ttn/16qv1Zf0jwc+GBV/R9A/ww/7T0D0wcD63PjOc72SrJnVX1/iWs7tap+BvxvulcRD6LrZI8GLuzb7QGso+skS+HKqjq/n35SulND7EoXQuuBi4dc/qtV9YV++gJgbbrj8XtW1Wf65e8GHjumer8IvC7Jq+k6xvf6Gr7Srz8FeDbwhn7+1IH/rx9TDaOa6zH+HvC5qvrqMtYxVz84FaCqzk2y1/RjB5ySZB1QwG0XuL0rq+r8JI+l24c+3fet3YDz6G7/j4F/SfIRuscR5uiHd
E/gc9Xzsar6zsDl/7WqftTfpsFvx/97//8CukAG+B3gvVX1ren2/bY/UFU/By5Jcpe+bYBXJHkE3ZPXaronbJi9D+wJrK6q9/fX/WOAJI9meft3s+E+3xkpfzgwfRvgIdM7/zKZq7YAr6yqtyxTHT8ESHIA3WjuQVV1bZKT6UYbw/xkYPpndKPDJTsTaFV9JckD6UZgr6R/OT7fReaYXg6j7n9Lbb46Zt4nBfwtcHZVPS7dbzKcs8DtTd+20IXvUTcpqDsE9Ci6LzoeQxe0s/bDJP80Tz2D92NmuT3TpvfTn3Fj3s3VfnCfnr7vngJMAA+sqp8muYIb+8dC+sBy9+9mj7n/F/D7SXbvj/U9Zo52Z9HtYAAkuf8S1fN9ulERdM/UT06yS5IJuhH75+i+AfyMgWOTq5PceYnqGbQXXUe5rh+tHDpH3UNV1bXA95M8uF80tm8qJ7kb8KOq+jfgdcBD6UZK08fTnwZ8auAiTx74f9646hjRXI/xcpuvH0y/D/Bw4Lqquo5upPz1fv3RO7Hd84GHTT82Se6Q5F59DXtX96XI5wPT/W2ufjhqPWfR9Z079Jffd0h9n6B7tXqnEdrvDVzTB/sjgbvPd8VV9T26c2z9QX/dt+vrWvb+3eTIvaq2JtkMXARcCUzSnYZ4pucCJyW5mO6+OBf40yWo59tJPp3uzcWP0h3yuIhu9PCSqvom8M0k9wHO618i/gB4KnDNuOuZUdtFSS4EtgGXA58eWL0J+GiSb1TVI0e8ymcCb03yQ7qR1mz3+2L8OvDaJD8Hfgr8GV3HOyPdJya2Am8eaH+7JJ+lG8DcZAS5xN4PPIQZj3GSX13OIob0g2uTfIbuyf0Z/bLX0B0GeQHwyZ3Y7o4kRwOnpn+TG3g53WDhg0l2pxvJTn9AYK5+OFI9VfUf/RPCZJLr6b5N/1fztN+W5O+ATyX5GTceKpnNu4APJZmke//iy/PfeqAbaLwl3Slafgo8sarOWu7+3ew3VJPsUVU/6J81zwU2VtXnV7qu1k3f7/30scBdq+p5y1zDFcCG6WOqt2az9QPgH4AXVdXkylanpdTkyL23Kd3PAe4OnGKwL5vHJHkp3b51JTv38l477yb9IP5I2q1CsyN3Sbo1a/UNVUm6VTPcJalBhrskNchwl6QGGe6S1CDDXZIa9P+D0nLwi/OzugAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gre 0.810351\n", + "toefl 0.792228\n", + "rating 0.690132\n", + "sop 0.684137\n", + "lor 0.645365\n", + "gpa 0.882413\n", + "research 0.545871\n", + "chance 1.000000\n", + "Name: chance, dtype: float64\n" + ] + } + ], + "source": [ + "correlation = df.corr()['chance']\n", + "plt.bar(df.columns, correlation, color='gray')\n", + "plt.show()\n", + "print(correlation)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 6. Plot Relations between GRE & TOEFL, and GPA & GRE\n", + "We can see a strong correlation between TOEFL and GRE scores -- people scoring high on one probably scored high on the other. \n", + "And we see a strong correlation between GPA and GRE -- people with a high GPA probably scored high on the GRE." + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "df.plot(kind='scatter', x='gre', y='toefl', color='green')\n", + "plt.xlabel(\"GRE\")\n", + "plt.ylabel(\"TOEFL\")\n", + "plt.show()\n", + "\n", + "df.plot(kind='scatter', x='gpa', y='gre', color='red')\n", + "plt.xlabel(\"GPA\")\n", + "plt.ylabel(\"GRE\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 7. Plot relations between Chance and other Features" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAEKCAYAAADn+anLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAIABJREFUeJztnX+cXGV97z/f3WQD2QST7G4Xan7aDXmZQhLJSom6KValXi8vUEly9dZGW5XW1tra2qJtX8KL+8NqorbIbStarkUrNsUWKRcLAqVga1o2NASIBVYCJAU2yybRJGBCdr/3j5ldZp55zpzvnHnOmXNmP+/Xixe7Z57zzPd5ZnaenHPe5zOiqiCEEEJ8dLS6AEIIIfmFiwQhhJBIuEgQQgiJhIsEIYSQSLhIEEIIiYSLBCGEkEi4SBBCCImEiwQhhJBIuEgQQgiJZFarC2iG3t5eXb58eavLIISQQrFr167nVbXP0rbQi8Ty5csxPDzc6jIIIaRQiMhT1rY83UQIISQSLhKEEEIi4SJBCCEkEi4ShBBCIuEiQQghJJLUFgkRuV5EDorIwxXbtonIf4jIHhH5OxFZUPHYJ0RkREQeFZGfT6suQsjMYGT0KG4a3o+R0aMtq2H82Ak8uP8Ixo+diNxmaQMAw/vG8bk7HsXwvvHsBgBA0vpmOhHZCOAYgBtU9ZzytosA3K2qp0Tk0wCgqleIyGoANwI4H8BPArgTwNmqOlHvOQYHB5UKLCHE5ZM3P4Qbdj49/fvWDUtx9aXnZlrDt3b/J6745h7M7ujAS5OT+Mxla6BA1bYt6xdjx64Dddt85rI12DG8H98deXlxGBrowVc/cEHi2kRkl6oOmtqm+fWlIrIcwK1Ti4Tz2DsAbFLVXxCRTwCAqn6q/NjtAK5S1e/V65+LBCHEZWT0KN78+Xtrtt/50Y0Y6J+fSQ3jx07g9Z++Gz9+aXJ625xZHQAUJ05Ff+b62szuACq6meamX7kAgyt6EtXXyCLRymsSvwzg2+WfXwlgf8VjB8rbahCRy0VkWESGx8bGUi6REFI0du8/0tD2NDhw+EXM7qj+eO3sEHRK/Y9cfxvxtr338eebKdFMSxYJEfkDAKcA/NXUJk8z73Krqtep6qCqDvb1me4qJ4TMINYtWdDQ9jRYvPB0vDRZ/c//iUnFhHoOCWLb+I88Nq7sbaZEM5kvEiLyXgAXA/gFfflc1wEASyqaLQbwTNa1EUKKz0D/fGzdsLRq29YNSzM71QQAPfPm4DOXrcFpszswf84snDa7A9s2rcG2TWurtm3dsDS2zWe3rMPQQPVppaGBnsSnmhol0
2sSIvJWAJ8D8LOqOlbR7qcBfB0vX7i+C8BKXrgmhCRlZPQodu8/gnVLFmS6QFQyfuwEDhx+EYsXno6eeXO82yxtgJLddO/jz2Pjyt6mF4hGrkmkFvAnIjcCuBBAr4gcAHAlgE8AmAPgOyICADtV9VdV9RER2QFgL0qnoX49boEghJB6DPTPT7Q4hPyA7pk3Z7qPKQ4fP4nHR4+iu6sTPfPm1PwexYq+eZg9qxOLF57e8JiaIdUjibThkQQhJCQ+bTWkfuqquWf3d+Ox0ePTv2/dsBTrly2KVWc/c9kaXLLO6/aYyI0CmzZcJAghofBpqyH10yg116WrU3By4uXPZZ8We9rsDvzzFT9X98ijHkVRYAkhJDf4tNWQ+qlVwS2fip/Gp8XO7ujAgcMvNlxDEgr9pUOEEBIKn7YaUj+1Krju2Z2JSa2p46XJycyuTfBIghBC4NdWQ+qnPjV3VX931e9bNyzF9s1rY7XYz1y2JvGppkbhNQlCCKkgbf3UVXN9qq5Vi01KLhRYQggpIj5t1dVPrQuJbwE48sJJPH3oBSzvmQvAr+r6avBtywIuEoQQUgdXi90yuBg7hg9EarLX3D2CoYEerOjtrkmifWLseE27ZtJcs4CnmwghJAKfFusSpclaaSbNNSlUYAkhJAB+LdZB/JqslazSXJPCRYIQQiLwa7EOTZ6NySrNNSlcJAghJAKfFusmt0Zpsr4k2lamuSaF1yQIISQGS3Kr1W4KqdMmhQosIYQ4WO8z8LWz6KeDK2qPCnx664K5XVi6aC4WzO2KfL48xJxPwUWCENL2+NJdfSmqlnbWvny4KbBvGOjB8FOHq/oafvJQjTp79aXnJhx58/B0EyGkrfFprL4UVUs7a18+LCmwbgLsFHd+dGPQIwoqsIQQUsansfpSVC3trH35sKTAugmwjeybFlwkCCFtjU9j9aWoWtpZ+/JhSYGNOrNjTZBNAy4ShJC2xqex+lJULe2sffnwpcAODfRU9bV981qvOtvKi9e8JkEImRE0Yzcl7cuHay61wm7i15cSQgiJhBeuCSGZMX7sBB7cfwTjx0401MayX15wax3eN47P3fEohveNR7YBgJsf2I8P/OX9uPmB/dPbRkaP4qbh/RgZPWp+vlbCIwlCSGKS3legQOJ7DbLGrX/potPx2Ojx6ceHBnqwaXBJzXj+92178dyPTk63O+uMLrxl9Zmx90A0cx+GFZ5uIoSkTtL7CubMEgCCE6cav9cgayxR4UDt/Q2NxIdX3gPRzH0YjcDTTYSQ1El6X0GndKCzQ+rulxdMUeEeJhv4t3flPRDN3IeRFlwkCCGJSHpfwYROYsL5FLXea5A1pqhwDx0NfMVE5T0QzdyHkRZcJAghiUh6X8G2TWuxbVOyew2yxlf/qv7uqjZDAz3YvnltTXz4WWd0VbU764yu2HsgmrkPIy1SuyYhItcDuBjAQVU9p7xtM4CrALwawPmqOlzevhzA9wE8Wt59p6r+atxz8JoEIa0n6X0FzdxrkDVurb64b994bn5gP2596DlcfO6ZePt5SwDY7oFIe27yEhX+FQDXArihYtvDAN4J4Iue9j9Q1XUp1kNIoSnyh6ovatsSv51XVvTNw+xZnVWngQ4fP4nHR4+iu6tzelznvHIBTk2W/j/Fwu4urOyfj4Xd0VHheZqb1BYJVb23fIRQue37QHSIFSHETxZaZKi6iqS3WnDHuGX9YuzYdSA23huKmm3rly2K7Stvc5WqAlteJG6dOt1Usf0eAB9zTjc9AuAxAD8C8Ieqel9c/zzdRGYCWWmRIeoqkt5qwaLAzpnVUTXeenR1Aicnoh/Paq6KqMA+C2Cpqr4GwG8D+LqInOFrKCKXi8iwiAyPjY1lWiQhrSCPWiRQfL3VQlIFNgqJ+cjN41zlYpFQ1ROqOl7+eReAHwA4O6Ltdao6qKqDfX19WZZJSEvIoxYJFF9vtZBUgY1CUb+vPM5VLhYJEekTkc7yz68CsBLAE62ti
pB8kEctMqquIumtFnxj3LphqTPmNV611bdt++Z1dfvK41ylqcDeCOBCAL0ARgFcCeAQgC8A6ANwBMBuVf15EbkMwNUATgGYAHClqv593HPwmgSZSeTVbiq63mrBHY813tu3zdJX2uRCgVXVd0c89Heett8E8M20aiEkinb/MMuir7T11qT3Ybj3KDTzfRI+vdVloH9+zX0Pru4K1M6Nb67y9L5M8z4JQnJNXrVSH0nTVpOOJy9zk3TclQmsd37/IK685RGcmNDY8fj6cvXWNwz0YPipw4n6ssxhXuZ+CqbAkhlJXrVSH0nTVpOOJy9zk3TcswQ4FfOx5huPry833bWZvixzyBRYQnJCXrVSH0nTVpOOJy9zk3Tcln/2+sbj68ty46+1L8sc5mXuK+EiQWYkedVKfSRNW006nrzMTdJxW/IcfOPx9WU502LtyzKHeZn7SrhIkBlJXrVSH0nTVpOOJy9zk3Tcn/tvtQmsrzitM3Y8vr62b15bo7IODfQk6ssyh3mZ+0p4TYLMaPJkkcSR1PJJ8/myIA92k6uyNtNXqDE3Qy4UWEKKQJ7SNuOw1JqX8bgfcpb7BaJw9VPrfm8/b8l0PDdgn5tv73kG39rzLC5dcxbe87oVAGr1Vp8Sm/c016RwkSCE1NCMhunuO7hsIb47Mj79uC8NNar/T978UKx+GjJ1du1V/4Af/riUwHf/k4ex7Y5H8eBVb61bUyPjsUAFNiA83URIeJrRMC2pqUBtGqqv/5HRo3jz5++t28+cWR0AFCcqnNekyujX/mUf/vCWvTXb/+clq6ePKKJqcpNg864fU4ElhCSmGQ3TmprqpqH6+t+9/0hsP50dgk4Jo4x+a8+zsdujanKNqqLrx5VwkSCEVNGMhmlNTXXTUH39r1uyAHFMTComNIwyeumas2K3R9Xkno8pun5cCRcJQkgVzWiYvn2HBnqq2vjSUH39D/TPj9VPt21ag22b1gZRRt/zuhV4xWmdVdtecVrn9KmmqJq2blgaLPmWCmxgeE2CkPRoRsMMaTdZ9NOQyujX/mVfjd0UV1PoGqjAEhJBXtz8omD5ALVGWFuw9u8qokdeOImnD72A5T1zp/uypp9a9FPL3FjH857XrYhcHKawpLs2U1ee4CJBckPe1L+846qYQwM9uN/RQ90E060blgKKmm1XX3puVd++18Knmlr6P/OMrulE1mvuHsHQQA+++oELasZjef2t+qmvLssY005p9dUfqq604OkmkgvykjxaFCx6qKuZ1uPOj26sOm3ivhY+1dTVPhvhpl+5AIMrXr5WYXn9o8bsJrVGJbfGjTHtlNao+kPU1ShUYEnhyKP6l2cseqirmVr7870WPtW0Ge59/Pmq3y2vf6R+6iS1RiW3xo0x7ZTWqPpD1JUmPN1EckEe1b88Y9FDXc3U2p/vtZiYVNhCuG1sXNlb9bvl9Y/UT52zIVFnR+LGmHZKa1T9IepKEx5JkFyQR/Uvz1j00O2b13l1Td+2yguovtfCp5pu27TG1L+byDo00FN1qinqOd3XP0o/3b55rTPu2uRWyxjTTmmNqj9EXWnCaxIkV9Buaow07Saramrpf3jfOO59/HlsXNlbs0DEPWfcmBupK8nzhdwv7bqsUIElLSfpmzzt1MwiL0K+2l0V0zd/rkIatc3F+lr4dFB324q+eZg9q7PqtEnSuG3f8/lwx2hVTy2Lo1XftSi9PvKUHstFggQnbwpf3uuyYFVS0xyPtQZ325bBxdgxfKDh/ZrpK6k6a63L7T8Pr09a8HQTCUpeVda81mXBr6QKAAmSPJq8Bp8WW1uXi3U/W1/x82BNbvXXEJ8ya52bPL3fqMCSlpFHhQ/Ib10WvEqqdKCzo1r1THM8Vi3WV5eLdT9TX4Z5sCa3emswpMxa56Yo7zcXnm4iQcmjwgfkty4LXiVVJwGt/kBLczxWLdZXl4t1P1NfhnmwJrd6a/DU6vZvnZuivN9ceCRBgpJHhS/PdVnwK6lrg
yWPJq/Bp8XW1rV1w9JE+9n6ip8Ha3Krv4b4lFnr3BTl/eaS2jUJEbkewMUADqrqOeVtmwFcBeDVAM5X1eGK9p8A8H4AEwA+oqq3xz0Hr0nkl7xaRHmty0La6acha3C3Jd2vmb5crOps0v7z8PpYycs1ia8AeKuz7WEA7wRQdRVJRFYDeBeAny7v86ci0gkShPFjJ/Dg/iMYP3Yis+fsmTcHa5csyOwPwzrGpHWNjB7FTcP7MTJ6NPI5rTVY2vnaTKWfHj5+MnI8d+19Dlfc9CDu2vtcQ88HAMP7xvG5Ox7F8L7xyDa++fPVtfvpw/j6vz6F3U8fBgDsGzuGu74/in1jx+rud99jB/GFux/HfY8djHxO334+LOO29pX0fZP130EapGo3ichyALdOHUlUbL8HwMemjiTKRxFQ1U+Vf78dwFWq+r16/fNIIp4ia59W0h6jRZ/csn4xduw6EFuDpVZfG0uq6UWfvwePjR6f/n1Vfzd+7Y0rTXPzni/vxHdHXl4copJaLXOz84nxqjpOnyV4scLyGRrowYre7pr97njkuem0WKB0p/b3fv8tsc9nUVkHly2sGl9UGq6vr7STYVtBI0cSeVkkrgWwU1W/Vv79LwB8W1Vvqtc/F4n6FFn7tJL2GK36pIuvBkutvjaWVNO79j6H99+wq6bN7A7BS5P1NczhfePY9MWdNfu6Sa0uliTaZvnjLWvw9vOW1H2+WpW1Vj+14vaVdjJsq8jL6aZG8CkM3ldYRC4XkWERGR4bG0u5rGJTZO3TStpjtOqTLr4aLLX62lhSTe/YO+pto86fka8uN5E1brvv+dPi1odePm1mVlmbSKx1+0o7GbYI5GWROABgScXviwE842uoqtep6qCqDvb19WVSXFEpsvZpJe0xWvVJF18Nllp9bSypphet7ve2EcRrsm4ia9x23/OnxcXnnhn7fDUq66SWdNYEuH2lnQxbBPKySNwC4F0iMkdEVgBYCeDfWlxT4Smy9mkl7TFa9UlXzfTVYKnV18aSavqm1WdiVX93VZtV/d347JZ4DXNwRQ+GBqpPK/mSWq1z49Zx+qzqhWpooMe7n5sWe9YZXdOnmuo9n0VbdccXlYabRCtu97+zNBXYGwFcCKAXwCiAKwEcAvAFAH0AjgDYrao/X27/BwB+GcApAL+lqt+Oew5ek7CRVw0vJGmP0aJPJg2ss7axBNTdtfc53LF3FBet7sebVp9pfj4A5qRWF19dbh2+vn373fzAftz60HO4+NwzqxaIuOez6KfWNNxWJMNmTW4uXKcNFwmSBmn/sSe9FyBkrUkXPesHrbsopD2eIn1A5wFGhROSkLRVRktaaEid1kdSpden4fo00ifGjk/rptfcPYJV/d146tCLqY2naPpp0eCRBCFl0lYZrWmhLkl1Wh9RGmlXJ3ByIrr2uMcbIeR4iqif5oEiKrCEtJy0VUZrWqhLUp3WR7RGWr+GuMcbIeR42l0/zQM83URImbRVRmtaqEtSndZHtEZaXxmNe7wRQo6n3fXTPMAjCULKpK0yWtNCQ+m0PqI00u2b19WtYfvmdd79fNtc3XRVf3dq42l3/TQP8JoEIQ60m2g3tTu0m2Y4/EOz4/uAs34JfdL7HXz9h/rie+trONA/v+ZeC0sNC7u7sLJ/PhZ2v3zjm6+vFX3zMHtWZ93TPta5cfHdc2HZz3KfCamFi0SbQY3Qjk8FdZNVo0ia5hrytXDbbRlcjB3D8eps0vG5qm4zdVn7cqlMq73m7pGm0mqtr/VMh6eb2ghqhHaiVNDKZNUokqa5hnwtfO1ckr6GVlU3aV1zZgkAaThtNXRareW1bleowM5QqBHaiVJBLcmmSdNcQ74WvnYuSV9Dq6qbtK5O6UBnR3Wek6XW0Gm1WaTYtgNcJNoIaoR2olRQS7Jp0jTXkK+Fr51L0tcwStV1k1WT1jWhk2X1t7FaQ6fVZpFi2w5wkWgjqBHaiVJBLacfkqa5hnwtfO0s6qwFq6qbt
K5tm9YmSlsNnVY7U081NQqvSbQhtJvsNGO8JLWbQvXtaxfyNbSous3UlbTWkGm1MxUqsDOcpDplKA0zipAfmKH+4H36ZsgPtMPHT+Lx0aPo7upMZW7d/n3P5/tQtYzR15fvPWJRUn37JZ2bBXO7sHTRXCyY2xXfuALfa03i4SJBMiGkDupLIw2lM1rVT4vCmVS7tM6V2//Z/d14bPR41fO5iaxDAz3YNLgkdjyvXbYQ95X3q1d71koqVdbsiTzdJCJfQJ1QGVX9SFpFWeHppmIQUgeNSiMNoTP61c9aXdOigybVLq1zFdW/ha7ODpycaCyJ1ld71koqVdZwhFJghwHsqvMfISZC6qBRaaQhdEav+unRNS06aFLt0jpXzY23ejGwJNH6njNrJZUqa2uod7rpzar6iyLym6r6J5lVRNqOkDpoVBppCJ3Rq37qJKDVi4QvudUdT1Lt0jpXzY1XUFm/JYnW95wbV/bimrtHatqlpaRSZW0N9f75sF5ElgH4ZRFZKCKLKv/LqkBSfELqoFFppCFON/jVz1pd06KDJtUurXPl639Vf3fN8/mU0e2b48fj7uerPWsllSpra6h3TeIjAD4E4FUA/hOlf35Moar6qvTLqw+vSRSLPNpN1udLajwlrdM6V27/lkRW63istWetpFJlbZ4gCqyqXgPgGhH5M1X9ULDqyIwgacKnD8t+ISOz940dm/7Qq1e7285Xgy811ULaOrJPP3W3WWsfXFF79GB5PZLODVXWbDHdTCcibwCwUlX/r4j0ApivqvtSry4GHknkk7QTZV0N8g0DPRh+6nDDz+fTKSuVUQCRSmel+gmUTvU8dejFIEmnVtz6V/V349E6CuzUeFb0dteMG4ogcwqkm5BLwtDIkUTsIiEiVwIYBLBKVc8WkZ8E8Deq+vrmS20OLhL5I+1EWYv6aXm+RhRSV+mMUj8rSZp0aqUZBTYJ1trTTMgl4QidAvsOAJcAOA4AqvoMAB7rES9pJ8padEfL8zWiTbpKZ5ziCSRPOrWStfZprT3NhFzSGiyLxEktHW4oAIhId0x7MoNJO1HWojtanq8RbdJVOuMUTyB50qmVrLVPa+1pJuSS1mBZJHaIyBcBLBCRDwK4E8CX0i2LFJW0E2V9GuTQQE/DzxelU1qUTp/6uaq/O0jSqZVmFFjfuEPMKZBuQi5pDdYL128BcBFKGuztqvodwz7XA7gYwEFVPae8bRGAvwawHMCTALao6mERuRDAtwBMXQz/W1W9Ou45eE0iv6SdKOtaSSHtJqvS6bYLGQyYtH7reHztQs0pkG5CLmmeNFJg9wCYehUfNO7zFQDXArihYtvHAdylqn8kIh8v/35F+bH7VPViY9+Foeh/CCGjqC0evrV/V4P0KZ2Wvnw65Yq+eZg9qzP29IfbzleDT6e13LeQ9N6QIy+cxNOHXsDynrl1a/eN21VSfRpuyHsU0tZ8SRgsdtMWANsA3IPSkcQQgN9V1ZtiOxdZDuDWiiOJRwFcqKrPishZAO5R1VXlI4mPNbpI5P1IouiaX80X2q9fjB27DsSOx5Kk6ksZXb9sUaL58qmsSfuyvmZuu8FlC6tU0yj9VIGabW4qq2+efcm3rrZ65hldeO5HJ2Ofz6f0WsZtTWAt+vt+JhBagX0QwFtU9WD59z4Ad6rqWkMhy1G9SBxR1QUVjx9W1YXlReKbAA4AeAalBeORuP7zvEgUXfOzfKG9bzzWJFUfc2Z1NKyMRqmgXZ2CkxPRKa0+rK+ZZW4aYXYHUK8rdyzN4iq9lnFbE1iL/r6fKYRWYDumFogy48b9GuEBAMvKC88XANwc1VBELheRYREZHhsbC1xGOIqu+Vm+0N43HmuSqg+3RTMqq0jj+qn1NbPMTWPUnxt3LM3iKryWcVsTWIv+vie1WN7p3xaR20XkfSLyPgD/D8BtCZ9vtHyaCeX/HwQAVf2Rqh4r/3wbgNnlO7trUNXrVHVQVQf7+voSlpE+Rdf8LF9o7xtPV
JKqq4P6cFs0o7K6R8hJU2etY2yO+nMT+iuGXYXXMm5rAmvR3/ekFssioQC+CGANgLUArmvi+W4B8N7yz+9FyWiCiJwp5X8uicj55brGvT0UhKJrfpYvtPeNx5qk6ksZTaKMRqms2zfXT2m1jtk6Rt94fPqpb9tnt6yrO8/bN681aatnnVGdgRT1fK6xZRm3NYG16O97UovlmsQDqnqes22Pqq6J2e9GABcC6AUwCuBKlE4j7QCwFMDTADar6iER+TBKibOnALwI4LdV9V/iis/zNYkpZqrdlDRlNKTKGjJ11tLOqp+GtJvcbdbnSzpuq91U9Pd9uxNEgRWRDwH4NQCvEpE9FQ/NB/DPcR2r6rsjHnqTp+21KOmybUczml/aH3IWLF9ob9nPh0/DtPbvjjFpX83Mldu/Tz/1JaT6FFtXn7XOw1Pjx3H/k4ewcO5sDPTP9/a9YG4Xli6aiwVzXz7SuGvvc7hj7yguWt2PN60+0zsea6ptyMRfLi75o973SbwCwEIAn0LpfoYpjqrqoQxqi6UIRxJJSaoR5lU/DFlXqL4sqq61bzcVthHV1Ke3umqpTz/d+cQ4HqtIfT3rjC4cfvFUbN/ufqv6u3H7Ry9MNDchk27z+t5tR4IqsHmmXReJpBphXvXDkHWF6suq6lr6jkqFtaimUXprpVqaNPG1qxM4OWFr+xdb108fUVjnZs6sDgCKE6caU4195PW9266EVmBJxiTVCPOqH4asK1RfVlXX0ndUKqxFNY3SWyvV0qSJr9LAn/cde0enf7bOTWeHoFPy9bqS8HCRyCFJNcK86och6wrVl1XVtfQdlQprUU2jjuQr1dKkia8Ku6Z70er+6Z+tczMxqZjQfL2uJDxcJHJIUo0wr/phyLpC9WVVdS19+1JhrapplN5aeRE+Sj91U1/POqPL6Xudab9V/d3Tp5oamZttm9Zg26bGVWMfeX3vEl6TyDV5sJtCErKuUH2FTG5tRjW1qKW+Nq6lZO3bZzclnZs8vq6kPmmkwJIWkFQjzEO6ZkgtMmQNFqx1uouCTzW11uCqpb4Pdp9+uqynG69dvgjLeqK/C8yn5q5buhC980+rezrHl2Drm5uQr2se3rukGh5JkOCkrTJa+g/VJgpXeXUTWKOSaC0aqS9R1teXq7cODfTg/qcOV7XZMbw/NnXWN26r0kuKCRVY0jLSVhkt/YdqE0WU8uriptpaNVIfrs5q0Vuj0mXd7e64rUovKS5UYEnLSFtltPQfqk0UUcqriyu3WjVSf18ddX/37xSVHltf87UqvWRmwEWCBCVtldHSf6g2UUQpry7usYFVI/X3NVn3d/9OUUcn9TVfq9JLZgZcJEhQ0lYZLf2HahOFT3l1E1h9qbZWjdSXKLt987pYvXVooKeqzWe3rDOlzrrjtiq9ZGbAaxIkFdJWGS39h2oThWs3WZNoQybkuu18bSyps5bxkfaBCixJDeuHakiV0feBaUksdZNVrXUmjR33Kao+fHW5uqk11dZ9Tl8bXzKs5fXxJdhao8JdeP9DceEiQcy0IqXTl37qJqQmTVa1Pp9PP61US6+5ewSr+rvx1KEXY3VXX11PjB2v6suqm6at+VrmxjKnTHctNjzdREy0IqUzKv20MiE1abJqI8/nqqxR/VfvU6u7NpLKGqebpq35ulhei6R1kuyhAkuC04qUzqj008rtSZNVG3nc7c3yzyqf7tpIKmucbpq25utieS2S1knyDU83EROtSOmMSj+t3J40WbWRx93eou4+qGRCJwGCtc6ZAAARbElEQVStbtlIKmucbpq25utieS2S1knyDY8kiIlWpHRGpZ9Wnt5ImqzayPO52ur2zWtrFNFV/d2xumtUKmsS3TRtzdc6N3FzynTX4sNrEqQhWmGpWIyapMmq1uezqKVW3dXXf1LdNG3N14V2U3tABbaF5PGPoZl4Z7ddM2qr25e1Bp8OaqnLp6QmrcHXv6uIWhNSfeNxE2RDLnohdWTLa+Hbx
nTX4sJFIiB5VP2sX2jvqzPkeNy+tgwuxo7hA4n6Tqp+uuPesn4xduw6EESdbQZXLX3DQA+GnTRXX10+NTfr91sz7y9SDHi6KRB5VP38X2hv+/L6kOPx9eVi7Tup+mlJW41SVOM0z2aIUkur6/Irt+72rN9vzby/SGuhAtsC8qj6eb/Q3vjl9SHH4+vLxdp3UvXTkrYapajGaZ7NYOk7Sul1t2f9fmvm/UWKA083BSKPqp/3C+0nFXEpoFH7hvySexdr30nVT9+4XaIU1TjNsxksfUcd7bvbs36/NfP+IsWBRxKByKPq56vJ+uX1Icfj62vrhqWJ+k6qfvrG7dYQpaimdaoJ8KulbpprlNK7fXP865gmzby/SHHgNYnAtLvdFLKOZvpOqn5aakhqETWDJc01afBg2jTz/iKtITcKrIhcD+BiAAdV9ZzytkUA/hrAcgBPAtiiqoeldIL1TwC8DcALAN6nqg+kWV8apJ1+mmZNlrRVH9YPCbcva12WvqxzZanBp85mfW+DD59+akm6bQVUXtuHtK9JfAXAtQBuqNj2cQB3qeoficjHy79fAeC/AFhZ/u9nAPxZ+f8zkqSJmxZCpqamrUBadFdrcqulBuvchExudfv3KbC+2tN8j1jJo/ZNwpL66SYRWQ7g1oojiUcBXKiqz4rIWQDuUdVVIvLF8s83uu2i+s7j6aYQJE3ctBClLVamlVqfL20F0qK7Rs2Vq7NaamgkUdZHkuTWqLmvxFd7mu8RK3nUvomNvCuw/VMf/OX//0R5+ysB7K9od6C8rQoRuVxEhkVkeGxsLPViW0HSxE0LFh3V+nxpK5AW3TU6ubXxGhpJlPWRJLnVgq/2NN8jVvKofZPw5Mlu8v011vwTTlWvU9VBVR3s6+vLoKzsSZq4acGio1qfL0qBnND01Fm3r+jk1sZraCRR1keS5FYLvtrTfI9YyaP2TcLTikVitHyaCeX/HyxvPwBgSUW7xQCeybi2XJA0cdNClLYYKuEzpAJp0V2j5mr75nUN19BIomyo5Fbf3LsKrK/2NN8jVvKofZPwtOKaxDYA4xUXrhep6u+JyH8F8GGU7KafAXCNqp5fr+92vSYxRZoqZkj1M20F0tJXSD20FcmtFgXWOu6soe5aPPKkwN4I4EIAvSJyAMCVAP4IwA4ReT+ApwFsLje/DaUFYgQlBfaX0qytUVrhqPuUxyQ0k7Zqwac7+tRMt45mPuDcfX3aalJ916rFruibh9mzOqdPrzSTKOv272uT9YdxM+Mh7UOqi4SqvjvioTd52iqAX0+znqSEVCyzphUpnZb5Gly2cFohnWrj0zctyujQQA/uNyijlr6tabiWRNm0U23TTKyl2kqm4B3XMUSphq66mEf1z69cCgBJrXarkurD1TeTKqlJdVdrGq4lUTbtVNs0E2uptrY/eVdgC0W0YllNHtU/r6IqHejsSC891KqkWvZNqqQm1V2tabiWRNm0U23TTKyl2koqYQpsDNGKZTV5VP+8iqpOAlr9QRuydquSatk3qZKaVHe1puFaEmXTTrVNM7GWaiuphEcSMUSphu6X3OdR/fMrl2tTrd2qpLoKqU/ftCqpFmXUxapvJk2UTTvVNs3EWqqtpBJekzCS1wROC61I6bTMl9VuCqmMWvq2tstDqm3WmjRpDxq5JsFFghBCZhi8cN0g48dO4MH9RzB+7ESrS2mYtGvPw9z4arDUFapN6H3zMKcWilInSZcZf+G6yD542rXnYW6S3ueR9N6DkPc2hNwva4pSJ0mfGX26qcg+eNq152FukkaRJ733IOS9DSH3y5qi1EmSw9NNRorsg6ddex7mJmkUedJ7D0Le2xByv6wpSp0kG2b06aYi++Bp156HubHeo2C5ryBJm0bqCnlvRqspSp0kG2b0kUSRffC0a8/D3CSNIk9670HIextC7pc1RamTZMOMviYxRZF98LRrz8PcJL3PI1SbRupKc7+sKUqdpHFyExVeFEJGHefhDyvtD
8es8b0+ltcsVBsr7RatXZQ6SbpwkQhI1tpgmnpoK8ZTJNy52TK4GDuGk0WFE5JneLopEFlrg0ljwJuJyKYGWcI3Ny6cK5JnqMC2gKy1waQx4M1EZFODLOGbGxfOFWkXeLopEFlrg0ljwJuJyKYGWcI3Ny6cK9Iu8EgiEFlrg0ljwJuJyKYGWcI3N0mjwgnJO7wmEZg8fFl9SLupSBZU1oSMCickS6jANkjIP+40tUFfnUn1UCtpjmd43zjuffx5bFzZi8EVPfE75Ax3bqiMknZkxi8SRdE8Q9aZhzG/58s78d2RcQDANXePYGigB1/9wAWZ1kAIiWdGX5MYP3YCV3xzD3780iSOnjiFH780id/75p7c5eeHrDMPYx7eNz69QExx38g4hveNR+xBCGkVM3qRKIrmGbLOPIz53sefb2g7IaR1zOhFoiiaZ8g68zDmjSt7G9pOCGkdM3qRKIrmGbLOPIx5cEUPhgaqL1QPDfQU8uI1Ie1OSxRYEflNAB8EIAC+pKp/LCJXlbeNlZv9vqreVq+fmZYCG7LOPIy56HYTIUWlEQU280VCRM4B8A0A5wM4CeAfAHwIwC8AOKaq26195fE+iayZCVHhWTMTx0xmFnm/T+LVAHaq6gsAICL/BOAdLaij8KStsuZBlc2amThmQurRimsSDwPYKCI9IjIXwNsALCk/9mER2SMi14vIwhbUVhjSVlnzoMpmzUwcMyFxZL5IqOr3AXwawHdQOtX0IIBTAP4MwE8BWAfgWQCf9e0vIpeLyLCIDI+NjfmazAjSVlnzoMpmzUwcMyFxtMRuUtW/UNXzVHUjgEMAHlfVUVWdUNVJAF9C6ZqFb9/rVHVQVQf7+vqyLDtXpK2y5kGVzZqZOGZC4mjJIiEiP1H+/1IA7wRwo4icVdHkHSidliIRpK2y5kGVzZqZOGZC4miVAnsfgB4ALwH4bVW9S0S+itKpJgXwJIBfUdVn6/VDu4l2UxrMxDGTmUXe7Sao6pBn2y+2opY8Y/mwSjt5NA/Jpll/aOdhzITkhRmfAptXqGKW4DwQ0lpmdCxHXqGKWYLzQEjr4SKRQ6hiluA8ENJ6uEjkEKqYJTgPhLQeLhI5hCpmCc4DIa2nJQpsKNpdgaWKWYLzQEhYcq/AEhuhVMyR0aPYvf8I1i1ZgIH++QEqyxYqqYS0Di4Sbc4nb34IN+x8evr3rRuW4upLz21hRYSQIsFrEm3MyOjRqgUCAG743tMYGT3aoooIIUWDi0Qbs3v/kYa2E0KICxeJNmbdkgUNbSeEEBcuEm3MQP98bN2wtGrb1g1LC3nxmhDSGnjhus25+tJzsfWC5YW2mwghrYOLxAxgoH9+qosD72MgpH3hIkGagimthLQ3vCZBEsOUVkLaHy4SJDFMaSWk/eEiQRLDlFZC2h8uEiQxTGklpP3hhWvSFJeseyVeP9BLu4mQNoWLBGkaprQS0r7wdBMhhJBIuEgQQgiJhIsEIYSQSLhIEEIIiYSLBCGEkEhEVVtdQ2JEZAzAU62uIyG9AJ5vdREJKXLtQLHrZ+2to8j1u7UvU9U+y46FXiSKjIgMq+pgq+tIQpFrB4pdP2tvHUWuv5naebqJEEJIJFwkCCGERMJFonVc1+oCmqDItQPFrp+1t44i15+4dl6TIIQQEgmPJAghhETCRSJlROStIvKoiIyIyMfrtNskIioiubEn4moXkfeJyJiI7C7/94FW1OnDMu8iskVE9orIIyLy9axrrIdh7j9fMe+PiciRVtTpw1D7UhH5RxH5dxHZIyJva0WdPgy1LxORu8p13yMii1tRpw8RuV5EDorIwxGPi4hcUx7bHhE5z9SxqvK/lP4D0AngBwBeBaALwIMAVnvazQdwL4CdAAZbXbe1dgDvA3Btq2tNWPtKAP8OYGH5959odd2Nvm8q2v8GgOtbXXcDc38dgA+Vf14N4MlW191A7X8D4L3ln38OwFdbXXdFbRsBnAfg4YjH3wbg2wAEwAUA/tXSL
48k0uV8ACOq+oSqngTwDQCXetr9DwCfAfDjLIuLwVp7HrHU/kEA/0dVDwOAqh7MuMZ6NDr37wZwYyaVxWOpXQGcUf75FQCeybC+elhqXw3grvLP/+h5vGWo6r0ADtVpcimAG7TETgALROSsuH65SKTLKwHsr/j9QHnbNCLyGgBLVPXWLAszEFt7mcvKh643iciSbEqLxVL72QDOFpF/FpGdIvLWzKqLxzr3EJFlAFYAuDuDuixYar8KwHtE5ACA21A6EsoDltofBHBZ+ed3AJgvIj0Z1BYC8/uqEi4S6SKebdM6mYh0APg8gN/JrCI7dWsv8/cAlqvqGgB3AvjL1KuyYal9FkqnnC5E6V/iXxaRBSnXZcVS/xTvAnCTqk6kWE8jWGp/N4CvqOpilE6BfLX8t9BqLLV/DMDPisi/A/hZAP8J4FTahQWikffVNHl4YdqZAwAq/3W9GNWH1vMBnAPgHhF5EqXzhLfk5OJ1XO1Q1XFVPVH+9UsA1mdUWxyxtZfbfEtVX1LVfQAeRWnRyAOW+qd4F/Jzqgmw1f5+ADsAQFW/B+A0lLKFWo3lPf+Mqr5TVV8D4A/K236YXYlN0cj7ahouEulyP4CVIrJCRLpQ+oO+ZepBVf2hqvaq6nJVXY7ShetLVHW4NeVWUbd2AHDOZ14C4PsZ1leP2NoB3AzgjQAgIr0onX56ItMqo7HUDxFZBWAhgO9lXF89LLU/DeBNACAir0ZpkRjLtEo/lvd8b8VRzycAXJ9xjc1wC4CtZcvpAgA/VNVn43bid1yniKqeEpEPA7gdJXPielV9RESuBjCsqjV/+HnBWPtHROQSlA63D6FkO7UcY+23A7hIRPYCmADwu6o63rqqX6aB9827AXxDy+pKHjDW/jsAviQiH0XpdMf78jAGY+0XAviUiChKRuKvt6xgBxG5EaX6esvXe64EMBsAVPXPUbr+8zYAIwBeAPBLpn5z8NoQQgjJKTzdRAghJBIuEoQQQiLhIkEIISQSLhKEEEIi4SJBCCEkEi4ShHgQkTNF5Bsi8oNyUuxtInK5iOQtPoWQVOEiQYiDiAiAvwNwj6r+lKquBvD7APpbWxkh2cNFgpBa3gjgpfINSAAAVd0N4D4A88phhv8hIn9VXlAgIp8UkftF5GERua5i+z0i8mkR+bfy9z4Mlbd3ish2EXmoHJD4G+Xt60Xkn0Rkl4jcbknpJCRNuEgQUss5AHZFPPYaAL+FUmT0qwC8vrz9WlV9raqeA+B0ABdX7DNLVc8v73dledvlKKW3vqYckPhXIjIbwBcAbFLV9ShFPvyvcMMipHEYy0FIY/ybqh4AABHZDWA5gO8CeKOI/B6AuQAWAXgEpZRcAPjb8v93ldsDwJsB/LmqngIAVT0kIuegtEB9p3wg0gkgNluHkDThIkFILY8A2BTx2ImKnycAzBKR0wD8KUrfKrhfRK5CKbTO3WcCL//NCWpjmgXAI6q6oYnaCQkKTzcRUsvdAOaIyAenNojIa1H6/gAfUwvC8yIyD9ELTCV3APhVEZlV7n8RSnHlfSKyobxttoj8dMIxEBIELhKEOJQTSd8B4C1lBfYRlL5NzZu9r6pHUPo+jYdQiiC/3/A0X0YpMnuPiDwI4L+XvzJzE4BPl7ftBvC6JodDSFMwBZYQQkgkPJIghBASCRcJQgghkXCRIIQQEgkXCUIIIZFwkSCEEBIJFwlCCCGRcJEghBASCRcJQgghkfx/Pv9qtPCzyrgAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "for i in range(1,7):\n", + " df.plot(kind='scatter', x='chance', y=df.columns[i]) \n", + " plt.xlabel(\"Chance\")\n", + " plt.ylabel(df.columns[i])\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pandas/Pandas.pptx b/Pandas/Pandas.pptx new file mode 100644 index 00000000..0812a218 Binary files /dev/null and b/Pandas/Pandas.pptx differ diff --git a/Pandas/Python Pandas Input-Output.ipynb b/Pandas/Python Pandas Input-Output.ipynb new file mode 100644 index 00000000..4fc3f862 --- /dev/null +++ b/Pandas/Python Pandas Input-Output.ipynb @@ -0,0 +1,762 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Pandas I/O\n", + "### Creating DataFrames, Reading and Writing to CSV & JSON files \n", + "[Documentation](https://pandas.pydata.org/docs/index.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import random" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating DataFrames from Lists and Dicts\n", + "▶ New DataFrame from a **List** \n", + "Pandas automatically assigns numerical row indexes." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
00.027684
10.174996
20.743153
30.628041
40.658552
\n", + "
" + ], + "text/plain": [ + " 0\n", + "0 0.027684\n", + "1 0.174996\n", + "2 0.743153\n", + "3 0.628041\n", + "4 0.658552" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data1 = [random.random() for i in range(10000)]\n", + "df = pd.DataFrame(data1)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "▶ New DataFrame from a **2D List** \n", + "Column names default to integers. Each subList is a row." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
0A28
1B78
2C85
3D65
4E98
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 A 28\n", + "1 B 78\n", + "2 C 85\n", + "3 D 65\n", + "4 E 98" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data2 = [[i, random.randint(10,99)] for i in 'ABCDEFGHIJKLMNOPQRSTUVWXYZ']\n", + "df = pd.DataFrame(data2)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "▶ New DataFrame from a **Dictionary** \n", + "Dict Keys become column names" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ModelPriceSize
0T571.4257 inches
1T611.4861 inches
2T641.7364 inches
3T651.9565 inches
\n", + "
" + ], + "text/plain": [ + " Model Price Size\n", + "0 T57 1.42 57 inches\n", + "1 T61 1.48 61 inches\n", + "2 T64 1.73 64 inches\n", + "3 T65 1.95 65 inches" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data3 = {\n", + " 'Model':['T57','T61','T64','T65'],\n", + " 'Price':[1.42,1.48,1.73,1.95],\n", + " 'Size':['57 inches','61 inches','64 inches','65 inches']}\n", + "df = pd.DataFrame(data3)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Change previous example to use Model number as index. " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PriceSize
T571.4257 inches
T611.4861 inches
T641.7364 inches
T651.9565 inches
\n", + "
" + ], + "text/plain": [ + " Price Size\n", + "T57 1.42 57 inches\n", + "T61 1.48 61 inches\n", + "T64 1.73 64 inches\n", + "T65 1.95 65 inches" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(\n", + " {'Price':data3['Price'],'Size':data3['Size']}, \n", + " index=data3['Model'])\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "▶ New DataFrame from a List of Dictionaries \n", + "Note, missing Length is populated with NaN (not a number)." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HtLenWt
06345.02.6
129NaN1.7
23771.04.2
\n", + "
" + ], + "text/plain": [ + " Ht Len Wt\n", + "0 63 45.0 2.6\n", + "1 29 NaN 1.7\n", + "2 37 71.0 4.2" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data4 = [\n", + " {'Ht':63, 'Len':45, 'Wt':2.6}, \n", + " {'Ht':29, 'Wt':1.7},\n", + " {'Ht':37, 'Len':71, 'Wt':4.2}]\n", + "df = pd.DataFrame(data4)\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reading & Writing DataFrames to CSV Files\n", + "[Documentation](https://pandas.pydata.org/docs/user_guide/io.html#csv-text-files) of numerous optional parameters. \n", + "\n", + "▶ Write DataFrame to CSV file " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.DataFrame(data4)\n", + "df.to_csv('outfile.csv', index=False) #, sep=';')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "▶ Read CSV file into DataFrame \n", + "Missing numerical data are given value NaN by default." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HtLenWt
06345.02.6
129NaN1.7
23771.04.2
\n", + "
" + ], + "text/plain": [ + " Ht Len Wt\n", + "0 63 45.0 2.6\n", + "1 29 NaN 1.7\n", + "2 37 71.0 4.2" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('outfile.csv')\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "▶ Convert DataFrame to_string" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "' Ht Len Wt\\n0 63 45.0 2.6\\n1 29 NaN 1.7\\n2 37 71.0 4.2'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(data4)\n", + "d4str = df.to_string()\n", + "d4str" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Reading & Writing DataFrames to JSON files\n", + "[Documentation](https://pandas.pydata.org/docs/user_guide/io.html#csv-text-files) of numerous optional parameters. \n", + "\n", + "▶ Convert DataFrame to **JSON** string \n", + "No argument - json by columns is default, {column -> {index -> value}}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'{\"Ht\":{\"0\":63,\"1\":29,\"2\":37},\"Len\":{\"0\":45.0,\"1\":null,\"2\":71.0},\"Wt\":{\"0\":2.6,\"1\":1.7,\"2\":4.2}}'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data4_json = df.to_json()\n", + "data4_json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use orient='index' to structure the json by rows, {index -> {column -> value}}. \n", + "You can also strip out the row indices by using orient='records'." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'{\"0\":{\"Ht\":63,\"Len\":45.0,\"Wt\":2.6},\"1\":{\"Ht\":29,\"Len\":null,\"Wt\":1.7},\"2\":{\"Ht\":37,\"Len\":71.0,\"Wt\":4.2}}'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data4_json = df.to_json(orient='index')\n", + "data4_json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "▶ Write to a text file in JSON format." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "data4_json = df.to_json('outjson.txt')\n", + "data4_json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "▶ Read same JSON data back in to a DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
HtLenWt
06345.02.6
129NaN1.7
23771.04.2
\n", + "
" + ], + "text/plain": [ + " Ht Len Wt\n", + "0 63 45.0 2.6\n", + "1 29 NaN 1.7\n", + "2 37 71.0 4.2" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data4 = pd.read_json('outjson.txt')\n", + "data4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pandas/Python Pandas Time Series Data.ipynb b/Pandas/Python Pandas Time Series Data.ipynb new file mode 100644 index 00000000..3db1b4b4 --- /dev/null +++ b/Pandas/Python Pandas Time Series Data.ipynb @@ -0,0 +1,856 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Pandas Time Series Data\n", + "[Documentation](https://pandas.pydata.org/docs/user_guide/timeseries.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import datetime\n", + "import random" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data Formats Supported\n", + "Pandas datetime64 can interpret strings, Python datetime, and Numpy datetime64 objects. \n", + "Also note, a list of pd.datetime64 objects are automatically converted to a DatetimeIndex." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2020-06-01', '2020-06-02', '2020-06-03', '2020-06-04',\n", + " '2020-06-05'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a1 = pd.to_datetime([\n", + " '6/1/2020', \n", + " '6-2-2020',\n", + " datetime.datetime(2020, 6, 3),\n", + " np.datetime64('2020-06-04'),\n", + " np.datetime64('2020-06-05')])\n", + "a1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Pass in a format argument for custom formatted dates (case matters)." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2020-06-14', '2020-06-15'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a2 = pd.to_datetime(['2020/14/06', '2020/15/06'], format='%Y/%d/%m')\n", + "a2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Hours and Minutes too? No problem." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2020-08-06 14:05:00', '2020-09-06 06:45:00'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a3 = pd.to_datetime(\n", + " ['2020/6/8 14.05', '2020/6/9 06.45'], format='%Y/%d/%m %H.%M')\n", + "a3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating a datetime sequence with fixed intervals\n", + "freq parameters: \n", + " D=days, W=weeks, M=months, B=business days, BW=bus weeks, BM=bus months" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "DatetimeIndex(['2020-06-01', '2020-06-02', '2020-06-03', '2020-06-04',\n", + " '2020-06-05', '2020-06-06', '2020-06-07', '2020-06-08',\n", + " '2020-06-09', '2020-06-10', '2020-06-11', '2020-06-12',\n", + " '2020-06-13', '2020-06-14', '2020-06-15', '2020-06-16',\n", + " '2020-06-17', '2020-06-18', '2020-06-19', '2020-06-20',\n", + " '2020-06-21', '2020-06-22', '2020-06-23', '2020-06-24',\n", + " '2020-06-25', '2020-06-26', '2020-06-27', '2020-06-28',\n", + " '2020-06-29', '2020-06-30'],\n", + " dtype='datetime64[ns]', freq='D')\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
M
2020-06-070.970080
2020-06-140.809867
2020-06-180.917428
2020-06-200.945739
2020-06-260.815245
\n", + "
" + ], + "text/plain": [ + " M\n", + "2020-06-07 0.970080\n", + "2020-06-14 0.809867\n", + "2020-06-18 0.917428\n", + "2020-06-20 0.945739\n", + "2020-06-26 0.815245" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b1 = [random.random() for i in range(30)]\n", + "b2 = pd.date_range('2020-06-01', periods=30, freq='1d')\n", + "print(b2)\n", + "df = pd.DataFrame({'M':b1}, index=b2)\n", + "#df.loc['2020-06-18':]\n", + "df[df['M'] > 0.8]" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
2020-07-120.581691
2020-07-190.611492
2020-07-260.933940
\n", + "
" + ], + "text/plain": [ + " 0\n", + "2020-07-12 0.581691\n", + "2020-07-19 0.611492\n", + "2020-07-26 0.933940" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b3 = np.random.rand(52)\n", + "b4 = pd.date_range('2020-06-01', periods=52, freq='W')\n", + "df = pd.DataFrame(b3, index=b4)\n", + "df['2020-07-10':'2020-07-28']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternative to periods, you can give start and stop dates." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2020-06-30', '2020-07-31', '2020-08-31', '2020-09-30',\n", + " '2020-10-31', '2020-11-30', '2020-12-31'],\n", + " dtype='datetime64[ns]', freq='M')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "b3 = pd.date_range('2020-06-30', '2020-12-31', freq='M')\n", + "b3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Dates Index to/from CSV file\n", + "Create DataFrame with Dates as Index, Write it to a CSV file, then Read in the CSV data and put the dates as Index" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alphabeta
2020-05-2910.4026
2020-05-308.9226
2020-05-315.0912
2020-06-013.8727
2020-06-023.9324
2020-06-034.7916
2020-06-049.1216
\n", + "
" + ], + "text/plain": [ + " alpha beta\n", + "2020-05-29 10.40 26\n", + "2020-05-30 8.92 26\n", + "2020-05-31 5.09 12\n", + "2020-06-01 3.87 27\n", + "2020-06-02 3.93 24\n", + "2020-06-03 4.79 16\n", + "2020-06-04 9.12 16" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "d1 = np.round(6 + 4 * np.random.randn(7), decimals=2)\n", + "d2 = np.random.randint(12, 30, size=7)\n", + "d3 = pd.Series(pd.date_range('2020-05-29', periods=7, freq='1d'))\n", + "df = pd.DataFrame({'alpha':d1, 'beta':d2}, index=d3)\n", + "\n", + "df.to_csv('file01.csv')\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alphabeta
2020-05-2910.4026
2020-05-308.9226
2020-05-315.0912
\n", + "
" + ], + "text/plain": [ + " alpha beta\n", + "2020-05-29 10.40 26\n", + "2020-05-30 8.92 26\n", + "2020-05-31 5.09 12" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('file01.csv', index_col=0)\n", + "print(type(df.index[2]))\n", + "df.index = pd.to_datetime(df.index, format='%Y/%m/%d')\n", + "print(type(df.index[2]))\n", + "df[:'2020/05/31']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Constructing Dates from Multiple Columns\n", + "You have Month, Day and Year in separate fields, and need to combine them into a single Datetime field." + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2017 12 7 0.970109923902562\n" + ] + } + ], + "source": [ + "yyyy = [random.randint(1995,2020) for i in range(100)]\n", + "mm = [random.randint(1,12) for i in range(100)]\n", + "dd = [random.randint(1,28) for i in range(100)]\n", + "data = [random.random() for i in range(100)]\n", + "print(yyyy[5], mm[5], dd[5], data[5])" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
01
02016-10-180.282307
12007-09-090.004984
22016-12-120.652762
32017-04-140.199284
42013-03-230.163154
\n", + "
" + ], + "text/plain": [ + " 0 1\n", + "0 2016-10-18 0.282307\n", + "1 2007-09-09 0.004984\n", + "2 2016-12-12 0.652762\n", + "3 2017-04-14 0.199284\n", + "4 2013-03-23 0.163154" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df1 = pd.DataFrame({'year': yyyy,'month': mm, 'day': dd})\n", + "df1 = pd.to_datetime(df1) \n", + "df2 = pd.Series(data)\n", + "df = pd.concat([df1, df2], axis=1)\n", + "df[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pivot (Transpose) Rows & Columns\n", + "You normally want dates as the row index, not the column headers. \n", + "Flip the rows and columns using T." + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
2016-10-18 00:00:000.282307
2007-09-09 00:00:000.004984
2016-12-12 00:00:000.652762
2017-04-14 00:00:000.199284
2013-03-23 00:00:000.163154
\n", + "
" + ], + "text/plain": [ + " 0\n", + "2016-10-18 00:00:00 0.282307\n", + "2007-09-09 00:00:00 0.004984\n", + "2016-12-12 00:00:00 0.652762\n", + "2017-04-14 00:00:00 0.199284\n", + "2013-03-23 00:00:00 0.163154" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('pivot.csv')\n", + "df = df.T\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Date Arithmetic" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Thursday'" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "appointment = pd.Timestamp('2020-06-04')\n", + "appointment.day_name()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Uh oh! my appointment is delayed 2 days. \n", + "Here are 3 different ways to add 2 days to the date." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Saturday'" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "appointment = pd.Timestamp('2020-06-04')\n", + "appointment += pd.Timedelta('2 days')\n", + "appointment.day_name()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Saturday'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "appointment = pd.Timestamp('2020-06-04')\n", + "appointment += pd.Timedelta(days=2)\n", + "appointment.day_name()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Date offsets: Day, Hour, Minute, Second, Milli, Micro, Nano " + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"'Saturday'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "appointment = pd.Timestamp('2020-06-04')\n", + "appointment += pd.offsets.Day(2)\n", + "appointment.day_name()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "NO, it's delayed 2 business days. " + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Monday'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "appointment = pd.Timestamp('2020-06-04')\n", + "appointment += pd.offsets.BDay(2)\n", + "appointment.day_name()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Pandas/file01.csv b/Pandas/file01.csv new file mode 100644 index 00000000..229e74ef --- /dev/null +++ b/Pandas/file01.csv @@ -0,0 +1,8 @@ +,alpha,beta +2020-05-29,8.78,24 +2020-05-30,13.0,25 +2020-05-31,0.44,25 +2020-06-01,1.94,28 +2020-06-02,5.4,20 +2020-06-03,5.68,21 +2020-06-04,2.64,16 diff --git a/Pandas/iris.data b/Pandas/iris.data new file mode 100644 index 00000000..5c4316cd --- /dev/null +++ b/Pandas/iris.data @@ -0,0 +1,151 @@ +5.1,3.5,1.4,0.2,Iris-setosa +4.9,3.0,1.4,0.2,Iris-setosa +4.7,3.2,1.3,0.2,Iris-setosa +4.6,3.1,1.5,0.2,Iris-setosa +5.0,3.6,1.4,0.2,Iris-setosa +5.4,3.9,1.7,0.4,Iris-setosa +4.6,3.4,1.4,0.3,Iris-setosa +5.0,3.4,1.5,0.2,Iris-setosa +4.4,2.9,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa 
+5.4,3.7,1.5,0.2,Iris-setosa +4.8,3.4,1.6,0.2,Iris-setosa +4.8,3.0,1.4,0.1,Iris-setosa +4.3,3.0,1.1,0.1,Iris-setosa +5.8,4.0,1.2,0.2,Iris-setosa +5.7,4.4,1.5,0.4,Iris-setosa +5.4,3.9,1.3,0.4,Iris-setosa +5.1,3.5,1.4,0.3,Iris-setosa +5.7,3.8,1.7,0.3,Iris-setosa +5.1,3.8,1.5,0.3,Iris-setosa +5.4,3.4,1.7,0.2,Iris-setosa +5.1,3.7,1.5,0.4,Iris-setosa +4.6,3.6,1.0,0.2,Iris-setosa +5.1,3.3,1.7,0.5,Iris-setosa +4.8,3.4,1.9,0.2,Iris-setosa +5.0,3.0,1.6,0.2,Iris-setosa +5.0,3.4,1.6,0.4,Iris-setosa +5.2,3.5,1.5,0.2,Iris-setosa +5.2,3.4,1.4,0.2,Iris-setosa +4.7,3.2,1.6,0.2,Iris-setosa +4.8,3.1,1.6,0.2,Iris-setosa +5.4,3.4,1.5,0.4,Iris-setosa +5.2,4.1,1.5,0.1,Iris-setosa +5.5,4.2,1.4,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +5.0,3.2,1.2,0.2,Iris-setosa +5.5,3.5,1.3,0.2,Iris-setosa +4.9,3.1,1.5,0.1,Iris-setosa +4.4,3.0,1.3,0.2,Iris-setosa +5.1,3.4,1.5,0.2,Iris-setosa +5.0,3.5,1.3,0.3,Iris-setosa +4.5,2.3,1.3,0.3,Iris-setosa +4.4,3.2,1.3,0.2,Iris-setosa +5.0,3.5,1.6,0.6,Iris-setosa +5.1,3.8,1.9,0.4,Iris-setosa +4.8,3.0,1.4,0.3,Iris-setosa +5.1,3.8,1.6,0.2,Iris-setosa +4.6,3.2,1.4,0.2,Iris-setosa +5.3,3.7,1.5,0.2,Iris-setosa +5.0,3.3,1.4,0.2,Iris-setosa +7.0,3.2,4.7,1.4,Iris-versicolor +6.4,3.2,4.5,1.5,Iris-versicolor +6.9,3.1,4.9,1.5,Iris-versicolor +5.5,2.3,4.0,1.3,Iris-versicolor +6.5,2.8,4.6,1.5,Iris-versicolor +5.7,2.8,4.5,1.3,Iris-versicolor +6.3,3.3,4.7,1.6,Iris-versicolor +4.9,2.4,3.3,1.0,Iris-versicolor +6.6,2.9,4.6,1.3,Iris-versicolor +5.2,2.7,3.9,1.4,Iris-versicolor +5.0,2.0,3.5,1.0,Iris-versicolor +5.9,3.0,4.2,1.5,Iris-versicolor +6.0,2.2,4.0,1.0,Iris-versicolor +6.1,2.9,4.7,1.4,Iris-versicolor +5.6,2.9,3.6,1.3,Iris-versicolor +6.7,3.1,4.4,1.4,Iris-versicolor +5.6,3.0,4.5,1.5,Iris-versicolor +5.8,2.7,4.1,1.0,Iris-versicolor +6.2,2.2,4.5,1.5,Iris-versicolor +5.6,2.5,3.9,1.1,Iris-versicolor +5.9,3.2,4.8,1.8,Iris-versicolor +6.1,2.8,4.0,1.3,Iris-versicolor +6.3,2.5,4.9,1.5,Iris-versicolor +6.1,2.8,4.7,1.2,Iris-versicolor +6.4,2.9,4.3,1.3,Iris-versicolor 
+6.6,3.0,4.4,1.4,Iris-versicolor +6.8,2.8,4.8,1.4,Iris-versicolor +6.7,3.0,5.0,1.7,Iris-versicolor +6.0,2.9,4.5,1.5,Iris-versicolor +5.7,2.6,3.5,1.0,Iris-versicolor +5.5,2.4,3.8,1.1,Iris-versicolor +5.5,2.4,3.7,1.0,Iris-versicolor +5.8,2.7,3.9,1.2,Iris-versicolor +6.0,2.7,5.1,1.6,Iris-versicolor +5.4,3.0,4.5,1.5,Iris-versicolor +6.0,3.4,4.5,1.6,Iris-versicolor +6.7,3.1,4.7,1.5,Iris-versicolor +6.3,2.3,4.4,1.3,Iris-versicolor +5.6,3.0,4.1,1.3,Iris-versicolor +5.5,2.5,4.0,1.3,Iris-versicolor +5.5,2.6,4.4,1.2,Iris-versicolor +6.1,3.0,4.6,1.4,Iris-versicolor +5.8,2.6,4.0,1.2,Iris-versicolor +5.0,2.3,3.3,1.0,Iris-versicolor +5.6,2.7,4.2,1.3,Iris-versicolor +5.7,3.0,4.2,1.2,Iris-versicolor +5.7,2.9,4.2,1.3,Iris-versicolor +6.2,2.9,4.3,1.3,Iris-versicolor +5.1,2.5,3.0,1.1,Iris-versicolor +5.7,2.8,4.1,1.3,Iris-versicolor +6.3,3.3,6.0,2.5,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +7.1,3.0,5.9,2.1,Iris-virginica +6.3,2.9,5.6,1.8,Iris-virginica +6.5,3.0,5.8,2.2,Iris-virginica +7.6,3.0,6.6,2.1,Iris-virginica +4.9,2.5,4.5,1.7,Iris-virginica +7.3,2.9,6.3,1.8,Iris-virginica +6.7,2.5,5.8,1.8,Iris-virginica +7.2,3.6,6.1,2.5,Iris-virginica +6.5,3.2,5.1,2.0,Iris-virginica +6.4,2.7,5.3,1.9,Iris-virginica +6.8,3.0,5.5,2.1,Iris-virginica +5.7,2.5,5.0,2.0,Iris-virginica +5.8,2.8,5.1,2.4,Iris-virginica +6.4,3.2,5.3,2.3,Iris-virginica +6.5,3.0,5.5,1.8,Iris-virginica +7.7,3.8,6.7,2.2,Iris-virginica +7.7,2.6,6.9,2.3,Iris-virginica +6.0,2.2,5.0,1.5,Iris-virginica +6.9,3.2,5.7,2.3,Iris-virginica +5.6,2.8,4.9,2.0,Iris-virginica +7.7,2.8,6.7,2.0,Iris-virginica +6.3,2.7,4.9,1.8,Iris-virginica +6.7,3.3,5.7,2.1,Iris-virginica +7.2,3.2,6.0,1.8,Iris-virginica +6.2,2.8,4.8,1.8,Iris-virginica +6.1,3.0,4.9,1.8,Iris-virginica +6.4,2.8,5.6,2.1,Iris-virginica +7.2,3.0,5.8,1.6,Iris-virginica +7.4,2.8,6.1,1.9,Iris-virginica +7.9,3.8,6.4,2.0,Iris-virginica +6.4,2.8,5.6,2.2,Iris-virginica +6.3,2.8,5.1,1.5,Iris-virginica +6.1,2.6,5.6,1.4,Iris-virginica +7.7,3.0,6.1,2.3,Iris-virginica 
+6.3,3.4,5.6,2.4,Iris-virginica +6.4,3.1,5.5,1.8,Iris-virginica +6.0,3.0,4.8,1.8,Iris-virginica +6.9,3.1,5.4,2.1,Iris-virginica +6.7,3.1,5.6,2.4,Iris-virginica +6.9,3.1,5.1,2.3,Iris-virginica +5.8,2.7,5.1,1.9,Iris-virginica +6.8,3.2,5.9,2.3,Iris-virginica +6.7,3.3,5.7,2.5,Iris-virginica +6.7,3.0,5.2,2.3,Iris-virginica +6.3,2.5,5.0,1.9,Iris-virginica +6.5,3.0,5.2,2.0,Iris-virginica +6.2,3.4,5.4,2.3,Iris-virginica +5.9,3.0,5.1,1.8,Iris-virginica + diff --git a/Pandas/outfile.csv b/Pandas/outfile.csv new file mode 100644 index 00000000..ba5d7289 --- /dev/null +++ b/Pandas/outfile.csv @@ -0,0 +1,4 @@ +Ht,Len,Wt +63,45.0,2.6 +29,,1.7 +37,71.0,4.2 diff --git a/Pandas/outjson.txt b/Pandas/outjson.txt new file mode 100644 index 00000000..0967acb0 --- /dev/null +++ b/Pandas/outjson.txt @@ -0,0 +1 @@ +{"0":{"Ht":63,"Len":45.0,"Wt":2.6},"1":{"Ht":29,"Len":null,"Wt":1.7},"2":{"Ht":37,"Len":71.0,"Wt":4.2}} \ No newline at end of file diff --git a/Pandas/pandas_weather.py b/Pandas/pandas_weather.py new file mode 100644 index 00000000..39c4f2c4 --- /dev/null +++ b/Pandas/pandas_weather.py @@ -0,0 +1,123 @@ +import numpy as np +import pandas as pd + +def header(msg): + print('-' * 50) + print('[ ' + msg + ' ]') + +# 1. load hard-coded data into a dataframe +header("1. load hard-coded data into a df") +df = pd.DataFrame( + [['Jan',58,42,74,22,2.95], + ['Feb',61,45,78,26,3.02], + ['Mar',65,48,84,25,2.34], + ['Apr',67,50,92,28,1.02], + ['May',71,53,98,35,0.48], + ['Jun',75,56,107,41,0.11], + ['Jul',77,58,105,44,0.0], + ['Aug',77,59,102,43,0.03], + ['Sep',77,57,103,40,0.17], + ['Oct',73,54,96,34,0.81], + ['Nov',64,48,84,30,1.7], + ['Dec',58,42,73,21,2.56]], + index = [0,1,2,3,4,5,6,7,8,9,10,11], + columns = ['month','avg_high','avg_low','record_high','record_low','avg_precipitation']) +print(df) + +# 2. read text file into a dataframe +header("2. read text file into a df") +filename = 'Fremont_weather.txt' +df = pd.read_csv(filename) +print(df) + +# 3. 
print first 5 or last 3 rows of df +header("3. df.head()") +print(df.head()) +header("3. df.tail(3)") +print(df.tail(3)) + +# 4. get data types, index, columns, values +header("4. df.dtypes") +print(df.dtypes) + +header("4. df.index") +print(df.index) + +header("4. df.columns") +print(df.columns) + +header("4. df.values") +print(df.values) + +# 5. statistical summary of each column +header("5. df.describe()") +print(df.describe()) + +# 6. sort records by any column +header("6. df.sort_values('record_high', ascending=False)") +print (df.sort_values('record_high', ascending=False)) + +# 7. slicing records +header("7. slicing -- df.avg_low") +print(df.avg_low) # index with single column + +header("7. slicing -- df['avg_low']") +print(df['avg_low']) + +header("7. slicing -- df[2:4]") # index with single column +print(df[2:4]) # rows 2 to 3 + +header("7. slicing -- df[['avg_low','avg_high']]") +print(df[['avg_low','avg_high']]) + +header("7. slicing -- df.loc[:,['avg_low','avg_high']]") +print(df.loc[:,['avg_low','avg_high']]) # multiple columns: df.loc[from_row:to_row,['column1','column2']] + +header("7. slicing scalar value -- df.loc[9,['avg_precipitation']]") +print(df.loc[9,['avg_precipitation']]) + +header("7. df.iloc[3:5,[0,3]]") # index location can receive range or list of indices +print(df.iloc[3:5,[0,3]]) + +# 8. filtering +header("8. df[df.avg_precipitation > 1.0]") # filter on column values +print(df[df.avg_precipitation > 1.0]) + +header("8. df[df['month'].isin['Jun','Jul','Aug']]") +print(df[df['month'].isin(['Jun','Jul','Aug'])]) + +# 9. assignment -- very similar to slicing +header("9. df.loc[9,['avg_precipitation']] = 101.3") +df.loc[9,['avg_precipitation']] = 101.3 +print(df.iloc[9:11]) + +header("9. df.loc[9,['avg_precipitation']] = np.nan") +df.loc[9,['avg_precipitation']] = np.nan +print(df.iloc[9:11]) + +header("9. df.loc[:,'avg_low'] = np.array([5] * len(df))") +df.loc[:,'avg_low'] = np.array([5] * len(df)) +print(df.head()) + +header("9. 
df['avg_day'] = (df.avg_low + df.avg_high) / 2") +df['avg_day'] = (df.avg_low + df.avg_high) / 2 +print(df.head()) + +# 10. renaming columns +header("10. df.rename(columns = {'avg_precipitation':'avg_rain'}, inplace=True)") +df.rename(columns = {'avg_precipitation':'avg_rain'}, inplace=True) # rename 1 column +print(df.head()) + +header("10. df.columns = ['month','av_hi','av_lo','rec_hi','rec_lo','av_rain','av_day']") +df.columns = ['month','av_hi','av_lo','rec_hi','rec_lo','av_rain','av_day'] +print(df.head()) + +# 11. iterate a df +header("11. iterate rows of df with a for loop") +for index, row in df.iterrows(): + print (index, row["month"], row["avg_high"]) + +# 12. write to csv file +df.to_csv('foo.csv') + + diff --git a/Pandas/pivot.csv b/Pandas/pivot.csv new file mode 100644 index 00000000..0c603dc6 --- /dev/null +++ b/Pandas/pivot.csv @@ -0,0 +1,2 @@ +2016-10-18 00:00:00,2007-09-09 00:00:00,2016-12-12 00:00:00,2017-04-14 00:00:00,2013-03-23 00:00:00,2017-12-07 00:00:00,2008-06-05 00:00:00,2004-12-06 00:00:00,1995-11-05 00:00:00,1996-09-12 00:00:00,2001-05-23 00:00:00,1997-07-08 00:00:00,1995-05-01 00:00:00,2008-11-06 00:00:00,2020-12-07 00:00:00,1998-02-03 00:00:00,1996-12-20 00:00:00,1998-04-25 00:00:00,2019-03-09 00:00:00,2019-08-25 00:00:00,2015-12-01 00:00:00,2004-04-08 00:00:00,2015-04-19 00:00:00,2013-12-23 00:00:00,2008-07-17 00:00:00,2016-02-16 00:00:00,2004-05-08 00:00:00,2000-10-26 00:00:00,1999-04-27 00:00:00,2014-06-23 00:00:00,2014-04-02 00:00:00,1999-06-05 00:00:00,1998-10-20 00:00:00,2013-01-24 00:00:00,2006-07-27 00:00:00,2002-08-20 00:00:00,2013-11-07 00:00:00,2006-07-01 00:00:00,2004-11-23 00:00:00,2008-09-07 00:00:00,1996-08-19 00:00:00,2016-01-27 00:00:00,2002-09-26 00:00:00,1996-09-09 00:00:00,1998-10-09 00:00:00,2000-07-19 00:00:00,2008-11-19 00:00:00,2014-03-11 00:00:00,1996-07-15 00:00:00,2000-02-05 00:00:00,1998-06-24 00:00:00,1998-01-23 00:00:00,1998-05-06 00:00:00,2003-08-05 00:00:00,2013-08-02 00:00:00,1996-03-07 
00:00:00,1995-03-25 00:00:00,2012-10-06 00:00:00,2004-07-27 00:00:00,1999-08-05 00:00:00,2009-06-04 00:00:00,2007-07-27 00:00:00,2002-07-03 00:00:00,2011-06-07 00:00:00,2012-08-19 00:00:00,2018-03-22 00:00:00,1996-09-02 00:00:00,2008-09-02 00:00:00,2006-09-14 00:00:00,2007-07-11 00:00:00,2009-07-16 00:00:00,2016-06-24 00:00:00,2008-10-07 00:00:00,1997-06-13 00:00:00,2017-02-17 00:00:00,2009-05-09 00:00:00,1995-12-28 00:00:00,2014-05-25 00:00:00,1996-03-24 00:00:00,1996-11-12 00:00:00,2011-07-12 00:00:00,2009-11-24 00:00:00,2003-02-05 00:00:00,2010-07-06 00:00:00,1996-12-13 00:00:00,2014-10-11 00:00:00,2008-03-26 00:00:00,2019-07-07 00:00:00,2015-12-13 00:00:00,1997-08-21 00:00:00,2016-10-05 00:00:00,2016-10-08 00:00:00,2005-03-27 00:00:00,2011-03-08 00:00:00,2015-03-14 00:00:00,2001-10-11 00:00:00,1996-07-03 00:00:00,2006-10-22 00:00:00,2004-03-22 00:00:00,1998-12-07 00:00:00 +0.2823066951673592,0.004983503360937558,0.6527617484109105,0.19928401502972315,0.16315370812362973,0.970109923902562,0.28902358319246113,0.3869769040495181,0.036043163595530725,0.8466446865018183,0.3029305402508473,0.9333588096128297,0.1519465926874476,0.9927748105073949,0.4651284520015794,0.14707391568333994,0.08982833065821505,0.5883453931565746,0.3494752580177175,0.10167526699057972,0.07941368601234156,0.9358293552727159,0.10740538848773118,0.4506715669567083,0.8387140420947164,0.5746907600075374,0.758976559783973,0.3110498538520595,0.9993881318470451,0.26265671090461906,0.6470895524293089,0.49971727121051435,0.8195258715074762,0.3630891873905159,0.11926782337362651,0.555536083920244,0.8190498637543558,0.40175480684114895,0.7158884358768607,0.3076511179082977,0.06309063694263983,0.5979927629277495,0.7614082398079934,0.34115186547855936,0.5709798851222291,0.9855403495879589,0.7253074001190444,0.4685492447308346,0.03796109649032342,0.16599775387020776,0.7730834960205076,0.04807532952934723,0.9967131145813193,0.7619403019670993,0.4326634641827285,0.43819600412852544,0.8915384234633204,0.738819
0145903542,0.1504441063873443,0.11645793742178479,0.8849805306825719,0.8209440808963199,0.03756126787686931,0.2921962688201817,0.637006806437786,0.21496659424673736,0.0673283796900469,0.2679415763216668,0.845924613974428,0.35520559789032924,0.9358371834432343,0.12177533426329734,0.7385219285657647,0.09006868872192064,0.21716948989174056,0.212482450203213,0.26463884587482933,0.4639594087198322,0.473534405458537,0.31034018086435244,0.4868968278642336,0.7276362315420386,0.5856335537301501,0.3848357918705628,0.5045424488044008,0.3669372132755703,0.5223717711718852,0.07330274927032765,0.12129741612938838,0.4463446916302444,0.06560636427124344,0.5628053006445556,0.6800242556861632,0.13053686078804783,0.9666542996125932,0.21805040691134103,0.920335829229451,0.5715463228495896,0.2984664705574499,0.28766845010273867 diff --git a/Primes.py b/Primes.py index 196644d3..3bbf14eb 100644 --- a/Primes.py +++ b/Primes.py @@ -2,7 +2,7 @@ max = int(input("Find primes up to what number? : ")) primeList = [] - +#for loop for checking each number for x in range(2, max + 1): isPrime = True index = 0 @@ -43,4 +43,4 @@ x += 1 -print(primeList) \ No newline at end of file +print(primeList) diff --git a/Python Bisect.ipynb b/Python Bisect.ipynb new file mode 100644 index 00000000..1390432d --- /dev/null +++ b/Python Bisect.ipynb @@ -0,0 +1,267 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Bisect Module\n", + "Used to find the insertion point for adding an item to a sorted list. \n", + "Advantage: it's fast. Runs in O(log n). \n", + "[Documentation](https://docs.python.org/3/library/bisect.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import bisect" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### bisect_left\n", + "Finds the insertion point for an item in a sorted list, or the spot just left of any matches. 
\n", + "Works for list of ints, list of floats, list of strings." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "3\n", + "2\n" + ] + } + ], + "source": [ + "a = [24, 33, 41, 41, 45, 50, 53, 59, 62, 66, 70]\n", + "i = bisect.bisect_left(a, 41)\n", + "print(i)\n", + "\n", + "b = [1.3, 2.2, 3.4, 4.6, 5.5, 6.9, 7.2, 8.4]\n", + "j = bisect.bisect_left(b, 4.1)\n", + "print(j)\n", + "\n", + "c = ['aaa', 'bbb', 'ccc', 'ddd']\n", + "k = bisect.bisect_left(c, 'bug')\n", + "print(k)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If list is unsorted, results are unpredictable, but it still tries." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n" + ] + } + ], + "source": [ + "a = [33, 24, 41, 41, 45, 50, 53, 59, 66, 62, 70]\n", + "i = bisect.bisect_left(a, 30)\n", + "print(i)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### insort_left\n", + "This inserts an item into the list in the correct position." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[24, 33, 41, 41, 44, 45, 50, 53, 59, 62, 66, 70]\n" + ] + } + ], + "source": [ + "d = [24, 33, 41, 41, 45, 50, 53, 59, 62, 66, 70]\n", + "bisect.insort_left(d, 44)\n", + "print(d)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### bisect_right\n", + "Just like bisect_left, but for matches it returns the spot just to the right of matches." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4\n", + "2\n", + "3\n" + ] + } + ], + "source": [ + "a = [24, 33, 41, 41, 45, 50, 53, 59, 62, 66, 70]\n", + "i = bisect.bisect_right(a, 41)\n", + "print(i)\n", + "\n", + "b = [1.3, 2.2, 3.4, 4.6, 5.5, 6.9, 7.2, 8.4]\n", + "j = bisect.bisect_right(b, 2.2)\n", + "print(j)\n", + "\n", + "c = ['A', 'big', 'dog', 'runs', 'slowly']\n", + "k = bisect.bisect_right(c, 'dog')\n", + "print(k)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### insort_right\n", + "Just like insort_left, but for matches it inserts to the right of the match." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[24, 33, 41, 41, 45, 46, 50, 53, 59, 62, 66, 70]\n" + ] + } + ], + "source": [ + "d = [24, 33, 41, 41, 45, 50, 53, 59, 62, 66, 70]\n", + "bisect.insort_right(d, 46)\n", + "print(d)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A fast Find function for a Sorted List\n", + "Find leftmost value greater than x in sorted list a" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "def find_next(a, x):\n", + " i = bisect.bisect_right(a, x)\n", + " if i < len(a):\n", + " return a[i]\n", + " return False\n", + "\n", + "print(find_next([10, 15, 20, 25, 30], 33))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### A simple get_grade function\n", + "get_grade uses a list of cutoffs to split grades into 5 ranges, then uses the bisect index to return the corresponding grade. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['F', 'A', 'C', 'C', 'B', 'A', 'A']\n" + ] + } + ], + "source": [ + "def get_grade(score, cutoffs=[60, 70, 80, 90], grades='FDCBA'):\n", + " i = bisect.bisect_right(cutoffs, score)\n", + " return grades[i]\n", + "\n", + "grades = [get_grade(score) for score in [52, 99, 77, 70, 89, 90, 100]]\n", + "print(grades)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Python Generators.ipynb b/Python Generators.ipynb new file mode 100644 index 00000000..1a68f384 --- /dev/null +++ b/Python Generators.ipynb @@ -0,0 +1,224 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Generators\n", + "[documentation](https://docs.python.org/3/howto/functional.html#generators) \n", + "[Another really good tutorial](https://realpython.com/introduction-to-python-generators/#using-generators)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "*yield* keyword makes a function into a generator. Python keeps the call stack for the generator function open and saves the state. When you invoke the next() function it will return execution to the same point it left off in the generator function." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simple generator function \n", + "The while loop continues indefinitely. 
The function yields x, then increments x, with each iteration." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "def my_generator(x=1):\n", + " while True:\n", + " yield x\n", + " x += 1" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using the generator with a for loop\n", + "Here, gene is a my_generator function. \n", + "The for loop iterates through gene indefinitely. \n", + "Behind the scenes, the for loop is calling the generator's \\__next__ function. \n", + "Big advantages over Lists: \n", + "- Generator can provide an infinite sequence. \n", + "- Generator doesn't load values into memory. " + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 " + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mgene\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mend\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m' '\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msleep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0.5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } +
], + "source": [ + "import time\n", + "gene = my_generator()\n", + "print(type(gene))\n", + "\n", + "for i in gene:\n", + " print(i, end=' ')\n", + " time.sleep(0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using the generator with explicit next( ) calls\n", + "*range* limits this for loop to 10 iterations. \n", + "Each iteration of the for loop it calls the generator using *next(gene)*." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "2 3 4 5 6 7 8 9 10 11 " + ] + } + ], + "source": [ + "gene = my_generator()\n", + "print(gene.__next__())\n", + "for i in range(10):\n", + " print(next(gene), end=' ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generators from Generator Expressions\n", + "Similar to List Comprehensions, but uses ( ) rather than [ ]. \n", + "Create with a single line of code. \n", + "Only use 120 bytes of memory." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "120\n", + "\n", + "0\n", + "3\n" + ] + } + ], + "source": [ + "gene = (x for x in range(999999))\n", + "\n", + "import sys\n", + "print(sys.getsizeof(gene))\n", + "print(type(gene))\n", + "\n", + "print(next(gene))\n", + "next(gene)\n", + "next(gene)\n", + "print(next(gene))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Generator to Read File\n", + "Saves memory, and avoids memory overflow for very large files, because it only *loads one line into memory at a time*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Rolling Stones\n", + "\n", + "Lady Gaga\n", + "Jackson Browne\n", + "Maroon 5\n", + "Arijit Singh\n", + "Elton John\n", + "John Mayer\n" + ] + } + ], + "source": [ + "def read_file(fn = 'bands.txt'):\n", + " for line in open(fn):\n", + " yield line\n", + " \n", + "band = read_file()\n", + "print(next(band))\n", + "for i in range(6):\n", + " print(next(band), end='')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Python List Iteration.ipynb b/Python List Iteration.ipynb new file mode 100644 index 00000000..7361caaf --- /dev/null +++ b/Python List Iteration.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python List Iteration\n", + "A variety of ways to iterate Lists, including for loop, while loop, enumerate." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "The standard for loop works well if, inside the loop, you only need the item and not its index."
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "a\n", + "b\n", + "c\n", + "d\n", + "e\n" + ] + } + ], + "source": [ + "letters = ['a', 'b', 'c', 'd', 'e']\n", + "\n", + "for letter in letters:\n", + " print(letter)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "If you need the index inside the loop you can use range(len(list)). \n", + "Then you can always get the list item if needed by using the index." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "letters 0 = a\n", + "letters 1 = b\n", + "letters 2 = c\n", + "letters 3 = d\n", + "letters 4 = e\n" + ] + } + ], + "source": [ + "for index in range(len(letters)):\n", + " print('letters', index, '=', letters[index])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "Best option if you need both index and item inside the loop is to use Python's **enumerate** function. \n", + "Enumerate works in both Python 2.x and 3.x" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "letters 0 = a\n", + "letters 1 = b\n", + "letters 2 = c\n", + "letters 3 = d\n", + "letters 4 = e\n" + ] + } + ], + "source": [ + "for index, item in enumerate(letters):\n", + " print('letters', index, '=', item)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Enumerate actually returns an iterable enumerate object, \n", + "which is a sequence of tuples of (index, item)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(0, 'a')\n", + "(1, 'b')\n", + "\n" + ] + } + ], + "source": [ + "enum_obj = enumerate(letters)\n", + "print(next(enum_obj))\n", + "print(next(enum_obj))\n", + "print(type(enum_obj))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----\n", + "Probably the clumsiest way to iterate a list in Python -- the **while loop**. \n", + "Requires index initialization before the loop, and incrementing inside the loop." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "letters 0 = a\n", + "letters 1 = b\n", + "letters 2 = c\n", + "letters 3 = d\n", + "letters 4 = e\n" + ] + } + ], + "source": [ + "index = 0\n", + "while index < len(letters): \n", + " print('letters', index, '=', letters[index]) \n", + " index += 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Python Random Numbers Module.ipynb b/Python Random Numbers Module.ipynb new file mode 100644 index 00000000..db837a8c --- /dev/null +++ b/Python Random Numbers Module.ipynb @@ -0,0 +1,450 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Random Numbers Module\n", + "[Official Documentation](https://docs.python.org/3/library/random.html)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, +
"metadata": {}, + "outputs": [], + "source": [ + "import random" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### randint\n", + "Gives you a random integer between from and to values, inclusive." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2 2 3 1 1 2 0 2 3 2 0 0 2 2 0 3 3 2 1 2 3 0 2 2 2 " + ] + } + ], + "source": [ + "for i in range (25):\n", + " print(random.randint(0, 3), end=' ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### randrange\n", + "Works similar to the range function -- gives you a random number between from and to-1, with optional step. \n", + "From defaults to 0 if only 1 argument is given. \n", + "Step defaults to 1 if only 2 arguments are given." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "25\n", + "3 6 6 6 6 3 3 3 0 0 6 6 3 0 6 0 3 0 6 6 6 6 3 3 6 " + ] + } + ], + "source": [ + "print(random.randrange(100))\n", + "\n", + "for i in range (25):\n", + " print(random.randrange(0, 9, 3), end=' ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### choice\n", + "Returns one randomly chosen item from a sequence (list, tuple or string). Works for lists/tuples of integers, floats, strings or other objects. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9\n", + "Roby\n", + "Darby\n", + "Washington\n", + "Hampton\n" + ] + } + ], + "source": [ + "print(random.choice([3, 5, 7, 9, 11]))\n", + "\n", + "names = ['Roby', 'Matthews', 'Washington', 'Darby', 'Hampton']\n", + "for i in range(4):\n", + " print(random.choice(names))" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "-\n", + "a c e b b a e d c c " + ] + } + ], + "source": [ + "print(random.choice('bunch-of-letters'))\n", + "\n", + "material = 'brocade'\n", + "for i in range(10):\n", + " print(random.choice(material), end=' ')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### choices\n", + "Just like choice, but returns a list of n random choices, with replacement, so each pick is from the full sequence." + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[9, 5, 3, 6, 5, 6, 5, 3, 1, 5, 10, 1, 4, 4, 10]\n", + "[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\n" + ] + } + ], + "source": [ + "numbers = [n+1 for n in range(10)]\n", + "my_picks = random.choices(numbers, k=15)\n", + "print(my_picks)\n", + "print(numbers)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Darby', 'Hampton']\n" + ] + } + ], + "source": [ + "names = ['Roby', 'Matthews', 'Washington', 'Darby', 'Hampton']\n", + "print(random.choices(names, k=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also add weights if you want some items to have a better chance of being picked. Here, 1 is 4x more likely than 4 to be picked." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2, 1, 3, 4, 1, 1, 1, 3, 2, 2, 2, 4, 2, 1, 1, 3, 2, 3, 1, 3]\n" + ] + } + ], + "source": [ + "numbers = [1,2,3,4]\n", + "my_picks = random.choices(numbers, weights=[4,3,2,1], k=20)\n", + "print(my_picks)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Use random.choices to generate random passwords \n", + "First we pick a list of 8 random numbers between a and z on the ascii table, then we convert the numbers to ascii letters, then join them into a string." + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[100, 109, 104, 100, 103, 102, 118, 121]\n", + "dmhdgfvy\n" + ] + } + ], + "source": [ + "picks = random.choices(range(ord('a'),ord('z')), k=8)\n", + "print(picks)\n", + "picks = [chr(i) for i in picks]\n", + "print(''.join(picks))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's a random password generator that uses all upper and lower case letters and numbers." + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Z81Hw3uk\n" + ] + } + ], + "source": [ + "import string\n", + "all_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits\n", + "pw = ''.join(random.choices(all_chars, k=8))\n", + "print(pw)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### sample\n", + "Just like choices, but without replacement. \n", + "Useful for picking lottery winners or bingo numbers. \n", + "Returned list is in the order they were picked." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['green', 'pink']\n" + ] + } + ], + "source": [ + "colors = ['red', 'blue', 'green', 'aqua', 'pink', 'black']\n", + "picks = random.sample(colors, k=2)\n", + "print(picks)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the range function as an argument will not give you any duplicate picks." + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[18, 38, 20, 50, 1]\n" + ] + } + ], + "source": [ + "picks = random.sample(range(1,51), k=5)\n", + "print(picks)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### shuffle\n", + "Shuffle any sequence into random order. \n", + "This is an in-place shuffle, and it doesn't return anything." + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 2, 3, 4, 5, 6, 7, 8]\n", + "None\n", + "[2, 6, 8, 1, 4, 7, 5, 3]\n" + ] + } + ], + "source": [ + "numbers = [1, 2, 3, 4, 5, 6, 7, 8]\n", + "print(numbers)\n", + "print(random.shuffle(numbers))\n", + "\n", + "random.shuffle(numbers)\n", + "print(numbers)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### random.random()\n", + "Random floating point values between 0.0 and 1.0." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.8424897774160051\n", + "0.9016594664191279\n", + "0.5162849368345925\n", + "0.021852081927422384\n", + "0.5740618908246983\n", + "0.6539291129848911\n" + ] + } + ], + "source": [ + "print(random.random())\n", + "\n", + "for i in range(5):\n", + " print(random.random())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### uniform (from, to)\n", + "Random float between a range of values" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2.5032833557221568\n", + "10.31258224982709\n", + "9.431820659293221\n", + "10.4390639618008\n", + "9.6906814789157\n", + "10.559354593909362\n" + ] + } + ], + "source": [ + "print(random.uniform(2.1, 4.3))\n", + "\n", + "for i in range(5):\n", + " print(random.uniform(9.4, 10.7))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Python Set Comprehensions.ipynb b/Python Set Comprehensions.ipynb new file mode 100644 index 00000000..475521cb --- /dev/null +++ b/Python Set Comprehensions.ipynb @@ -0,0 +1,331 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Set Comprehensions\n", + "Note that Python sets are not ordered, and duplicates are automatically removed. 
\n", + "Otherwise, comprehensions work just like with lists. \n", + "General syntax is: new_set = {expression for item in iterable if condition}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Simple Comprehension using Range" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}\n" + ] + } + ], + "source": [ + "ints = {i for i in range(10)}\n", + "print(ints)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Comprehension using Range with a Condition filter\n", + "Only take even values from range" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0, 2, 4, 6, 8}\n" + ] + } + ], + "source": [ + "evens = {i for i in range(10) if i%2 == 0}\n", + "print(evens)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Apply math function to values in range\n", + "Here, square each value" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0, 1, 64, 4, 36, 9, 16, 49, 81, 25}\n" + ] + } + ], + "source": [ + "squares = {i*i for i in range(10)}\n", + "print(squares)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that Python eliminates duplicates from sets" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{0, 1, 4, 9, 16, 25}\n" + ] + } + ], + "source": [ + "sqrs = {i*i for i in range(-5, 5)}\n", + "print(sqrs)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set Comprehension on a List" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{4, 9, 169, 49, 121, 25}\n" + ] + } + ], + "source": [ + "primes = [2, 2, 2, 3, 3, 5, 5, 5, 7, 11, 11, 13, 13, 13, 13]\n", + "primes_squared = {p*p for p in primes}\n", + "print(primes_squared)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### More Complex Expressions: quadratic transformation\n", + "Any expression is allowed. More complex expressions can be put in parentheses. \n", + "Here, quadratic equation: \n", + "2x^2 + 5x + 10" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{43, 143, 307, 85, 28, 413}\n" + ] + } + ], + "source": [ + "transformed = {(2*x*x + 5*x + 10) for x in primes}\n", + "print(transformed)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Flatten List and eliminate duplicates\n", + "Syntax: {leaf for branch in tree for leaf in branch}" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{1, 2, 3, 98, 76}\n" + ] + } + ], + "source": [ + "nums = [[1,3],[2,3],[3,98],[76,1]]\n", + "flat_set = {a for b in nums for a in b}\n", + "print(flat_set)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Eliminate Dups from a List\n", + "We can easily eliminate differences in capitalization, while removing duplicates." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'Albert', 'Ella', 'George', 'Salil'}\n" + ] + } + ], + "source": [ + "names = ['salil', 'ALBERT', 'Ella', 'george', 'Salil', 'George', 'ELLA', 'Albert']\n", + "names_set = {n.capitalize() for n in names}\n", + "print(names_set)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And it's easy to convert this back to a list." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Albert', 'Ella', 'George', 'Salil']\n" + ] + } + ], + "source": [ + "names_set = list({n.capitalize() for n in names})\n", + "print(names_set)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get Car Make from list of Make & Model\n", + "We're getting the first word from each string." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'Toyota', 'Tesla', 'Chevy'}\n" + ] + } + ], + "source": [ + "cars = ['Toyota Prius', 'Chevy Bolt', 'Tesla Model 3', 'Tesla Model Y']\n", + "makes = {(c.split()[0]) for c in cars}\n", + "print(makes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get Initials from Names\n", + "Take first and last initials" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'CB', 'NF', 'HP'}\n" + ] + } + ], + "source": [ + "names = ['Clint Barton', 'Tony', 'Nick Fury', 'Hank Pym']\n", + "inits = {(n.split()[0][0] + n.split()[1][0]) for n in names if len(n.split())==2}\n", + "print(inits)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Python in 90 minutes/Python in 90 minutes.pptx b/Python in 90 minutes/Python in 90 minutes.pptx new file mode 100644 index 00000000..7640e394 Binary files /dev/null and b/Python in 90 minutes/Python in 90 minutes.pptx differ diff --git a/Python in 90 minutes/Python in 90 minutes.py b/Python in 90 minutes/Python in 90 minutes.py new file mode 100644 index 00000000..17d08eb5 --- /dev/null +++ b/Python in 90 minutes/Python in 90 minutes.py @@ -0,0 +1,596 @@ +# INTRODUCTION TO PYTHON +# ------------------------------------------------ +# to print, put text inside single or double quotes, inside parentheses. +print('Hello World') + +# VARIABLES +# ------------------------------------------------ +# variables are used for temporary storage of data that may change +# a single equals sign is the assignment operator +age = 26 +first_name = 'Shivika' +gpa = 3.99 + +# here we can see three different types of data stored in variables: an integer, a string, and a float. +# You do not have to declare the data type stored in each variable. Python does that for you. 
+# You can see what type of data is in a variable using the type() function +print(type(age)) +print(type(first_name)) +print(type(gpa)) + +# variables are "dynamically typed" -- Python checks the type at runtime +age = 26.2 +print(type(age)) +print(age, type(age)) + +# variable naming tips: +# naming can have letters, numbers and underscore, but cannot start with a digit +# some Python reserved words cannot be used +# use descriptive variable names +# Case Matters +# constants in all caps: PI = 3.14159 +# every Python variable is a pointer to the data stored somewhere in memory +# get memory location of a variable using id() function +print(id(age)) + +# to swap variable values: +x = 5 +y = 10 +x, y = y, x +print('x =', x, 'y =', y) + +# BOOLEAN VALUES - True or False +# ------------------------------------------------ +# These all evaluate to False: 0, 0.0, [], "", None +# These are all True: any non-zero number, any non-empty string, list or set +print(bool(1)) # True +print(bool('dog')) +print(bool(10.78)) +print(bool(0 or 1)) + +print(bool(0)) # False +print(bool('')) +print(bool(0 and 1)) + +# MATH FUNCTIONS +# ------------------------------------------------ +# built-in arithmetic functions are: add, subtract, multiply, divide, power, integer division, and modulus (AKA: mod or remainder) +# + Addition +# - Subtraction +# * Multiplication +# / Division +# // Integer Division +# % Modulus (division remainder) +# ** Power +x = 5 + 7 +print(x, type(x)) +x = 5 - 7 +print(x, type(x)) +x = 7 / 4 +print(x, type(x)) +x = 7 // 4 +print(x, type(x)) +x = 7 % 4 +print(x) +x = 4 ** 3 +print(x) + +# x += 5 is the same as saying x = x + 5 +x = 2 +x = x + 5 +print(x) + +x = 2 +x += 5 +print(x) + +# Order of Operations +# 1. ( ) +# 2. ** +# 3. * / // % +# 4. 
+ - +# Example: 1 + 5 ** (3 // 2) - 6 % 4 => 4 +x = 1 + 5 ** (3 // 2) - 6 % 4 +print(x) + +# CONSOLE INPUT +# ------------------------------------------------ +# Get user input from the keyboard at the command prompt +name = input('What is your name? ') +print("Hello,", name) +age = eval(input('How old are you? ')) +print('Age =', age, type(age)) + +# With what we know so far we can write a program to get user input and compute the area of a triangle. +base = eval(input('Enter the base: ')) +height = eval(input('Enter the height: ')) +area = base * height / 2 +print ('Area = ', area) + +# COMMENTS +# ------------------------------------------------ +# Hash tag for single line comment +''' +Three sets of quotes +for multi-line comments +''' +""" double quotes work too """ + +# IF-ELIF-ELSE STATEMENTS +# ------------------------------------------------ +# requires a boolean expression +x = 69 +print(x > 50) +print(x == 50) +print(x != 50) + +my_age = 19 +print(my_age > 21) +if my_age >= 21: + print("Old enough.") +else: + print("Not old enough.") + print("Maybe next year.") + +score = 72 +if score > 90: + print('Grade: A') +elif score > 80: + print('Grade: B') +elif score > 70: + print('Grade: C') +elif score > 60: + print('Grade: D') +else: + print('Grade: F') + +# can have multiple conditions in an if, using and/or +my_age = 19 +grade = 'C' +if my_age > 18 and grade == 'A': + print('I can go to the party!') + +# nested if statements -- both conditions must be True +my_age = 19 +grade = 'C' +if my_age > 18: + if grade == 'A': + print('I can go to the party!') + +# if ternary +x = 10 +y = 20 +# action/ if condition true/ else condfition false +z = x + y if x > y else y - x +print(z) +# result 10 + + +# STRINGS +# ------------------------------------------------ +# a string is a sequence of characters (ie. text) +s = 'Howdy' +print(s) +print(len(s)) +print(s[3]) +print(s[1:3]) +t = ' dude! ' +s += t +print(s + '|') +print(s.strip() + '|') +s = s.rstrip('! 
') +print(s) + +s = 'Howdy dude!' +print(s.lower()) +print(s.upper()[:5]) +print(s.title()) +print(s.replace('Howdy', 'Greetings')) +print(s) +print(s.count('d')) +print(s.find('w')) +print('dud' in s) +print('X' not in s) +print(s.startswith('How')) +print(s.endswith('cat')) +print(s > 'Honk') +print(s.isalpha()) +print(s[0:4].isalpha()) +print(s.isnumeric()) + +print(s.split()) +print('5,7,9'.split(',')) +print('73.294'.split('.')) + +print(s[0], '\t', s[1], '\t', s[2]) +print(s[:s.find(' ')] + '\n' + s[s.find(' ')+1:]) + +# LOOPS -- FOR, WHILE +# ------------------------------------------------ +# used to iterate through the items of a string or list +# indention is important. +# every statement indented from for will be executed each iteration +s = 'Raj' +for letter in s: + print(letter) +for letter in s: + print(letter, end='') +print() + +# if inside a for loop. indention is important. +for pig in s: + if pig != 'a': + print(pig, end='') +print() +for i in range(len(s)): + print(i, end='') +print() +for i in range(len(s)): + print(s[i]) +for i in range(len(s)-1, -1, -1): + print(s[i]) +print(s[::-1]) + +# while loops are an alternative to for loops +# they check a boolean each iteration, and exit the loop when the bool is False +x = 2 +while x < 5: + print('ha') + x += 1 + +# DATA STRUCTURES +# ------------------------------------------------ +# These functions all work on String, List, and Tuple + +# Indexing -- access any item in the sequence using its index +x = 'frog' +print (x[3]) # prints 'g' + +x = ['pig', 'cow', 'horse'] +print (x[1]) # prints 'cow' + +# Slicing -- slice out substrings, sublists, subtuples using indexes +# [start : end+1 : step] +x = 'computer' +print(x[1:4]) # items 1 to 3, 'omp' +print(x[1:6:2]) # items 1, 3, 5, 'opt' +print(x[3:]) # items 3 to end, 'puter' +print(x[:5]) # items 0 to 4, 'compu' +print(x[-1]) # last item, 'r' +print(x[-3:]) # last 3 items, 'ter' +print(x[:-2]) # all except last 2 items, 'comput' + +# Adding / 
Concatenating -- combine 2 sequences of the same type using + +x = 'horse' + 'shoe' +print (x) # prints 'horseshoe' + +x = ['pig', 'cow'] + ['horse'] +print (x) # prints ['pig', 'cow', 'horse'] + +# Multiplying -- multiply a sequence using * +x = 'bug' * 3 +print (x) # prints 'bugbugbug' + +x = [8, 5] * 3 +print (x) # prints [8, 5, 8, 5, 8, 5] + +# Checking Membership -- test whether an item is in or not in a sequence +x = 'bug' +print ('u' in x) # prints True + +x = ['pig', 'cow', 'horse'] +print ('cow' not in x) # prints False + +# Iterating -- iterate through the items in a sequence +x = [7, 8, 3] +for item in x: + print (item * 2) # prints 14, 16, 6 + +x = [7, 8, 3] +for index, item in enumerate(x): + print (index, item) # prints 0 7, 1 8, 2 3 + +# Length -- count the number of items in a sequence +x = 'bug' +print (len(x)) # prints 3 + +x = ['pig', 'cow', 'horse'] +print (len(x)) # prints 3 + +# Minimum -- find the minimum item in a sequence lexicographically +# alpha or numeric types, but cannot mix types +x = 'bug' +print (min(x)) # prints 'b' + +x = ['pig', 'cow', 'horse'] +print (min(x)) # prints 'cow' + +# Maximum -- find the maximum item in a sequence +# alpha or numeric types, but cannot mix types +x = 'bug' +print (max(x)) # prints 'u' + +x = ['pig', 'cow', 'horse'] +print (max(x)) # prints 'pig' + +# Sum -- find the sum of items in a sequence +# entire sequence must be numeric type +x = [5, 7, 'bug'] +print (sum(x)) # error! 
+ +x = [2, 5, 8, 12] +print (sum(x)) # prints 27 +print (sum(x[-2:])) # prints 20 + +# Sorting -- returns a new list of items in sorted order +# sorted does not change the original list +x = 'bug' +print (sorted(x)) # prints ['b', 'g', 'u'] + +x = ['pig', 'cow', 'horse'] +print (sorted(x)) # prints ['cow', 'horse', 'pig'] + +# count (item) +# Returns count of an item +x = 'hippo' +print (x.count('p')) # prints 2 + +x = ['pig', 'cow', 'horse', 'cow'] +print (x.count('cow')) # prints 2 + +# index (item) +# Returns the index of the first occurrence of an item +x = 'hippo' +print (x.index('p')) # prints 2 + +x = ['pig', 'cow', 'horse', 'cow'] +print (x.index('cow')) # prints 1 + +# Unpacking - unpack the n items of a sequence into n variables +x = ['pig', 'cow', 'horse'] +a, b, c = x # now a is 'pig', b is 'cow', c is 'horse' + +# LISTS +# ------------------------------------------------ +# constructors – creating a new list +x = list((1, 2, 3)) # note double parens +x = ['a', 25, 'dog', 8.43] +x = list(tuple1) + +# list creation using comprehensions +x = [m for m in range(8)] +# resulting list: [0, 1, 2, 3, 4, 5, 6, 7] +x = [z**2 for z in range(10) if z>4] +# resulting list: [25, 36, 49, 64, 81] + +# Delete -- delete a list or an item from a list +x = [5, 3, 8, 6] +del(x[1]) # [5, 8, 6] +del(x) # deletes list x + +# Append -- append an item to a list +x = [5, 3, 8, 6] +x.append(7) # [5, 3, 8, 6, 7] + +# Extend -- append an sequence to a list +x = [5, 3, 8, 6] +y = [12, 13] +x.extend(y) # [5, 3, 8, 6, 7, 12, 13] + +# Insert -- insert an item at given index. x.insert(index, item) +x = [5, 3, 8, 6] +x.insert(1, 7) # [5, 7, 3, 8, 6] +x.insert(1,['a','m']) # [5, ['a', 'm'], 7, 3, 8, 6] + +# Pop -- pops last item off the list, and returns item +x = [5, 3, 8, 6] +x.pop() # [5, 3, 8]. and returns the 6 +print(x.pop()) # [5, 3]. 
and prints 8 + +# Remove -- remove first instance of an item +x = [5, 3, 8, 6, 3] +x.remove(3) # [5, 8, 6, 3] + +# Reverse -- reverse the order of the list +x = [5, 3, 8, 6] +x.reverse() # [6, 8, 3, 5] + +# Sort -- sort the list in place +# sorted(x) returns a new sorted list without changing the original list x. +# x.sort() puts the items of x in sorted order (sorts in place). +x = [5, 3, 8, 6] +x.sort() # [3, 5, 6, 8] + +# Clear -- delete all items from the list +x = [5, 3, 8, 6] +x.clear() # [] + +# TUPLES +# ------------------------------------------------ +# constructors – creating a new tuple +x = () # no-item tuple +x = (1,2,3) +x = 1, 2, 3 # parenthesis are optional +x = 2, # single-item tuple +list1 = [5, 7, 7] +x = tuple(list1) # tuple from list + +# Tuples are Immutable, but member objects may be mutable +x = (1, 2, 3) +del(x[1]) # error! +x[1] = 8 # error! + +x = ([1,2], 3) # 2-item tuple: list and int +del(x[0][1]) # ([1], 3) + +# SETS +# ------------------------------------------------ +# constructors – creating a new set +x = {3,5,3,5} # {5, 3} +x = set() # empty set +list1 = [5, 7, 7] +x = set(list1) # new set from list. 
strips duplicates, {5, 7} + +# Set Comprehension +x = {3*x for x in range(10) if x>5} +# resulting set: {18, 21, 24, 27} but in random order + +# DICTIONARIES +# ------------------------------------------------ +# constructors – creating a new dict +x = {'pork':25.3, 'beef':33.8, 'chicken':22.7} +x = dict([('pork', 25.3),('beef', 33.8),('chicken', 22.7)]) +x = dict(pork=25.3, beef=33.8, chicken=22.7) + +# Accessing keys and values in a dict +x.keys() # returns list of keys in x +x.values() # returns list of values in x +x.items() # returns list of key-value tuple pairs in x + +item in x.values() # tests membership in x, returns boolean + +# Iterating a Dict +for key in x: # iterate keys + print(key, x[key]) # print all key/value pairs + +for k, v in x.items(): # iterate key/value pairs + print(k, v) # print all key/value pairs + +# FUNCTIONS +# ------------------------------------------------ +# use the def keyword to create a function +# give the function a name, followed by parentheses and a colon +# you can pass in 0 or more variables. here we pass in num. +# you can return 0 or more variables. here we return the cube of num +# indention is important. 
+def cuber(num): + num_cubed = num * num * num + return num_cubed + +# to call the function, and pass in 5: +cuber(5) + +# but if you want to assign the return value (125) to a variable, +x = 5 +x_cubed = cuber(x) +print(x, x_cubed) + +# you can set default values for parameters +def cuber(num = 2): + num_cubed = num * num * num + return num_cubed + +print(cuber()) # uses the default 2 +print(cuber(3)) # 3 overrides the default + +# you can pass in multiple values, and return multiple values +# but order is important +def solve_triangle(base, height, side1, side2, side3): + area = base * height / 2 + perimeter = side1 + side2 + side3 + return area, perimeter + +area, perim = solve_triangle(3, 4, 5, 3, 4) # b=3, h=4, s1=5, s2=3, s3=4 +print('Area:', area, ' Perimeter:', perim) + +# above are all called "positional arguments", and order matters +# you can also pass in "keyword arguments" when calling a function +a, p = solve_triangle(side1=5, side2=3, side3=4, height=4, base=3) +print(a, p) + +# or use a combination of both, but positional arguments must come first +a, p = solve_triangle(3, 4, side3=4, side2=3, side1=5) +print(a, p) + +# CLASSES & OBJECTS +# ------------------------------------------------ +# Use classes to model real-world things. +# Keep related data (variables) and actions (functions) in one block of code. 
+class Circle: + # Circle constructor -- __init__ method creates a new Circle object + def __init__(self, r = 1): + self.radius = r + + def getPerimeter(self): + return 2 * self.radius * 3.14 + + def getArea(self): + return self.radius ** 2 * 3.14 +# all methods have the self parameter, which is Python's reference to the object that invoked the method + +# this calls the __init__ method, which creates the new Circle +circle1 = Circle(3) +# you can access the circle's attributes and methods using the dot operator +print("Radius =", circle1.radius) +print("Perimeter =", circle1.getPerimeter()) + +# IMPORTS +# ------------------------------------------------ +# Python has many many classes already written that you can use +# To access methods and data from another class you must import it +import math +print(math.pi) + +import random +print(random.randint(1,5)) + +# shorter version, using as to abbreviate module name +import math as m +print(m.pi) + +import random as rd +print(rd.randint(1,5)) + +# import just one or two functions or constants rather than a whole module +# easier for coding, but need to beware names don't conflict +from math import pi +print(pi) + +from random import randint, shuffle +print(randint(1,5)) +x = ['a', 'b', 'c'] +shuffle(x) +print(x) + +# can rename an imported function if you want +x = ['a', 'b', 'c'] +from random import shuffle as sf +sf(x) +print(x) + +# can also import whole module using * +from random import * +print(randint(1,5)) + +# FILE READ & WRITE +# ------------------------------------------------ +filename = 'city_data.txt' + +# this opens a file handle called fin, iterates the lines of the file, and prints each line +with open(filename) as fin: + for line in fin: + print(line) + +# we can grab words from a line by using split, which turns each line into a list called row +with open(filename) as fin: + fin.readline() + for line in fin: + row = line.split(',') + print('Country:', row[1], ' City:', row[2]) + +# use the w 
parameter to write to an output file +with open('Cities.txt', 'w') as fout: + with open(filename) as fin: + fin.readline() + for line in fin: + row = line.split(',') + fout.write(row[2] + '\n') + + + + + + + + + diff --git a/Python in 90 minutes/city_data.txt b/Python in 90 minutes/city_data.txt new file mode 100644 index 00000000..5ed0a476 --- /dev/null +++ b/Python in 90 minutes/city_data.txt @@ -0,0 +1,5 @@ +Country Code,Country Name,City Name,City Population +08,US,San Jose,70 +09,Canada,Vancouver,30 +08,US,San Francisco,90 +07,China,Beijing,40 \ No newline at end of file diff --git a/Queues implementaion.py b/Queues implementaion.py new file mode 100644 index 00000000..63170c8c --- /dev/null +++ b/Queues implementaion.py @@ -0,0 +1,64 @@ + +# implemented by Linked list +class Node(object): + def __init__(self, item = None): + self.item = item + self.next = None + self.previous = None + + +class Queue(object): + def __init__(self): + self.length = 0 + self.head = None + self.tail = None + + def enqueue(self, x): + newNode = Node(x) + if self.head == None: + self.head = self.tail = newNode + else: + self.tail.next = newNode + newNode.previous = self.tail + self.tail = newNode + self.length += 1 + + + def dequeue (self): + item = self.head.item + self.head = self.head.next + self.length -= 1 + if self.length == 0: + self.last = None + return item + + +################################################# + +# implemented by array +class Queue: + def __init__(self): + self.items = [] + + def is_empty(self): + return self.items == [] + + def enqueue(self, data): + self.items.append(data) + + def dequeue(self): + return self.items.pop(0) + + def display(self): + ar = [] + for i in self.items: + ar.append(i) + return ar +que = Queue() +que.enqueue('google') +que.enqueue('youtube') +que.enqueue('udemy') +que.enqueue('udacity') +que.dequeue() +que.dequeue() +print(que.display()) diff --git a/README.md b/README.md index c5403ef5..939de1ed 100644 --- a/README.md +++ 
b/README.md @@ -1,11 +1,19 @@ -# Python +#This is a open source project. +# Python 3 These files are mainly intended to accompany my series of YouTube tutorial videos here, https://www.youtube.com/user/joejamesusa -and is mainly intended for educational purposes. -You are invited to subscribe to my video channel, and to download and use any code in -this Python repository without restrictions, according to the MIT License. +and are mainly intended for educational purposes. +You are invited to subscribe to my video channel-Joe James, and to download and use any code in +this Python repository, according to the MIT License. Feel free to post any comments on my YouTube channel. +I am very happy to see you there on my you tube channel. excited!!!!!!!!! +## Subscribe to my channel for more tutorial videos. -Joe James -Fremont, California -Copyright (C) 2015-2017, Joe James +This source code is easy to understand and reliable for self study and you will learn them easily, try to practice more coding by making algorithms yourself and you can become a better Python programmer, and remember "Try to learn something about everything and everything about something". + +Thank you for reviewing my repositories and keep practicing. +Joe James. +Fremont, CA. +Copyright (C) 2015-2021, Joe James + +## Happy coding guys!😀 diff --git a/Sorting Algorithms/Heapsort.py b/Sorting Algorithms/Heapsort.py new file mode 100644 index 00000000..9a2e2c14 --- /dev/null +++ b/Sorting Algorithms/Heapsort.py @@ -0,0 +1,33 @@ +# heapify +def heapify(arr, n, i): + largest = i # largest value + l = 2 * i + 1 # left + r = 2 * i + 2 # right + # if left child exists + if l < n and arr[i] < arr[l]: + largest = l + # if right child exits + if r < n and arr[largest] < arr[r]: + largest = r + # root + if largest != i: + arr[i],arr[largest] = arr[largest],arr[i] # swap + # root. 
+ heapify(arr, n, largest) +# sort +def heapSort(arr): + n = len(arr) + # maxheap + for i in range(n, -1, -1): + heapify(arr, n, i) + # element extraction + for i in range(n-1, 0, -1): + arr[i], arr[0] = arr[0], arr[i] # swap + heapify(arr, i, 0) +# main +arr = [2,5,3,8,6,5,4,7] +heapSort(arr) +n = len(arr) +print ("Sorted array is") +for i in range(n): + print (arr[i],end=" ") diff --git a/Sorting Algorithms/Python QuickSort.ipynb b/Sorting Algorithms/Python QuickSort.ipynb new file mode 100644 index 00000000..5f925c81 --- /dev/null +++ b/Sorting Algorithms/Python QuickSort.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python QuickSort Algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5, 9, 1, 2, 4, 8, 6, 3, 7]\n", + "[1, 2, 3, 4, 5, 6, 7, 8, 9]\n" + ] + } + ], + "source": [ + "#---------------------------------------\n", + "# Quick Sort\n", + "#---------------------------------------\n", + "def quick_sort(A):\n", + " quick_sort2(A, 0, len(A)-1)\n", + " \n", + "def quick_sort2(A, low, hi):\n", + " if hi-low < 1 and low < hi:\n", + " quick_selection(A, low, hi)\n", + " elif low < hi:\n", + " p = partition(A, low, hi)\n", + " quick_sort2(A, low, p - 1)\n", + " quick_sort2(A, p + 1, hi)\n", + " \n", + "def get_pivot(A, low, hi):\n", + " mid = (hi + low) // 2\n", + " s = sorted([A[low], A[mid], A[hi]])\n", + " if s[1] == A[low]:\n", + " return low\n", + " elif s[1] == A[mid]:\n", + " return mid\n", + " return hi\n", + " \n", + "def partition(A, low, hi):\n", + " pivotIndex = get_pivot(A, low, hi)\n", + " pivotValue = A[pivotIndex]\n", + " A[pivotIndex], A[low] = A[low], A[pivotIndex]\n", + " border = low\n", + "\n", + " for i in range(low, hi+1):\n", + " if A[i] < pivotValue:\n", + " border += 1\n", + " A[i], A[border] = A[border], A[i]\n", + " A[low], A[border] = A[border], A[low]\n", + 
"\n", + " return (border)\n", + " \n", + "def quick_selection(x, first, last):\n", + " for i in range (first, last):\n", + " minIndex = i\n", + " for j in range (i+1, last+1):\n", + " if x[j] < x[minIndex]:\n", + " minIndex = j\n", + " if minIndex != i:\n", + " x[i], x[minIndex] = x[minIndex], x[i]\n", + " \n", + "A = [5,9,1,2,4,8,6,3,7]\n", + "print(A)\n", + "quick_sort(A)\n", + "print(A)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Nice simple version written by Mr. UncleChu in comments\n", + "Slick code, but does not sort in place, so uses a lot more memory. Do not use for large lists or you'll get stackoverflow." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5, 9, 1, 2, 4, 8, 6, 3, 7]\n", + "[1, 2, 3, 4, 5, 6, 7, 8, 9]\n" + ] + } + ], + "source": [ + "def quick_sort_chu(a_list):\n", + " if len(a_list) < 2: return a_list\n", + " lesser = quick_sort([x for x in a_list[1:] if x <= a_list[0]])\n", + " bigger = quick_sort([x for x in a_list[1:] if x > a_list[0]])\n", + " return sum([lesser, [a_list[0]], bigger], [])\n", + "A = [5,9,1,2,4,8,6,3,7]\n", + "print(A)\n", + "B = quick_sort_chu(A)\n", + "print(B)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Sorting Algorithms/Radix_Sort.ipynb b/Sorting Algorithms/Radix_Sort.ipynb new file mode 100644 index 00000000..0b701528 --- /dev/null +++ b/Sorting Algorithms/Radix_Sort.ipynb 
@@ -0,0 +1,121 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Radix Sort\n", + "(c) 2020, Joe James" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# get number of digits in largest item\n", + "def __get_num_digits(A):\n", + " m = 0\n", + " for item in A:\n", + " m = max(m, item)\n", + " return len(str(m))" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# flatten into a 1D List\n", + "from functools import reduce\n", + "def __flatten(A):\n", + " return reduce(lambda x, y: x + y, A)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Changed from YouTube video:\n", + "It's much cleaner to put the _get_num_digits call inside the radix function rather than in main as shown in the video. That way you only need to pass a List to the radix function. Thanks to Brother Lui for this suggestion." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def radix(A):\n", + " num_digits = __get_num_digits(A)\n", + " for digit in range(0, num_digits):\n", + " B = [[] for i in range(10)]\n", + " for item in A:\n", + " # num is the bucket number that the item will be put into\n", + " num = item // 10 ** (digit) % 10\n", + " B[num].append(item)\n", + " A = __flatten(B)\n", + " return A" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 2, 3, 45, 53, 55, 213, 288, 289]\n", + "[0, 1, 2, 3, 4, 5] [999994, 999995, 999996, 999997, 999998, 999999]\n" + ] + } + ], + "source": [ + "def main():\n", + " A = [55, 45, 3, 289, 213, 1, 288, 53, 2]\n", + " A = radix(A)\n", + " print(A)\n", + " \n", + " B = [i for i in range(1000000)]\n", + " from random import shuffle\n", + " shuffle(B)\n", + " B = radix(B)\n", + " print(B[:6], 
B[-6:])\n", + "\n", + "main()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/SortingAlgorithms.py b/Sorting Algorithms/SortingAlgorithms similarity index 96% rename from SortingAlgorithms.py rename to Sorting Algorithms/SortingAlgorithms index 8d6f81c6..0379471d 100644 --- a/SortingAlgorithms.py +++ b/Sorting Algorithms/SortingAlgorithms @@ -1,394 +1,394 @@ -import random -import time -import copy -size1 = 100 -size2 = 10000 -size3 = 1000000 -span = 1000000 -threshold = 20 - -#--------------------------------------- -# Insertion Sort -#--------------------------------------- -# not optimized, equiv to while version below, but uses for loop -def insertion_sort1(A): - for i in range(1, len(A)): - for j in range(i-1, -1, -1): - if A[j] > A[j+1]: - A[j], A[j+1] = A[j+1], A[j] - else: - break - -# not optimized, equiv to break version, but uses while loop -def insertion_sort2(A): - for i in range(1, len(A)): - j = i-1 - while A[j] > A[j+1] and j >= 0: - A[j], A[j+1] = A[j+1], A[j] - j -= 1 - -# optimized - shifts instead of swapping -def insertion_sort3(A): - for i in range(1, len(A)): - curNum = A[i] - k = 0 - for j in range(i-1, -2, -1): - k = j - if A[j] > curNum: - A[j+1] = A[j] - else: - break - A[k+1] = curNum - -#--------------------------------------- -# Selection Sort -#--------------------------------------- -def selection_sort(A): - for i in range (0, len(A) - 1): - minIndex = i - for j in range (i+1, len(A)): - if A[j] < A[minIndex]: - minIndex = j - if minIndex != 
i: - A[i], A[minIndex] = A[minIndex], A[i] - -#--------------------------------------- -# Bubble Sort -#--------------------------------------- -# not optimized -def bubble_sort1(A): - for i in range (0, len(A) - 1): - for j in range (0, len(A) - i - 1): - if A[j] > A[j+1]: - A[j], A[j+1] = A[j+1], A[j] - -# optimized to exit if no swaps occur -def bubble_sort2(A): - for i in range (0, len(A) - 1): - done = True - for j in range (0, len(A) - i - 1): - if A[j] > A[j+1]: - A[j], A[j+1] = A[j+1], A[j] - done = False - if done: - return - -#--------------------------------------- -# Merge Sort -#--------------------------------------- -def merge_sort(A): - merge_sort2(A, 0, len(A)-1) - -def merge_sort2(A, first, last): - if last-first < threshold and first < last: - quick_selection(A, first, last) - elif first < last: - middle = (first + last)//2 - merge_sort2(A, first, middle) - merge_sort2(A, middle+1, last) - merge(A, first, middle, last) - -def merge(A, first, middle, last): - L = A[first:middle] - R = A[middle:last+1] - L.append(999999999) - R.append(999999999) - i = j = 0 - - for k in range (first, last+1): - if L[i] <= R[j]: - A[k] = L[i] - i += 1 - else: - A[k] = R[j] - j += 1 -#--------------------------------------- -# Quick Sort -#--------------------------------------- -def quick_sort(A): - quick_sort2(A, 0, len(A)-1) - -def quick_sort2(A, low, hi): - if hi-low < threshold and low < hi: - quick_selection(A, low, hi) - elif low < hi: - p = partition(A, low, hi) - quick_sort2(A, low, p - 1) - quick_sort2(A, p + 1, hi) - -def get_pivot(A, low, hi): - mid = (hi + low) // 2 - s = sorted([A[low], A[mid], A[hi]]) - if s[1] == A[low]: - return low - elif s[1] == A[mid]: - return mid - return hi - -def partition(A, low, hi): - pivotIndex = get_pivot(A, low, hi) - pivotValue = A[pivotIndex] - A[pivotIndex], A[low] = A[low], A[pivotIndex] - border = low - - for i in range(low, hi+1): - if A[i] < pivotValue: - border += 1 - A[i], A[border] = A[border], A[i] - A[low], 
A[border] = A[border], A[low] - - return (border) - -def quick_selection(x, first, last): - for i in range (first, last): - minIndex = i - for j in range (i+1, last+1): - if x[j] < x[minIndex]: - minIndex = j - if minIndex != i: - x[i], x[minIndex] = x[minIndex], x[i] - -#--------------RANDOM ORDER---------------------- -#------------------------------------------------ -# size = 100 -#------------------------------------------------ -print("\nRandom Order\n---------------------------------") -w = [random.randint(0, span) for a in range(0, size1)] -t1 = time.clock() -insertion_sort3(w) -print("Insertion Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size1)] -t1 = time.clock() -selection_sort(w) -print("Selection Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size1)] -t1 = time.clock() -bubble_sort2(w) -print("Bubble Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size1)] -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size1)] -t1 = time.clock() -quick_sort(w) -print("Quick Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size1)] -t1 = time.clock() -w.sort() -print("Tim Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) -#------------------------------------------------ -# size = 10,000 -#------------------------------------------------ -w = [random.randint(0, span) for a in range(0, size2)] -t1 = time.clock() -insertion_sort3(w) -print("Insertion Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size2)] -t1 = time.clock() -selection_sort(w) -print("Selection Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, 
size2)] -t1 = time.clock() -bubble_sort2(w) -print("Bubble Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size2)] -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size2)] -t1 = time.clock() -quick_sort(w) -print("Quick Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size2)] -t1 = time.clock() -w.sort() -print("Tim Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) -#------------------------------------------------ -# size = 1,000,000 -#------------------------------------------------ -w = [random.randint(0, span) for a in range(0, size3)] -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size3)] -t1 = time.clock() -quick_sort(w) -print("Quick Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, span) for a in range(0, size3)] -t1 = time.clock() -w.sort() -print("Tim Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -# ----------------ALREADY SORTED----------------- -#------------------------------------------------ -# size = 10,000 -#------------------------------------------------ -print("\nAlready Sorted\n---------------------------------") - -w = [a for a in range(0, size2)] -t1 = time.clock() -insertion_sort3(w) -print("Insertion Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -t1 = time.clock() -selection_sort(w) -print("Selection Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -t1 = time.clock() -bubble_sort2(w) -print("Bubble Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -t1 = time.clock() -quick_sort(w) -print("Quick Sort(size=", str(size2),"): ", 
(time.clock()-t1) * 1000) - -t1 = time.clock() -w.sort() -print("Tim Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) -#------------------------------------------------ -# size = 1,000,000 -#------------------------------------------------ -w = [a for a in range(0, size3)] -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -t1 = time.clock() -quick_sort(w) -print("Quick Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -t1 = time.clock() -w.sort() -print("Tim Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -# ----------------REVERSE SORTED----------------- -#------------------------------------------------ -# size = 10,000 -#------------------------------------------------ -print("\nReverse Sorted\n---------------------------------") - -w = [a for a in range(0, size2)] -w.reverse() -t1 = time.clock() -insertion_sort3(w) -print("Insertion Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [a for a in range(0, size2)] -w.reverse() -t1 = time.clock() -selection_sort(w) -print("Selection Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [a for a in range(0, size2)] -w.reverse() -t1 = time.clock() -bubble_sort2(w) -print("Bubble Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [a for a in range(0, size2)] -w.reverse() -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [a for a in range(0, size2)] -w.reverse() -t1 = time.clock() -quick_sort(w) -print("Quick Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [a for a in range(0, size2)] -w.reverse() -t1 = time.clock() -w.sort() -print("Tim Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) -#------------------------------------------------ -# size = 1,000,000 -#------------------------------------------------ -w = [a for a in range(0, size3)] -w.reverse() -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", 
str(size3),"): ", (time.clock()-t1) * 1000) - -w = [a for a in range(0, size3)] -w.reverse() -t1 = time.clock() -quick_sort(w) -print("Quick Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -w = [a for a in range(0, size3)] -w.reverse() -t1 = time.clock() -w.sort() -print("Tim Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -#--------------RANDOM ORDER, MANY DUPLICATES------------------ -#------------------------------------------------ -# size = 10,000 -#------------------------------------------------ -print("\nRandom Order, Many Duplicates\n---------------------------------") - -w = [random.randint(0, size2//10) for a in range(0, size2)] -t1 = time.clock() -insertion_sort3(w) -print("Insertion Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, size2//10) for a in range(0, size2)] -t1 = time.clock() -selection_sort(w) -print("Selection Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0,size2//10) for a in range(0, size2)] -t1 = time.clock() -bubble_sort2(w) -print("Bubble Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, size2//10) for a in range(0, size2)] -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, size2//10) for a in range(0, size2)] -t1 = time.clock() -quick_sort(w) -print("Quick Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, size2//10) for a in range(0, size2)] -t1 = time.clock() -w.sort() -print("Tim Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) -#------------------------------------------------ -# size = 1,000,000 -#------------------------------------------------ -w = [random.randint(0, size2//10) for a in range(0, size3)] -t1 = time.clock() -merge_sort(w) -print("Merge Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, size2//10) for a in range(0, size3)] -t1 = time.clock() 
-#quick_sort(w) -#print("Quick Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) - -w = [random.randint(0, size2//10) for a in range(0, size3)] -t1 = time.clock() -w.sort() +import random +import time +import copy +size1 = 100 +size2 = 10000 +size3 = 1000000 +span = 1000000 +threshold = 20 + +#--------------------------------------- +# Insertion Sort +#--------------------------------------- +# not optimized, equiv to while version below, but uses for loop +def insertion_sort1(A): + for i in range(1, len(A)): + for j in range(i-1, -1, -1): + if A[j] > A[j+1]: + A[j], A[j+1] = A[j+1], A[j] + else: + break + +# not optimized, equiv to break version, but uses while loop +def insertion_sort2(A): + for i in range(1, len(A)): + j = i-1 + while A[j] > A[j+1] and j >= 0: + A[j], A[j+1] = A[j+1], A[j] + j -= 1 + +# optimized - shifts instead of swapping +def insertion_sort3(A): + for i in range(1, len(A)): + curNum = A[i] + k = 0 + for j in range(i-1, -2, -1): + k = j + if A[j] > curNum: + A[j+1] = A[j] + else: + break + A[k+1] = curNum + +#--------------------------------------- +# Selection Sort +#--------------------------------------- +def selection_sort(A): + for i in range (0, len(A) - 1): + minIndex = i + for j in range (i+1, len(A)): + if A[j] < A[minIndex]: + minIndex = j + if minIndex != i: + A[i], A[minIndex] = A[minIndex], A[i] + +#--------------------------------------- +# Bubble Sort +#--------------------------------------- +# not optimized +def bubble_sort1(A): + for i in range (0, len(A) - 1): + for j in range (0, len(A) - i - 1): + if A[j] > A[j+1]: + A[j], A[j+1] = A[j+1], A[j] + +# optimized to exit if no swaps occur +def bubble_sort2(A): + for i in range (0, len(A) - 1): + done = True + for j in range (0, len(A) - i - 1): + if A[j] > A[j+1]: + A[j], A[j+1] = A[j+1], A[j] + done = False + if done: + return + +#--------------------------------------- +# Merge Sort +#--------------------------------------- +def merge_sort(A): + merge_sort2(A, 0, 
len(A)-1) + +def merge_sort2(A, first, last): + if last-first < threshold and first < last: + quick_selection(A, first, last) + elif first < last: + middle = (first + last)//2 + merge_sort2(A, first, middle) + merge_sort2(A, middle+1, last) + merge(A, first, middle, last) + +def merge(A, first, middle, last): + L = A[first:middle] + R = A[middle:last+1] + L.append(999999999) + R.append(999999999) + i = j = 0 + + for k in range (first, last+1): + if L[i] <= R[j]: + A[k] = L[i] + i += 1 + else: + A[k] = R[j] + j += 1 +#--------------------------------------- +# Quick Sort +#--------------------------------------- +def quick_sort(A): + quick_sort2(A, 0, len(A)-1) + +def quick_sort2(A, low, hi): + if hi-low < threshold and low < hi: + quick_selection(A, low, hi) + elif low < hi: + p = partition(A, low, hi) + quick_sort2(A, low, p - 1) + quick_sort2(A, p + 1, hi) + +def get_pivot(A, low, hi): + mid = (hi + low) // 2 + s = sorted([A[low], A[mid], A[hi]]) + if s[1] == A[low]: + return low + elif s[1] == A[mid]: + return mid + return hi + +def partition(A, low, hi): + pivotIndex = get_pivot(A, low, hi) + pivotValue = A[pivotIndex] + A[pivotIndex], A[low] = A[low], A[pivotIndex] + border = low + + for i in range(low, hi+1): + if A[i] < pivotValue: + border += 1 + A[i], A[border] = A[border], A[i] + A[low], A[border] = A[border], A[low] + + return (border) + +def quick_selection(x, first, last): + for i in range (first, last): + minIndex = i + for j in range (i+1, last+1): + if x[j] < x[minIndex]: + minIndex = j + if minIndex != i: + x[i], x[minIndex] = x[minIndex], x[i] + +#--------------RANDOM ORDER---------------------- +#------------------------------------------------ +# size = 100 +#------------------------------------------------ +print("\nRandom Order\n---------------------------------") +w = [random.randint(0, span) for a in range(0, size1)] +t1 = time.clock() +insertion_sort3(w) +print("Insertion Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) + +w = 
[random.randint(0, span) for a in range(0, size1)] +t1 = time.clock() +selection_sort(w) +print("Selection Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size1)] +t1 = time.clock() +bubble_sort2(w) +print("Bubble Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size1)] +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size1)] +t1 = time.clock() +quick_sort(w) +print("Quick Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size1)] +t1 = time.clock() +w.sort() +print("Tim Sort(size=", str(size1),"): ", (time.clock()-t1) * 1000) +#------------------------------------------------ +# size = 10,000 +#------------------------------------------------ +w = [random.randint(0, span) for a in range(0, size2)] +t1 = time.clock() +insertion_sort3(w) +print("Insertion Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size2)] +t1 = time.clock() +selection_sort(w) +print("Selection Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size2)] +t1 = time.clock() +bubble_sort2(w) +print("Bubble Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size2)] +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size2)] +t1 = time.clock() +quick_sort(w) +print("Quick Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size2)] +t1 = time.clock() +w.sort() +print("Tim Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) +#------------------------------------------------ +# size = 1,000,000 
+#------------------------------------------------ +w = [random.randint(0, span) for a in range(0, size3)] +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size3)] +t1 = time.clock() +quick_sort(w) +print("Quick Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, span) for a in range(0, size3)] +t1 = time.clock() +w.sort() +print("Tim Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +# ----------------ALREADY SORTED----------------- +#------------------------------------------------ +# size = 10,000 +#------------------------------------------------ +print("\nAlready Sorted\n---------------------------------") + +w = [a for a in range(0, size2)] +t1 = time.clock() +insertion_sort3(w) +print("Insertion Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +t1 = time.clock() +selection_sort(w) +print("Selection Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +t1 = time.clock() +bubble_sort2(w) +print("Bubble Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +t1 = time.clock() +quick_sort(w) +print("Quick Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +t1 = time.clock() +w.sort() +print("Tim Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) +#------------------------------------------------ +# size = 1,000,000 +#------------------------------------------------ +w = [a for a in range(0, size3)] +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +t1 = time.clock() +quick_sort(w) +print("Quick Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +t1 = time.clock() +w.sort() +print("Tim Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +# ----------------REVERSE SORTED----------------- 
+#------------------------------------------------ +# size = 10,000 +#------------------------------------------------ +print("\nReverse Sorted\n---------------------------------") + +w = [a for a in range(0, size2)] +w.reverse() +t1 = time.clock() +insertion_sort3(w) +print("Insertion Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [a for a in range(0, size2)] +w.reverse() +t1 = time.clock() +selection_sort(w) +print("Selection Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [a for a in range(0, size2)] +w.reverse() +t1 = time.clock() +bubble_sort2(w) +print("Bubble Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [a for a in range(0, size2)] +w.reverse() +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [a for a in range(0, size2)] +w.reverse() +t1 = time.clock() +quick_sort(w) +print("Quick Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [a for a in range(0, size2)] +w.reverse() +t1 = time.clock() +w.sort() +print("Tim Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) +#------------------------------------------------ +# size = 1,000,000 +#------------------------------------------------ +w = [a for a in range(0, size3)] +w.reverse() +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +w = [a for a in range(0, size3)] +w.reverse() +t1 = time.clock() +quick_sort(w) +print("Quick Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +w = [a for a in range(0, size3)] +w.reverse() +t1 = time.clock() +w.sort() +print("Tim Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +#--------------RANDOM ORDER, MANY DUPLICATES------------------ +#------------------------------------------------ +# size = 10,000 +#------------------------------------------------ +print("\nRandom Order, Many Duplicates\n---------------------------------") + +w = [random.randint(0, size2//10) for a 
in range(0, size2)] +t1 = time.clock() +insertion_sort3(w) +print("Insertion Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, size2//10) for a in range(0, size2)] +t1 = time.clock() +selection_sort(w) +print("Selection Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0,size2//10) for a in range(0, size2)] +t1 = time.clock() +bubble_sort2(w) +print("Bubble Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, size2//10) for a in range(0, size2)] +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, size2//10) for a in range(0, size2)] +t1 = time.clock() +quick_sort(w) +print("Quick Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, size2//10) for a in range(0, size2)] +t1 = time.clock() +w.sort() +print("Tim Sort(size=", str(size2),"): ", (time.clock()-t1) * 1000) +#------------------------------------------------ +# size = 1,000,000 +#------------------------------------------------ +w = [random.randint(0, size2//10) for a in range(0, size3)] +t1 = time.clock() +merge_sort(w) +print("Merge Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, size2//10) for a in range(0, size3)] +t1 = time.clock() +#quick_sort(w) +#print("Quick Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) + +w = [random.randint(0, size2//10) for a in range(0, size3)] +t1 = time.clock() +w.sort() print("Tim Sort(size=", str(size3),"): ", (time.clock()-t1) * 1000) \ No newline at end of file diff --git a/Sorting Algorithms/bubble_sort.py b/Sorting Algorithms/bubble_sort.py new file mode 100644 index 00000000..c24853c9 --- /dev/null +++ b/Sorting Algorithms/bubble_sort.py @@ -0,0 +1,25 @@ +#--------------------------------------- +# Bubble Sort +#--------------------------------------- +# not optimized +def bubble_sort1(A): + for i in range (0, len(A) - 1): + 
for j in range (0, len(A) - i - 1): + if A[j] > A[j+1]: + A[j], A[j+1] = A[j+1], A[j] + +# optimized to exit if no swaps occur +def bubble_sort2(A): + for i in range (0, len(A) - 1): + done = True + for j in range (0, len(A) - i - 1): + if A[j] > A[j+1]: + A[j], A[j+1] = A[j+1], A[j] + done = False + if done: + return + +A = [5,9,1,2,4,8,6,3,7] +print(A) +bubble_sort1(A) +print(A) \ No newline at end of file diff --git a/Sorting Algorithms/insertion_sort.py b/Sorting Algorithms/insertion_sort.py new file mode 100644 index 00000000..ee911f2a --- /dev/null +++ b/Sorting Algorithms/insertion_sort.py @@ -0,0 +1,37 @@ +#--------------------------------------- +# Insertion Sort +#--------------------------------------- +# not optimized, equiv to while version below, but uses for loop +def insertion_sort1(A): + for i in range(1, len(A)): + for j in range(i-1, -1, -1): + if A[j] > A[j+1]: + A[j], A[j+1] = A[j+1], A[j] + else: + break + +# not optimized, equiv to break version, but uses while loop +def insertion_sort2(A): + for i in range(1, len(A)): + j = i-1 + while A[j] > A[j+1] and j >= 0: + A[j], A[j+1] = A[j+1], A[j] + j -= 1 + +# optimized - shifts instead of swapping +def insertion_sort3(A): + for i in range(1, len(A)): + curNum = A[i] + k = 0 + for j in range(i-1, -2, -1): + k = j + if A[j] > curNum: + A[j+1] = A[j] + else: + break + A[k+1] = curNum + +A = [5,9,1,2,4,8,6,3,7] +print(A) +insertion_sort1(A) +print(A) \ No newline at end of file diff --git a/Mergesort.py b/Sorting Algorithms/merge_sort.py similarity index 79% rename from Mergesort.py rename to Sorting Algorithms/merge_sort.py index ea55cfc4..73d3ab31 100644 --- a/Mergesort.py +++ b/Sorting Algorithms/merge_sort.py @@ -1,31 +1,34 @@ -import sys - -def merge_sort(A): - merge_sort2(A, 0, len(A)-1) - -def merge_sort2(A, first, last): - if first < last: - middle = (first + last)//2 - merge_sort2(A, first, middle) - merge_sort2(A, middle+1, last) - merge(A, first, middle, last) - -def merge(A, first, 
middle, last): - L = A[first:middle+1] - R = A[middle+1:last+1] - L.append(sys.maxsize) - R.append(sys.maxsize) - i = j = 0 - - for k in range (first, last+1): - if L[i] <= R[j]: - A[k] = L[i] - i += 1 - else: - A[k] = R[j] - j += 1 - -A = [5,9,1,2,4,8,6,3,7] -print(A) -merge_sort(A) -print(A) \ No newline at end of file +#--------------------------------------- +# Merge Sort +#--------------------------------------- +import sys + +def merge_sort(A): + merge_sort2(A, 0, len(A)-1) + +def merge_sort2(A, first, last): + if first < last: + middle = (first + last)//2 + merge_sort2(A, first, middle) + merge_sort2(A, middle+1, last) + merge(A, first, middle, last) + +def merge(A, first, middle, last): + L = A[first:middle+1] + R = A[middle+1:last+1] + L.append(sys.maxsize) + R.append(sys.maxsize) + i = j = 0 + + for k in range (first, last+1): + if L[i] <= R[j]: + A[k] = L[i] + i += 1 + else: + A[k] = R[j] + j += 1 + +A = [5,9,1,2,4,8,6,3,7] +print(A) +merge_sort(A) +print(A) diff --git a/Sorting Algorithms/quick_sort.py b/Sorting Algorithms/quick_sort.py new file mode 100644 index 00000000..9731a8ee --- /dev/null +++ b/Sorting Algorithms/quick_sort.py @@ -0,0 +1,50 @@ +#--------------------------------------- +# Quick Sort +#--------------------------------------- +def quick_sort(A): + quick_sort2(A, 0, len(A)-1) + +def quick_sort2(A, low, hi): + if hi-low < threshold and low < hi: + quick_selection(A, low, hi) + elif low < hi: + p = partition(A, low, hi) + quick_sort2(A, low, p - 1) + quick_sort2(A, p + 1, hi) + +def get_pivot(A, low, hi): + mid = (hi + low) // 2 + s = sorted([A[low], A[mid], A[hi]]) + if s[1] == A[low]: + return low + elif s[1] == A[mid]: + return mid + return hi + +def partition(A, low, hi): + pivotIndex = get_pivot(A, low, hi) + pivotValue = A[pivotIndex] + A[pivotIndex], A[low] = A[low], A[pivotIndex] + border = low + + for i in range(low, hi+1): + if A[i] < pivotValue: + border += 1 + A[i], A[border] = A[border], A[i] + A[low], A[border] = 
A[border], A[low] + + return (border) + +def quick_selection(x, first, last): + for i in range (first, last): + minIndex = i + for j in range (i+1, last+1): + if x[j] < x[minIndex]: + minIndex = j + if minIndex != i: + x[i], x[minIndex] = x[minIndex], x[i] + +A = [5,9,1,2,4,8,6,3,7] +print(A) +quick_sort(A) +print(A) \ No newline at end of file diff --git a/Sorting Algorithms/selection_sort.py b/Sorting Algorithms/selection_sort.py new file mode 100644 index 00000000..f3209f46 --- /dev/null +++ b/Sorting Algorithms/selection_sort.py @@ -0,0 +1,16 @@ +#--------------------------------------- +# Selection Sort +#--------------------------------------- +def selection_sort(A): + for i in range (0, len(A) - 1): + minIndex = i + for j in range (i+1, len(A)): + if A[j] < A[minIndex]: + minIndex = j + if minIndex != i: + A[i], A[minIndex] = A[minIndex], A[i] + +A = [5,9,1,2,4,8,6,3,7] +print(A) +selection_sort(A) +print(A) \ No newline at end of file diff --git a/Stacks, Queues & Heaps.ipynb b/Stacks, Queues & Heaps.ipynb new file mode 100644 index 00000000..a54ede21 --- /dev/null +++ b/Stacks, Queues & Heaps.ipynb @@ -0,0 +1,346 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Stacks, Queues & Heaps\n", + "© Joe James, 2019.\n", + "\n", + "### Stack using Python List\n", + "Stack is a LIFO data structure -- last-in, first-out. \n", + "Use append() to push an item onto the stack. \n", + "Use pop() to remove an item." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[4, 7, 12, 19]\n" + ] + } + ], + "source": [ + "my_stack = list()\n", + "my_stack.append(4)\n", + "my_stack.append(7)\n", + "my_stack.append(12)\n", + "my_stack.append(19)\n", + "print(my_stack)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "19\n", + "12\n", + "[4, 7]\n" + ] + } + ], + "source": [ + "print(my_stack.pop())\n", + "print(my_stack.pop())\n", + "print(my_stack)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Stack using List with a Wrapper Class\n", + "We create a Stack class and a full set of Stack methods. \n", + "But the underlying data structure is really a Python List. \n", + "For pop and peek methods we first check whether the stack is empty, to avoid exceptions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "class Stack():\n", + " def __init__(self):\n", + " self.stack = list()\n", + " def push(self, item):\n", + " self.stack.append(item)\n", + " def pop(self):\n", + " if len(self.stack) > 0:\n", + " return self.stack.pop()\n", + " else:\n", + " return None\n", + " def peek(self):\n", + " if len(self.stack) > 0:\n", + " return self.stack[len(self.stack)-1]\n", + " else:\n", + " return None\n", + " def __str__(self):\n", + " return str(self.stack)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test Code for Stack Wrapper Class" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1, 3]\n", + "3\n", + "1\n", + "1\n", + "None\n" + ] + } + ], + "source": [ + "my_stack = Stack()\n", + "my_stack.push(1)\n", + "my_stack.push(3)\n", + "print(my_stack)\n", + "print(my_stack.pop())\n", + "print(my_stack.peek())\n", + "print(my_stack.pop())\n", + "print(my_stack.pop())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "### Queue using Python Deque\n", + "Queue is a FIFO data structure -- first-in, first-out. \n", + "Deque is a double-ended queue, but we can use it for our queue. \n", + "We use append() to enqueue an item, and popleft() to dequeue an item. \n", + "See [Python docs](https://docs.python.org/3/library/collections.html#collections.deque) for deque." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "deque([5, 10])\n", + "5\n" + ] + } + ], + "source": [ + "from collections import deque\n", + "my_queue = deque()\n", + "my_queue.append(5)\n", + "my_queue.append(10)\n", + "print(my_queue)\n", + "print(my_queue.popleft())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Fun exercise:\n", + "Write a wrapper class for the Queue class, similar to what we did for Stack, but using Python deque. \n", + "Try adding enqueue, dequeue, and get_size methods." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Python Single-ended Queue Wrapper Class using Deque\n", + "We rename the append method to enqueue, and popleft to dequeue. \n", + "We also add peek and get_size operations." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import deque\n", + "class Queue():\n", + " def __init__(self):\n", + " self.queue = deque()\n", + " self.size = 0\n", + " def enqueue(self, item):\n", + " self.queue.append(item)\n", + " self.size += 1\n", + " def dequeue(self, item):\n", + " if self.size > 0:\n", + " self.size -= 1\n", + " return self.queue.popleft()\n", + " else: \n", + " return None\n", + " def peek(self):\n", + " if self.size > 0:\n", + " ret_val = self.queue.popleft()\n", + " queue.appendleft(ret_val)\n", + " return ret_val\n", + " else:\n", + " return None\n", + " def get_size(self):\n", + " return self.size" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Python MaxHeap\n", + "A MaxHeap always bubbles the highest value to the top, so it can be removed instantly. \n", + "Public functions: push, peek, pop \n", + "Private functions: __swap, __floatUp, __bubbleDown, __str__." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "class MaxHeap:\n", + " def __init__(self, items=[]):\n", + " super().__init__()\n", + " self.heap = [0]\n", + " for item in items:\n", + " self.heap.append(item)\n", + " self.__floatUp(len(self.heap) - 1)\n", + "\n", + " def push(self, data):\n", + " self.heap.append(data)\n", + " self.__floatUp(len(self.heap) - 1)\n", + "\n", + " def peek(self):\n", + " if self.heap[1]:\n", + " return self.heap[1]\n", + " else:\n", + " return False\n", + " \n", + " def pop(self):\n", + " if len(self.heap) > 2:\n", + " self.__swap(1, len(self.heap) - 1)\n", + " max = self.heap.pop()\n", + " self.__bubbleDown(1)\n", + " elif len(self.heap) == 2:\n", + " max = self.heap.pop()\n", + " else: \n", + " max = False\n", + " return max\n", + "\n", + " def __swap(self, i, j):\n", + " self.heap[i], self.heap[j] = self.heap[j], self.heap[i]\n", + "\n", + " def __floatUp(self, index):\n", + " parent = index//2\n", + " if index <= 1:\n", + " return\n", + " elif self.heap[index] > self.heap[parent]:\n", + " self.__swap(index, parent)\n", + " self.__floatUp(parent)\n", + "\n", + " def __bubbleDown(self, index):\n", + " left = index * 2\n", + " right = index * 2 + 1\n", + " largest = index\n", + " if len(self.heap) > left and self.heap[largest] < self.heap[left]:\n", + " largest = left\n", + " if len(self.heap) > right and self.heap[largest] < self.heap[right]:\n", + " largest = right\n", + " if largest != index:\n", + " self.__swap(index, largest)\n", + " self.__bubbleDown(largest)\n", + " \n", + " def __str__(self):\n", + " return str(self.heap)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### MaxHeap Test Code" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[0, 95, 10, 21, 3]\n", + "95\n", + "21\n" + ] + } + ], + "source": [ + "m = 
MaxHeap([95, 3, 21])\n", + "m.push(10)\n", + "print(m)\n", + "print(m.pop())\n", + "print(m.peek())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/String Formatting.ipynb b/String Formatting.ipynb new file mode 100644 index 00000000..82546e12 --- /dev/null +++ b/String Formatting.ipynb @@ -0,0 +1,444 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python String format()\n", + "[Official docs](https://docs.python.org/3/library/string.html#format-string-syntax) \n", + "[more documentation](https://pyformat.info)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Replace with String - positional" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My name is Alex.\n", + "My name is Alex Marshall.\n" + ] + } + ], + "source": [ + "first_name = 'Alex'\n", + "last_name = 'Marshall'\n", + "print('My name is {}.'.format(first_name))\n", + "print('My name is {} {}.'.format(first_name, last_name))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Replace with String using Index\n", + "Using indexes can be useful when order varies." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My name is Alex Marshall.\n", + "My name is Marshall, Alex. 
First name Alex\n" + ] + } + ], + "source": [ + "print('My name is {0} {1}.'.format(first_name, last_name))\n", + "print('My name is {1}, {0}. First name {0}'.format(first_name, last_name))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Alignment: Align Left, Right, Middle\n", + "{:<} align Left (default is align left, so this is optional) \n", + "{:>n} align Right with n padding spaces \n", + "{:^n} align Middle with n padding spaces" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of cases: 5\n", + "Number of cases: 16\n", + "Number of cases: 294\n", + "Number of cases: 5\n", + "Number of cases: 16\n", + "Number of cases: 294\n" + ] + } + ], + "source": [ + "# align left - these both do the same thing\n", + "cases = [5, 16, 294]\n", + "for case in cases:\n", + " print('Number of cases: {}'.format(case))\n", + "for case in cases:\n", + " print('Number of cases: {:<}'.format(case))" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of cases: 5\n", + "Number of cases: 16\n", + "Number of cases: 294\n" + ] + } + ], + "source": [ + "# align right with 5 total spaces\n", + "for case in cases:\n", + " print('Number of cases:{:>5}'.format(case))" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of cases: 5 \n", + "Number of cases: 16 \n", + "Number of cases: 294 \n" + ] + } + ], + "source": [ + "# align center with 5 total spaces\n", + "for case in cases:\n", + " print('Number of cases:{:^5}'.format(case))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Integers & Floats\n", + "{:d} Integer variable \n", + "{:5d} Integer with padding of 5 \n", + "{:f} Floating 
point variable " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Length is 26.\n", + "Length is 26.\n", + "In dog years, I'm 8 .\n" + ] + } + ], + "source": [ + "length = 26\n", + "print('Length is {:d}.'.format(length))\n", + "\n", + "# align right, padding=6, integer\n", + "print('Length is {:>6d}.'.format(length))\n", + "\n", + "# named variable, align center, padding=4, integer\n", + "print(\"In dog years, I'm {age:^5d}.\".format(age=8))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Distance to moon is 238,900 miles.\n" + ] + } + ], + "source": [ + "# integer with commas\n", + "print('Distance to moon is {:,d} miles.'.format(238900))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Radius is 4.780000 inches.\n", + "Radius is 4.8 inches.\n", + "Radius is 0004.8 inches.\n", + "Radius is 4.78000 inches.\n" + ] + } + ], + "source": [ + "radius = 4.78\n", + "print('Radius is {:f} inches.'.format(radius))\n", + "\n", + "# round to 1 decimal place, float\n", + "print('Radius is {:.1f} inches.'.format(radius))\n", + "\n", + "# padding=6 (pads with leading 0's), round to 1 decimal\n", + "print('Radius is {:06.1f} inches.'.format(radius))\n", + "\n", + "# padding=5 decimal places\n", + "print('Radius is {:.5f} inches.'.format(radius))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A is +15. B is -9. C is 33.\n", + "A is +15. B is -9. B is -9.\n" + ] + } + ], + "source": [ + "# positive & negative signs\n", + "a, b, c = 15, -9, 33\n", + "print('A is {:+d}. B is {:+d}. 
C is {:-d}.'.format(a, b, c))\n", + "\n", + "# {+3d} shows pos or neg sign, padding=3. \n", + "# {: d} prints neg or a leading space if positive.\n", + "print('A is {:+3d}. B is {:+4d}. B is {: d}.'.format(a, b, b))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Named Placeholders\n", + "You can pass in named variables as keyword args, or as an unpacked dict. \n", + "And it's easy to pass in a list." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mekael is a Carpenter.\n" + ] + } + ], + "source": [ + "print(\"{name} is a {job}.\".format(name='Mekael', job='Carpenter'))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'name'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mjob\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'Carpenter'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m# THIS DOES NOT WORK!\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"{name} is a {job}.\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjob\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m: 'name'" + ] + } + ], + "source": [ + "name = 'Mekael'\n", + "job = 'Carpenter'\n", + "# THIS DOES NOT WORK!\n", + "print(\"{name} is a {job}.\".format(name, job))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + 
"metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mekael is a Carpenter.\n" + ] + } + ], + "source": [ + "# This works great\n", + "print(\"{n} is a {j}.\".format(n=name, j=job))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Mekael is a Carpenter.\n" + ] + } + ], + "source": [ + "# Or use a dictionary, and ** unpacks the dictionary.\n", + "jobs = {'name':'Mekael', 'job':'Carpenter'}\n", + "print(\"{name} is a {job}.\".format(**jobs))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Score 2 is 96\n" + ] + } + ], + "source": [ + "# passing in a list is clean and easy\n", + "scores = [78, 96, 83, 86]\n", + "print('Score 2 is {s[1]}'.format(s = scores))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Scientific Notation\n", + "Use {:e}, or upper case E." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "My big number is 8.745770e+02\n", + "A bigger number is 6.022141E+23\n" + ] + } + ], + "source": [ + "print('My big number is {:e}'.format(874.577))\n", + "print('A bigger number is {:E}'.format(602214090000000000000000))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Binary & Hexadecimal\n", + "{:b} converts decimal to binary\n", + "{:x} converts decimal to hex. Or use upper case X for capitals." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The binary equivalent of 79 is 1001111\n", + "The Hexadecimal equivalent of 183 is B7\n" + ] + } + ], + "source": [ + "print('The binary equivalent of 79 is {:b}'.format(79))\n", + "print('The Hexadecimal equivalent of 183 is {:X}'.format(183))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Strings/Using Python Strings.ipynb b/Strings/Using Python Strings.ipynb new file mode 100644 index 00000000..19643b9c --- /dev/null +++ b/Strings/Using Python Strings.ipynb @@ -0,0 +1,518 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using Strings in Python 3\n", + "[Python String docs](https://docs.python.org/3/library/string.html)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating Strings\n", + "Enclose a string in single or double quotes, or in triple single quotes. \n", + "And you can embed single quotes within double quotes, or double quotes within single quotes. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tony Stark is Ironman.\n", + "Her book is called \"The Magician\".\n", + "Captain Rogers kicks butt.\n" + ] + } + ], + "source": [ + "s = 'Tony Stark is'\n", + "t = \"Ironman.\"\n", + "print(s, t)\n", + "u = 'Her book is called \"The Magician\".'\n", + "print(u)\n", + "v = '''Captain Rogers kicks butt.'''\n", + "print(v)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Type, Len, Split, Join\n", + "Get the number of characters in a string using len. \n", + "To get the number of words you have to split the string into a list. Split uses a space as its default, or you can split on any substring you like. \n", + "To reverse a split, use join(str)." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "13\n" + ] + } + ], + "source": [ + "print(type(s))\n", + "print(len(s))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Tony', 'Stark', 'is']\n", + "3\n", + "['Her book is c', 'lled \"The M', 'gici', 'n\".']\n", + "['you', 'are', 'so', 'pretty']\n", + "Just do it.\n" + ] + } + ], + "source": [ + "print(s.split())\n", + "print(len(s.split()))\n", + "print(u.split('a'))\n", + "print('you,are,so,pretty'.split(','))\n", + "print(' '.join(['Just', 'do', 'it.']))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Check if a substring is contained in a string\n", + "Use *in* or *not in*. \n", + "Startswith and Endswith are also useful boolean checks." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n", + "False\n", + "True\n", + "True\n", + "True\n" + ] + } + ], + "source": [ + "print('dog' in s)\n", + "print('k' in t)\n", + "print('k' not in t)\n", + "print(s.startswith('Tony'))\n", + "print(s.endswith('is'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Replace all substrings\n", + "Second example iterates through a dictionary and replaces all instances of text numbers with numerals." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Captain America kicks butt.\n" + ] + } + ], + "source": [ + "v = v.replace('Rogers', 'America')\n", + "print(v)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Anton has 3 cars. Javier has 4.\n" + ] + } + ], + "source": [ + "z = 'Anton has three cars. 
Javier has four.'\n", + "numbers = {'one':'1', 'two':'2', 'three':'3', 'four':'4', 'five':'5'}\n", + "for k,v in numbers.items():\n", + " z = z.replace(k,v)\n", + "print(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Change case" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tony stark is\n", + "IRONMAN.\n", + "Her Book Is Called \"The Magician\".\n", + "Hulk rules!\n" + ] + } + ], + "source": [ + "print(s.lower())\n", + "print(t.upper())\n", + "print(u.title())\n", + "print('hulk rules!'.capitalize())" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "False\n", + "True\n", + "True\n" + ] + } + ], + "source": [ + "print('david'.islower())\n", + "print('hulk'.isupper())\n", + "print('Hulk'.istitle())\n", + "print('covid19'.isalnum())" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "True\n", + "False\n", + "True\n", + "False\n" + ] + } + ], + "source": [ + "print('Thor'.isalpha())\n", + "print('3.14'.isnumeric())\n", + "print('314'.isdigit())\n", + "print('3.14'.isdecimal())" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0123456789\n", + "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\n", + "abcdefghijklmnopqrstuvwxyz\n", + "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n" + ] + } + ], + "source": [ + "import string\n", + "print(string.digits)\n", + "print(string.punctuation)\n", + "print(string.ascii_lowercase)\n", + "print(string.ascii_uppercase)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strip leading or trailing characters\n", + "This is often used to strip blank spaces 
or newlines, but can be used for much more." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Natasha is a spy.\n", + "Natasha is a spy \n", + "\n", + "\n", + " Natasha is a spy\n", + "Natasha is a spy. She has red hair.\n", + "She has red \n" + ] + } + ], + "source": [ + "w = '\\n Natasha is a spy \\n'\n", + "x = '\\nShe has red hair\\n'\n", + "\n", + "print(w.strip() + '.')\n", + "print(w.lstrip())\n", + "print(w.rstrip())\n", + "print(w.strip() + '. ' + x.strip() + '.')\n", + "print(x.strip().rstrip('arih'))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "What do you want\n" + ] + } + ], + "source": [ + "y = 'What do you want?!!&?'\n", + "print(y.rstrip(string.punctuation))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find, and Count substrings\n", + "Search from the left with find, or from the right with rfind. \n", + "The return value is the start index of the first match of the substring." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "5\n", + "What do you want?!!&?\n" + ] + } + ], + "source": [ + "print(y.find('a'))\n", + "print(y.rfind('do'))\n", + "print(y)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Natasha is a spy\n", + "4\n" + ] + } + ], + "source": [ + "print(w.strip())\n", + "print(w.count('a'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Strings are immutable\n", + "Any change to a string results in a new string being written to a new block of memory. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4565794160\n", + "4565793776\n" + ] + } + ], + "source": [ + "m = 'Black widow'\n", + "print(id(m))\n", + "m = m + 's'\n", + "print(id(m))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Tony Stark is Ironman.\n", + "Tony Stark is Ironman.\n" + ] + } + ], + "source": [ + "print(s, t)\n", + "z = s + ' ' + t\n", + "print(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Slicing Substrings\n", + "string[from:to+1:step]\n", + "Only 1 parameter: it is used as an index. \n", + "From defaults to beginning. \n", + "To defaults to end. \n", + "Step defaults to 1." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "567\n" + ] + } + ], + "source": [ + "z = '0123456789'\n", + "print(z[1])\n", + "print(z[5:8])" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "012\n", + "789\n", + "89\n", + "24\n" + ] + } + ], + "source": [ + "print(z[:3])\n", + "print(z[7:])\n", + "print(z[-2:])\n", + "print(z[2:5:2])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git 
a/Strings/bands.txt b/Strings/bands.txt new file mode 100644 index 00000000..7148cf12 --- /dev/null +++ b/Strings/bands.txt @@ -0,0 +1,38 @@ +Rolling Stones +Lady Gaga +Jackson Browne +Maroon 5 +Arijit Singh +Elton John +John Mayer +CCR +Eagles +Pink +Aerosmith +Adele +Taylor Swift +Faye Wong +UB40 +ColdPlay +Boston +4 Non Blondes +The Cars +Cheap Trick +Def Leppard +Ed Sheeran +Dire Straits +Train +Tom Petty +One Direction +Jimmy Buffett +Mumford & Sons +Phil Collins +Rod Stewart +The Script +Elvis +U2 +Simon & Garfunkel +Michael Buble +Abba +The Jackson 5 +R.E.M. \ No newline at end of file diff --git a/Tensorflow_Keras/TensorFlow Tutorial with MNIST Dataset.ipynb b/Tensorflow_Keras/TensorFlow Tutorial with MNIST Dataset.ipynb new file mode 100644 index 00000000..49cb48ed --- /dev/null +++ b/Tensorflow_Keras/TensorFlow Tutorial with MNIST Dataset.ipynb @@ -0,0 +1,337 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TensorFlow Tutorial with MNIST Dataset\n", + "**About the MNIST dataset**. \n", + "MNIST is the equivalent *Hello World* of image analysis.\n", + "It consists of hand written numbers, 0-9, in 28x28 pixel squares. \n", + "Each gray-scale pixel contains an integer 0-255 to indicate darkness, with 0 white and 255 black. \n", + "There are about 60,000 training records, and about 10,000 test records. \n", + "In other words, the images of numbers have already been transformed into arrays of ints to make them easier to use for ML projects. You can find more info on the dataset [here](http://yann.lecun.com/exdb/mnist/). You can also download it from [here](https://s3.amazonaws.com/img-datasets/mnist.pkl.gz).\n", + "## 1. Load Data into a Numpy Array \n", + "I downloaded the data file onto my desktop and loaded it locally. 
\n", + "You can also load it directly from the cloud as follows: \n", + "```mnist = tf.keras.datasets.mnist \n", + "(x_train, y_train), (x_test, y_test) = mnist.load_data() \n", + "``` \n", + "**After the load:** \n", + "x_train contains 60k arrays of 28x28. \n", + "The y_train vector contains the corresponding labels for these. \n", + "x_test contains 10k arrays of 28x28. \n", + "The y_test vector contains the corresponding labels for these." + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 47040000 (60000, 28, 28)\n", + " 60000 (60000,)\n", + " 7840000 (10000, 28, 28)\n", + " 10000 (10000,)\n", + "8 2\n" + ] + } + ], + "source": [ + "import pickle\n", + "import numpy as np\n", + "\n", + "with open('/Users/joejames/desktop/mnist.pkl', 'rb') as f:\n", + " (x_train, y_train), (x_test, y_test) = pickle.load(f, encoding='latin1')\n", + " \n", + "print(type(x_train), x_train.size, x_train.shape)\n", + "print(type(y_train), y_train.size, y_train.shape)\n", + "print(type(x_test), x_test.size, x_test.shape)\n", + "print(type(y_test), y_test.size, y_test.shape)\n", + "print(y_train[55], y_test[583])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Use Matplotlib to visualize one record. \n", + "I set the colormap to Greys. There are a bunch of other colormap choices if you like bright visualizations. Try magma or any of the other colormap choice in the [docs](https://matplotlib.org/tutorials/colors/colormaps.html)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 217, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 217, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADmJJREFUeJzt3X+MVPW5x/HPIxcwbhvDlr1IKLi1gZsYI/RmRJOaS01vGzEkWP8gENNs1XQbheSSkFhCY1RCArmxJfxxhWwvG0Cr5WoxYtQrPzTBxpvGUVGhXsWL2xSC7BJ/QDUG2T73jz00W935zjBzZs6sz/uVbGbmPOfMeXLChzMz35nzNXcXgHguKroBAMUg/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgvqHVu5s6tSp3t3d3cpdAqEMDAzo1KlTVsu6DYXfzG6UtEnSBEn/6e4bUut3d3erXC43sksACaVSqeZ1637Zb2YTJP2HpIWSrpS0zMyurPf5ALRWI+/550t6192PuvtZSb+VtDiftgA0WyPhnyHpz6MeH8uW/R0z6zWzspmVh4aGGtgdgDw1/dN+d+9z95K7l7q6upq9OwA1aiT8xyXNHPX4m9kyAONAI+F/WdJsM/uWmU2StFTS7nzaAtBsdQ/1ufs5M1sh6TmNDPX1u/vh3DoD0FQNjfO7+zOSnsmpFwAtxNd7gaAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCKqhWXrNbEDSGUnDks65eymPpgA0X0Phz9zg7qdyeB4ALcTLfiCoRsPvkvaY2Stm1ptHQwBao9GX/de7+3Ez+0dJe83sf939wOgVsv8UeiVp1qxZDe4OQF4aOvO7+/HsdlDSE5Lmj7FOn7uX3L3U1dXVyO4A5Kju8JtZh5l9/fx9ST+UdCivxgA0VyMv+6dJesLMzj/PI+7+37l0BaDp6g6/ux+VNDfHXlCA06dPJ+s7duxo6Pk3btxYsXb06NGGnvvxxx9P1hcvXlyxtnPnzuS2d955Z7J+3XXXJevPPvtssj5hwoRkvRUY6gOCIvxAUIQfCIrwA0ERfiAowg8Elcev+lCwc+fOVawdOHCgYk2SVq5cmawfPny4rp5qcdFFjZ17lixZklMnF+7FF19M1oeHh5N1hvoAFIbwA0ERfiAowg8ERfiBoAg/EBThB4JinH8cOHPmTLJ+ww03VKy99tprDe370ksvTdZXrVqVrM+ZM6di7fXXX09uu379+mS9mRYtWpSsP/DAA8n6pEmT8mynKTjzA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQjPO3gUbG8aX0WP7VV1+d3Pb+++9P1hcsWJCsV/seQOpaAwcPHkxu20yXXHJJsr527dpkffbs2Xm2UwjO/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QVNVxfjPrl7RI0qC7X5Ut65S0U1K3pAFJS9z9w+a1+dVW7bfh1X6TP2PGjIq1559/PrntlClTkvVGvf322xVrGzZsaOq+U1544YVkfe7cr/7s87Wc+bdJuvELy1ZL2u/usyXtzx4DGEeqht/dD0j64AuLF0vant3fLunmnPsC0GT1vuef5u4nsvvvS5qWUz8AWqThD/
zc3SV5pbqZ9ZpZ2czKQ0NDje4OQE7qDf9JM5suSdntYKUV3b3P3UvuXurq6qpzdwDyVm/4d0vqye73SHoyn3YAtErV8JvZo5L+R9I/mdkxM7tD0gZJPzCzI5L+NXsMYBypOs7v7ssqlL6fcy+oU0dHR8Vatd+tN+r06dPJ+o4dO5q6/5Senp6KtWrXOYiAb/gBQRF+ICjCDwRF+IGgCD8QFOEHguLS3W3g1ltvTda3bNmSrL/zzjsVa3fddVdy2wcffDBZnzx5crLe29ubrD/22GPJeiMWLlyYrG/evLlibTxMod1snPmBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjG+dvAnDlzkvV9+/Yl6/PmzatY27ZtW3JbM0vW77333mT9o48+StYb0dnZmayvW7cuWa/2HYXoOPMDQRF+ICjCDwRF+IGgCD8QFOEHgiL8QFA2MttWa5RKJS+Xyy3b31fF559/nqy/9NJLFWtLly5Nbjs4WHGypaarNo6/d+/eZD31/YaoSqWSyuVy+ssbGc78QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxBU1d/zm1m/pEWSBt39qmzZfZJ+KmkoW22Nuz/TrCajmzhxYrK+YMGCirWnn346ue0111xTV0+1mjp1asXac889l9yWcfzmquXMv03SjWMs3+ju87I/gg+MM1XD7+4HJH3Qgl4AtFAj7/lXmNkbZtZvZlNy6whAS9Qb/s2Svi1pnqQTkn5ZaUUz6zWzspmVh4aGKq0GoMXqCr+7n3T3YXf/q6RfS5qfWLfP3UvuXurq6qq3TwA5qyv8ZjZ91MMfSTqUTzsAWqWWob5HJX1P0lQzOybpXknfM7N5klzSgKSfNbFHAE1QNfzuvmyMxVub0AvqdPbs2Yq1hx9+uIWdfFlvb2/FGuP4xeIbfkBQhB8IivADQRF+ICjCDwRF+IGgmKJ7HKh26e79+/dXrG3atKmhfVe7vHa1nxt/9tlnDe0fzcOZHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCYpx/HNiyZUuyvnLlyrqf+/bbb0/W169fn6yvWLEiWT927NgF94TW4MwPBEX4gaAIPxAU4QeCIvxAUIQfCIrwA0Exzt8G+vr6kvW1a9fW/dzVLt19yy23JOuTJ0+ue99ob5z5gaAIPxAU4QeCIvxAUIQfCIrwA0ERfiCoquP8ZjZT0g5J0yS5pD5332RmnZJ2SuqWNCBpibt/2LxWx68jR44k62vWrEnWP/wwfVh7enoq1hodx//000+T9ffeey9Zv+KKK5J1FKeWM/85Savc/UpJ10labmZXSlotab+7z5a0P3sMYJyoGn53P+Hur2b3z0h6S9IMSYslbc9W2y7p5mY1CSB/F/Se38y6JX1H0h8kTXP3E1npfY28LQAwTtQcfjP7mqTfSVrp7qdH19zdNfJ5wFjb9ZpZ2czKQ0NDDTULID81hd/MJmok+L9x913Z4pNmNj2rT5c0ONa27t7n7iV3L3V1deXRM4AcVA2/mZmkrZLecvdfjSrtlnT+Y+YeSU/m3x6AZqnlJ73flfRjSW+a2cFs2RpJGyT9l5ndIelPkpY0p8X2d/bs2WT92muvTdY//vjjZP3yyy9P1lOX9p40aVJy22pWr04P4pTL5WR9yZKw/yzaXtXwu/vvJVmF8vfzbQdAq/ANPyAowg8ERfiBoAg/EBThB4Ii/EBQXLo7B/39/cl6tXH8jo6OZP2pp55K1hsZy9+3b1+yvnXr1mR91qxZyfptt912wT2hNTjzA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQjPPn4JNPPmlo+2qX1z506FDd9UceeSS57Z49e5L1atcq2LVrV7Le2dmZrKM4nPmBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjG+dvAQw891FC9mdatW5esz507t0WdIG+c+YGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gqKrj/GY2U9IOSdMkuaQ+d99kZvdJ+qmkoW
zVNe7+TLMabWfLly9P1u++++6m7r+rq6ti7Z577kluW+1aApdddlmyblZp9na0u1q+5HNO0ip3f9XMvi7pFTPbm9U2uvsDzWsPQLNUDb+7n5B0Irt/xszekjSj2Y0BaK4Les9vZt2SviPpD9miFWb2hpn1m9mUCtv0mlnZzMpDQ0NjrQKgADWH38y+Jul3kla6+2lJmyV9W9I8jbwy+OVY27l7n7uX3L2Uem8KoLVqCr+ZTdRI8H/j7rskyd1Puvuwu/9V0q8lzW9emwDyVjX8NvJx7lZJb7n7r0Ytnz5qtR9JSl9iFkBbqeXT/u9K+rGkN83sYLZsjaRlZjZPI8N/A5J+1pQOx4GLL744WR8eHm5RJ0Dtavm0//eSxhrMDTmmD3xV8A0/ICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUOburduZ2ZCkP41aNFXSqZY1cGHatbd27Uuit3rl2dvl7l7T9fJaGv4v7dys7O6lwhpIaNfe2rUvid7qVVRvvOwHgiL8QFBFh7+v4P2ntGtv7dqXRG/1KqS3Qt/zAyhO0Wd+AAUpJPxmdqOZvW1m75rZ6iJ6qMTMBszsTTM7aGblgnvpN7NBMzs0almnme01syPZ7ZjTpBXU231mdjw7dgfN7KaCeptpZi+Y2R/N7LCZ/Vu2vNBjl+irkOPW8pf9ZjZB0juSfiDpmKSXJS1z9z+2tJEKzGxAUsndCx8TNrN/kfQXSTvc/aps2b9L+sDdN2T/cU5x95+3SW/3SfpL0TM3ZxPKTB89s7SkmyX9RAUeu0RfS1TAcSvizD9f0rvuftTdz0r6raTFBfTR9tz9gKQPvrB4saTt2f3tGvnH03IVemsL7n7C3V/N7p+RdH5m6UKPXaKvQhQR/hmS/jzq8TG115TfLmmPmb1iZr1FNzOGadm06ZL0vqRpRTYzhqozN7fSF2aWbptjV8+M13njA78vu97d/1nSQknLs5e3bclH3rO103BNTTM3t8oYM0v/TZHHrt4Zr/NWRPiPS5o56vE3s2Vtwd2PZ7eDkp5Q+80+fPL8JKnZ7WDB/fxNO83cPNbM0mqDY9dOM14XEf6XJc02s2+Z2SRJSyXtLqCPLzGzjuyDGJlZh6Qfqv1mH94tqSe73yPpyQJ7+TvtMnNzpZmlVfCxa7sZr9295X+SbtLIJ/7/J+kXRfRQoa8rJL2e/R0uujdJj2rkZeDnGvls5A5J35C0X9IRSfskdbZRbw9JelPSGxoJ2vSCerteIy/p35B0MPu7qehjl+irkOPGN/yAoPjADwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUP8PP0VjZQqKM3UAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.cm as cm\n", + "import matplotlib.pyplot as plt\n", + "\n", + "plt.imshow(x_train[55].reshape(28, 28), cmap=cm.Greys)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Plot a bunch of records to see sample data \n", + "Basically, use the same Matplotlib commands above in a for loop to show 18 records from the train set in a subplot figure. We also make the figsize a bit bigger and remove the tick marks for readability." + ] + }, + { + "cell_type": "code", + "execution_count": 213, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 213, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "images = x_train[0:18]\n", + "fig, axes = plt.subplots(3, 6, figsize=[9,5])\n", + "\n", + "for i, ax in enumerate(axes.flat):\n", + " ax.imshow(x_train[i].reshape(28, 28), cmap=cm.Greys)\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "plt.show" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Show distribution of training data labels \n", + "The training data is about evenly distributed across all nine digits. " + ] + }, + { + "cell_type": "code", + "execution_count": 214, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAEeJJREFUeJzt3G+MXfV95/H3pzj0D13FpsxarG2tkWoloislsCMgm1WVjbfGkCrmQYqI2mSEvPI+cLJJVamBPkELzYpKq6ZB2iJZwV3TzYayNBVWFoWOSKKqDyAMgSUBB3lKQm3X4GnGkG5RkyX97oP7c3pDPJ17YeZe17/3S7q653zP75zz+2ns+cz5m6pCktSfn5h2ByRJ02EASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUqVUDIMnbkjw19Pluko8nuTjJfJKj7XtTa58kdyVZTPJ0kiuHtjXX2h9NMreeA5Mk/eMyzpPASS4ATgBXA/uB5aq6M8ktwKaq+kSS64GPAte3dp+uqquTXAwsALNAAU8A/7qqTq+0v0suuaS2b9/+xkYmSZ164okn/rqqZlZrt2HM7e4E/qKqXkiyB3hPqx8CvgJ8AtgD3FuDZHk0ycYkl7a281W1DJBkHtgNfG6lnW3fvp2FhYUxuyhJfUvywijtxr0GcBP/8At7c1WdbNMvApvb9Bbg2NA6x1ttpbokaQpGDoAkFwLvB/7X65e1v/bX5K1ySfYlWUiysLS0tBablCSdxThHANcBX6uql9r8S+3UDu37VKufALYNrbe11Vaq/4iqOlBVs1U1OzOz6iksSdIbNE4AfJAfPV9/GDhzJ88c8OBQ/cPtbqBrgFfaqaKHgV1JNrU7hna1miRpCka6CJzkIuCXgP84VL4TuD/JXuAF4MZWf4jBHUCLwKvAzQBVtZzkDuDx1u72MxeEJUmTN9ZtoJM2Oztb3gUkSeNJ8kRVza7WzieBJalTBoAkdcoAkKROjfsksEa0/Zb/va7b//ad71vX7Us6/3kEIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CnfBirpDVvvt96Cb75dTx4BSFKnDABJ6pQBIEmd8hqA1pTnhKV/OkY6AkiyMckDSb6Z5EiSdyW5OMl8kqPte1NrmyR3JVlM8nSSK4e2M9faH00yt16DkiStbtRTQJ8GvlhVbwfeARwBbgEeqaodwCNtHuA6YEf77APuBkhyMXAbcDVwFXDbmdCQJE3eqgGQ5K3ALwL3AFT
V96vqZWAPcKg1OwTc0Kb3APfWwKPAxiSXAtcC81W1XFWngXlg95qORpI0slGOAC4DloA/SPJkks8kuQjYXFUnW5sXgc1tegtwbGj94622Ul2SNAWjBMAG4Erg7qq6Avhb/uF0DwBVVUCtRYeS7EuykGRhaWlpLTYpSTqLUe4COg4cr6rH2vwDDALgpSSXVtXJdornVFt+Atg2tP7WVjsBvOd19a+8fmdVdQA4ADA7O7smodIb78SR1tf58n9s1QCoqheTHEvytqp6DtgJPNs+c8Cd7fvBtsph4CNJ7mNwwfeVFhIPA/9l6MLvLuDWtR3Oj1rvH5K/BHUuOF9+GWnyRn0O4KPAZ5NcCDwP3Mzg9NH9SfYCLwA3trYPAdcDi8CrrS1VtZzkDuDx1u72qlpek1FIksY2UgBU1VPA7FkW7TxL2wL2r7Cdg8DBcToojcq/hPviz/vN81UQktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASerUqG8DlfSP8MVk+qfIIwBJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASerUSAGQ5NtJvp7kqSQLrXZxkvkkR9v3plZPkruSLCZ5OsmVQ9uZa+2PJplbnyFJkkYxzhHAv6uqd1bVbJu/BXikqnYAj7R5gOuAHe2zD7gbBoEB3AZcDVwF3HYmNCRJk/dmTgHtAQ616UPADUP1e2vgUWBjkkuBa4H5qlquqtPAPLD7TexfkvQmjBoABfxpkieS7Gu1zVV1sk2/CGxu01uAY0PrHm+1leo/Ism+JAtJFpaWlkbsniRpXKO+DfTfVtWJJP8cmE/yzeGFVVVJai06VFUHgAMAs7Oza7JNSdKPG+kIoKpOtO9TwJ8wOIf/Uju1Q/s+1ZqfALYNrb611VaqS5KmYNUASHJRkn92ZhrYBXwDOAycuZNnDniwTR8GPtzuBroGeKWdKnoY2JVkU7v4u6vVJElTMMopoM3AnyQ50/5/VtUXkzwO3J9kL/ACcGNr/xBwPbAIvArcDFBVy0nuAB5v7W6vquU1G4kkaSyrBkBVPQ+84yz17wA7z1IvYP8K2zoIHBy/m5KkteaTwJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdGjkAklyQ5MkkX2jzlyV5LMlikj9KcmGr/2SbX2zLtw9t49ZWfy7JtWs9GEnS6MY5AvgYcGRo/neAT1XVzwOngb2tvhc43eqfau1IcjlwE/ALwG7g95Nc8Oa6L0l6o0YKgCRbgfcBn2nzAd4LPNCaHAJuaNN72jxt+c7Wfg9wX1V9r6q+BSwCV63FICRJ4xv1COD3gN8E/r7N/xzwclW91uaPA1va9BbgGEBb/kpr/8P6WdaRJE3YqgGQ5JeBU1X1xAT6Q5J9SRaSLCwtLU1il5LUpVGOAN4NvD/Jt4H7GJz6+TSwMcmG1mYrcKJNnwC2AbTlbwW+M1w/yzo/VFUHqmq2qmZnZmbGHpAkaTSrBkBV3VpVW6tqO4OLuF+qql8Fvgx8oDWbAx5s04fbPG35l6qqWv2mdpfQZcAO4KtrNhJJ0lg2rN5kRZ8A7kvy28CTwD2tfg/wh0kWgWUGoUFVPZPkfuBZ4DVgf1X94E3sX5L0JowVAFX1FeArbfp5znIXT1X9HfArK6z/SeCT43ZSkrT2fBJYkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1atUASPJTSb6a5P8keSbJf271y5I8lmQxyR8lubDVf7LNL7bl24e2dWurP5fk2vUalCR
pdaMcAXwPeG9VvQN4J7A7yTXA7wCfqqqfB04De1v7vcDpVv9Ua0eSy4GbgF8AdgO/n+SCtRyMJGl0qwZADfzfNvuW9ingvcADrX4IuKFN72nztOU7k6TV76uq71XVt4BF4Ko1GYUkaWwjXQNIckGSp4BTwDzwF8DLVfVaa3Ic2NKmtwDHANryV4CfG66fZR1J0oSNFABV9YOqeiewlcFf7W9frw4l2ZdkIcnC0tLSeu1Gkro31l1AVfUy8GXgXcDGJBvaoq3AiTZ9AtgG0Ja/FfjOcP0s6wzv40BVzVbV7MzMzDjdkySNYZS7gGaSbGzTPw38EnCEQRB8oDWbAx5s04fbPG35l6qqWv2mdpfQZcAO4KtrNRBJ0ng2rN6ES4FD7Y6dnwDur6ovJHkWuC/JbwNPAve09vcAf5hkEVhmcOcPVfVMkvuBZ4HXgP1V9YO1HY4kaVSrBkBVPQ1ccZb685zlLp6q+jvgV1bY1ieBT47fTUnSWvNJYEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1KlVAyDJtiRfTvJskmeSfKzVL04yn+Ro+97U6klyV5LFJE8nuXJoW3Ot/dEkc+s3LEnSakY5AngN+I2quhy4Btif5HLgFuCRqtoBPNLmAa4DdrTPPuBuGAQGcBtwNXAVcNuZ0JAkTd6qAVBVJ6vqa236b4AjwBZgD3CoNTsE3NCm9wD31sCjwMYklwLXAvNVtVxVp4F5YPeajkaSNLKxrgEk2Q5cATwGbK6qk23Ri8DmNr0FODa02vFWW6n++n3sS7KQZGFpaWmc7kmSxjByACT5WeCPgY9X1XeHl1VVAbUWHaqqA1U1W1WzMzMza7FJSdJZjBQASd7C4Jf/Z6vq8638Uju1Q/s+1eongG1Dq29ttZXqkqQpGOUuoAD3AEeq6neHFh0GztzJMwc8OFT/cLsb6BrglXaq6GFgV5JN7eLvrlaTJE3BhhHavBv4EPD1JE+12m8BdwL3J9kLvADc2JY9BFwPLAKvAjcDVNVykjuAx1u726tqeU1GIUka26oBUFV/DmSFxTvP0r6A/Sts6yBwcJwOSpLWh08CS1KnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE4ZAJLUKQNAkjplAEhSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHVq1QBIcjDJqSTfGKpdnGQ+ydH2vanVk+SuJItJnk5y5dA6c6390SRz6zMcSdKoRjkC+O/A7tfVbgEeqaodwCNtHuA6YEf77APuhkFgALcBVwNXAbedCQ1J0nSsGgBV9WfA8uvKe4BDbfoQcMNQ/d4aeBTYmORS4FpgvqqWq+o0MM+Ph4okaYLe6DWAzVV1sk2/CGxu01uAY0PtjrfaSnVJ0pS86YvAVVVArUFfAEiyL8lCkoWlpaW12qwk6XXeaAC81E7t0L5PtfoJYNtQu62ttlL9x1TVgaqararZmZmZN9g9SdJq3mgAHAbO3MkzBzw4VP9wuxvoGuCVdqroYWBXkk3t4u+uVpMkTcmG1Rok+RzwHuCSJMcZ3M1zJ3B/kr3AC8CNrflDwPXAIvAqcDNAVS0nuQN4vLW7vapef2FZkjRBqwZAVX1whUU7z9K2gP0rbOcgcHCs3kmS1o1PAktSpwwASeqUASBJnTIAJKlTBoAkdcoAkKROGQCS1CkDQJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktQpA0CSOmUASFKnDABJ6pQBIEmdMgAkqVMGgCR1ygCQpE5NPACS7E7yXJLFJLdMev+SpIGJBkCSC4D/BlwHXA58MMnlk+yDJGlg0kcAVwGLVfV8VX0fuA/YM+E+SJKYfABsAY4NzR9
vNUnShKWqJrez5APA7qr6D23+Q8DVVfWRoTb7gH1t9m3AcxPrIFwC/PUE93eucNx9cdznv39ZVTOrNdowiZ4MOQFsG5rf2mo/VFUHgAOT7NQZSRaqanYa+54mx90Xx60zJn0K6HFgR5LLklwI3AQcnnAfJElM+Aigql5L8hHgYeAC4GBVPTPJPkiSBiZ9Coiqegh4aNL7HdFUTj2dAxx3Xxy3gAlfBJYknTt8FYQkdcoAoN/XUyTZluTLSZ5N8kySj027T5OU5IIkTyb5wrT7MilJNiZ5IMk3kxxJ8q5p92kSkvx6+zf+jSSfS/JT0+7TuaD7AOj89RSvAb9RVZcD1wD7Oxo7wMeAI9PuxIR9GvhiVb0deAcdjD/JFuA/AbNV9a8Y3IBy03R7dW7oPgDo+PUUVXWyqr7Wpv+GwS+DLp7MTrIVeB/wmWn3ZVKSvBX4ReAegKr6flW9PN1eTcwG4KeTbAB+BvirKffnnGAA+HoKAJJsB64AHptuTybm94DfBP5+2h2ZoMuAJeAP2qmvzyS5aNqdWm9VdQL4r8BfAieBV6rqT6fbq3ODASCS/Czwx8DHq+q70+7Pekvyy8Cpqnpi2n2ZsA3AlcDdVXUF8LfAeX/NK8kmBkf1lwH/Argoya9Nt1fnBgNghNdTnM+SvIXBL//PVtXnp92fCXk38P4k32Zwyu+9Sf7HdLs0EceB41V15ijvAQaBcL7798C3qmqpqv4f8Hng30y5T+cEA6Dj11MkCYPzwUeq6nen3Z9Jqapbq2prVW1n8PP+UlWd938RVtWLwLEkb2ulncCzU+zSpPwlcE2Sn2n/5nfSwcXvUUz8SeBzTeevp3g38CHg60mearXfak9r6/z0UeCz7Y+d54Gbp9yfdVdVjyV5APgagzvfnsSnggGfBJakbnkKSJI6ZQBIUqcMAEnqlAEgSZ0yACSpUwaAJHXKAJCkThkAktSp/w867yTNmpgodAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5923 6742 5958 6131 5842 5421 5918 6265 5851 5949]\n" + ] + } + ], + "source": [ + "counts = np.bincount(y_train)\n", + "nums = np.arange(len(counts))\n", + "plt.bar(nums, counts)\n", + "plt.show()\n", + "print(counts)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Apply Keras/TensorFlow neural network \n", + "Use tensorflow to train the model with 60k training records, compile the model, and classify 10k test records with 98% accuracy. \n", + "**Create the model** \n", + "Build the keras model by stacking layers into the network. Our model here has four layers:\n", + "- Flatten reshapes the data into a 1-dimensional array\n", + "- [Dense](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dense) tells the model to use output arrays of shape (*, 512) and sets rectified linear [activation function](https://keras.io/activations/). \n", + "- [Dropout](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Dropout) applies dropout to the input to help avoid overfitting.\n", + "- The next Dense line condenses the ouput into probabilities for each of the 10 digits.\n", + "\n", + "**Compile the model** \n", + "- [Adam](https://keras.io/optimizers/) is an optimization algorithm that uses stochastic gradient descent to update network weights.\n", + "- Sparse categorical crossentropy is a [loss function](https://keras.io/losses/) that is required to compile the model. The loss function measures how accurate the model is during training. We want to minimize this function to steer the model in the right direction.\n", + "- A metric is a function that is used to judge the performance of your model. We're using accuracy of our predictions as compared to y_test as our metric. 
\n", + "Lastly, we fit our training data into the model, with several training repetitions (epochs), then evaluate our test data. \n", + "\n", + "Our final result is about 98% accuracy in classifying 10k digits in the test set. You can try tweaking this model with different settings to get a better score. An easy tweak is increasing the epochs, which improves accuracy at the expense of time. Follow the links to the Keras layer docs above and try different options for Dense output, activation functions, optimization algorithms and loss functions." + ] + }, + { + "cell_type": "code", + "execution_count": 215, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/4\n", + "60000/60000 [==============================] - 5s 89us/sample - loss: 0.2581 - acc: 0.9244\n", + "Epoch 2/4\n", + "60000/60000 [==============================] - 5s 83us/sample - loss: 0.1180 - acc: 0.9644\n", + "Epoch 3/4\n", + "60000/60000 [==============================] - 5s 81us/sample - loss: 0.0867 - acc: 0.9736\n", + "Epoch 4/4\n", + "60000/60000 [==============================] - 5s 82us/sample - loss: 0.0697 - acc: 0.9785\n", + "10000/10000 [==============================] - 1s 59us/sample - loss: 0.0662 - acc: 0.9791\n" + ] + }, + { + "data": { + "text/plain": [ + "[0.0662360069771763, 0.9791]" + ] + }, + "execution_count": 215, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import tensorflow as tf\n", + "# Disable some deprecated error messages\n", + "tf.logging.set_verbosity(tf.logging.ERROR)\n", + "\n", + "# Normalize the data to a 0.0 to 1.0 scale for faster processing\n", + "x_train, x_test = x_train / 255.0, x_test / 255.0\n", + "\n", + "model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", + " tf.keras.layers.Dense(256, activation=tf.nn.relu),\n", + " tf.keras.layers.Dropout(0.25),\n", + " tf.keras.layers.Dense(10, activation=tf.nn.softmax)\n", + "])\n", + " 
\n", + "model.compile(optimizer='adam',\n", + " loss='sparse_categorical_crossentropy',\n", + " metrics=['accuracy'])\n", + "\n", + "model.fit(x_train, y_train, epochs=4)\n", + "model.evaluate(x_test, y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Generate predictions for test set \n", + "Our predictions are in the form of a list of 10 floats, with probabilities for each value. We can get the prediction by picking the index of the list item with the highest probability. And we can visualize that item to verify our prediction." + ] + }, + { + "cell_type": "code", + "execution_count": 216, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[5.5930052e-09 1.6970777e-15 2.4897268e-10 2.3935108e-14 7.2053798e-09\n", + " 4.4642620e-10 1.0000000e+00 8.1785776e-12 2.4993282e-10 2.6947859e-13]\n", + "6\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 216, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAADhxJREFUeJzt3X+M1PWdx/HX+ygkagEV9taNqNurpInRHJiRnEpODq6NNU2g/gqEXPYS4hpTVAwaCZr44y9z2lYSLzVUN9ALR0vSGkk0d1WC4WoqMrqcYj3PH1lScIUlKJVERdb3/bFfe1vd+cww8535zuz7+Ug2O/N9fz8zbya89jszn+/Mx9xdAOL5q6IbAFAMwg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+IKhvtPLOZs+e7b29va28SyCUoaEhHTlyxGrZt6Hwm9nVkjZImiLpCXd/KLV/b2+vyuVyI3cJIKFUKtW8b91P+81siqR/lfR9SRdJWmFmF9V7ewBaq5HX/AskvePu77n7CUm/lLQ0n7YANFsj4T9X0h/HXT+QbfsLZtZvZmUzK4+MjDRwdwDy1PR3+919o7uX3L3U1dXV7LsDUKNGwn9Q0nnjrs/JtgHoAI2Ef4+kuWb2LTObJmm5pO35tAWg2eqe6nP3k2a2WtJ/amyqb8Dd38itMwBN1dA8v7s/K+nZnHoB0EKc3gsERfiBoAg/EBThB4Ii/EBQhB8IqqWf50dzDA8PV6zde++9ybEDAwPJ+s6dO5P1q666Klk3q+mj5SgAR34gKMIPBEX4gaAIPxAU4QeCIvxAUEz1dYDR0dFkva+vr2Lt+eefT46tNhW3ePHiZP3TTz9N1qdNm5asozgc+YGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKOb5O8Dg4GCynprLX7JkSXLsunXrkvX77rsvWecju52LIz8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBNXQPL+ZDUn6WNKopJPuXsqjqWg++uijZH3ZsmV13/bcuXOT9WrnAVSro3PlcZLPP7j7kRxuB0AL8bQfCKrR8Luk35rZK2bWn0dDAFqj0af9C939oJn9taTnzOx/3H3X+B2yPwr9knT++ec3eHcA8tLQkd/dD2a/D0t6StKCCfbZ6O4ldy91dXU1cncAclR3+M3sDDOb/uVlSd+TtC+vxgA0VyNP+7slPZV9pPMbkv7d3f8jl64ANF3d4Xf39yT9bY69hPXuu+8m6++//36yPmvWrIq1W265pa6eMPkx1QcERfiBoAg/EBThB4Ii/EBQhB8Iiq/ubgNPPPFEQ+NXr15dsXbJJZc0dNuYvDjyA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQzPO3wLFjx5L1bdu2NXT7K1eubGg8YuLIDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBMc/fAidPnkzWP/zwwxZ1Avw/jvxAUIQfCIrwA0ERfiAowg8ERfiBoAg/EFTVeX4zG5D0A0mH3f3ibNvZkn4lqVfSkKQb3Z3J6goee+yxolsAvqaWI/8mSVd/Zds6STvcfa6kHdl1AB2kavjdfZeko1/ZvFTS5uzyZknLcu4LQJPV+5q/292Hs8sfSOrOqR8ALdLwG37u7pK8Ut3M+s2sbGblkZGRRu8OQE7qDf8hM+uRpOz34Uo7uvtGdy+5e6mrq6vOuwOQt3rDv11SX3a5T9LT+bQDoFWqht/Mtkr6vaTvmNkBM1sl6SFJ3zWztyX9Y3YdQAepOs/v7isqlJbk3Mukdfz48aJb6EiDg4PJ+tq1a5P1yy67rGLtnnvuSY6dMWNGsj4ZcIYfEBThB4Ii/EBQhB8IivADQRF+ICi+ursFxs6Arr/eyVL/tpdffjk59vbbb0/Wd+/enazv3LmzYm379u3JsS+99FKyPnPmzGS9E3DkB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgmOdvATNrqN7JUnP5l19
+eUO33cjj9tZbbyXrCxcuTNarnWNw+umnn3JPrcaRHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCYp6/BWbNmlV0C03zySefJOvVPpOf0tPTk6zfcccdyfr8+fPrHrtv375k/ZlnnknWb7jhhmS9HXDkB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgqs7zm9mApB9IOuzuF2fb7pd0k6SRbLf17v5ss5rsdDfddFOyvn79+hZ1kr9HHnkkWU997n3evHnJsanv3ZekM888M1lPueCCC5L1avP8e/bsSdYnyzz/JklXT7D9p+4+L/sh+ECHqRp+d98l6WgLegHQQo285l9tZq+Z2YCZnZVbRwBaot7w/0zStyXNkzQs6ceVdjSzfjMrm1l5ZGSk0m4AWqyu8Lv7IXcfdfcvJP1c0oLEvhvdveTupa6urnr7BJCzusJvZuM/bvVDSem3RgG0nVqm+rZKWiRptpkdkHSfpEVmNk+SSxqSdHMTewTQBFXD7+4rJtj8ZBN6mbSmTp2arHd3dyfrhw4dStaHh4cr1i688MLk2GpuvfXWZP3xxx9P1k877bSKtRdeeCE5dubMmcl6kaqdJ9AJOMMPCIrwA0ERfiAowg8ERfiBoAg/EBRf3d0CM2bMSNbvvPPOZP2uu+5K1q+99tqKtR07diTH9vb2Jutbt25N1kdHR5P1TZs2Vaw1OpX32WefJeupf/uLL76YHHvdddcl6/39/cl6J+DIDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBMc/fBlatWpWsb9iwIVk/cOBAxdrixYuTY6vNtR892th3t65cubLusfv370/WH3jggWQ9dY5BNYsWLUrWq31MuxNw5AeCIvxAUIQfCIrwA0ERfiAowg8ERfiBoJjnbwPVlpretWtXsn7llVdWrKW+1ltqfB6/mksvvbRi7YorrkiO3bJlS7J+7NixunqSqp8jcPPNk38pCo78QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxBU1Xl+MztP0i8kdUtySRvdfYOZnS3pV5J6JQ1JutHdP2xeq3FV+279Rx99tGLttttuS46ttvx3owYHByvW9u7d29Btn3POOcn6ww8/XLG2fPny5NgpU6bU1VMnqeXIf1LSWne/SNLfSfqRmV0kaZ2kHe4+V9KO7DqADlE1/O4+7O6vZpc/lvSmpHMlLZW0Odtts6RlzWoSQP5O6TW/mfVKmi9pt6Rud//y3NEPNPayAECHqDn8ZvZNSb+WtMbd/zS+5u6usfcDJhrXb2ZlMyuPjIw01CyA/NQUfjObqrHgb3H332SbD5lZT1bvkXR4orHuvtHdS+5e6urqyqNnADmoGn4zM0lPSnrT3X8yrrRdUl92uU/S0/m3B6BZbOwZe2IHs4WS/kvS65K+yDav19jr/m2Szpe0X2NTfcnPh5ZKJS+Xy432jFPw+eefJ+vbtm1L1qu9VLv77ruT9RMnTlSsTZ8+PTn2wQcfTNavv/76ZH3OnDnJ+mRUKpVULpetln2rzvO7++8kVbqxJafSGID2wRl+QFCEHwiK8ANBEX4gKMIPBEX4gaD46u5JrtpS0o0soS1Ja9asaWg8isORHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+ICjCDwRF+IGgCD8QFOEHgqoafjM7z8x2mtkfzOwNM7s9236/mR00s73ZzzXNbxdAXmpZtOOkpLXu/qqZTZf0ipk9l9V+6u6PNK89AM1SNfzuPixpOLv8sZm9KencZjcGoLlO6TW/mfVKmi9pd7ZptZm9ZmYDZnZWhTH9ZlY2s/LIyEhDzQLIT83hN7NvSvq1pDXu/idJP5P0bUnzNPbM4McTjXP3je5ecvdSV1dXDi0DyENN4TezqRoL/hZ3/40kufshdx919y8k/VzSgua1CSBvtbzbb5KelPSmu/9k3Paecbv9UNK+/Ns
D0Cy1vNt/paR/kvS6me3Ntq2XtMLM5klySUOSbm5KhwCaopZ3+38nySYoPZt/OwBahTP8gKAIPxAU4QeCIvxAUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQZm7t+7OzEYk7R+3abakIy1r4NS0a2/t2pdEb/XKs7cL3L2m78trafi/dudmZXcvFdZAQrv21q59SfRWr6J642k/EBThB4IqOvwbC77/lHbtrV37kuitXoX0VuhrfgDFKfrID6AghYTfzK42s7fM7B0zW1dED5WY2ZCZvZ6tPFwuuJcBMztsZvvGbTvbzJ4zs7ez3xMuk1ZQb22xcnNiZelCH7t2W/G65U/7zWyKpP+V9F1JByTtkbTC3f/Q0kYqMLMhSSV3L3xO2Mz+XtJxSb9w94uzbf8i6ai7P5T94TzL3e9uk97ul3S86JWbswVlesavLC1pmaR/VoGPXaKvG1XA41bEkX+BpHfc/T13PyHpl5KWFtBH23P3XZKOfmXzUkmbs8ubNfafp+Uq9NYW3H3Y3V/NLn8s6cuVpQt97BJ9FaKI8J8r6Y/jrh9Qey357ZJ+a2avmFl/0c1MoDtbNl2SPpDUXWQzE6i6cnMrfWVl6bZ57OpZ8TpvvOH3dQvd/VJJ35f0o+zpbVvysdds7TRdU9PKza0ywcrSf1bkY1fvitd5KyL8ByWdN+76nGxbW3D3g9nvw5KeUvutPnzoy0VSs9+HC+7nz9pp5eaJVpZWGzx27bTidRHh3yNprpl9y8ymSVouaXsBfXyNmZ2RvREjMztD0vfUfqsPb5fUl13uk/R0gb38hXZZubnSytIq+LFruxWv3b3lP5Ku0dg7/u9KuqeIHir09TeS/jv7eaPo3iRt1djTwM819t7IKkmzJO2Q9Lak5yWd3Ua9/Zuk1yW9prGg9RTU20KNPaV/TdLe7Oeaoh+7RF+FPG6c4QcExRt+QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeC+j/gJVnvgAWZwQAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "predictions = model.predict(x_test)\n", + "print(predictions[88])\n", + "print(np.argmax(predictions[88]))\n", + "plt.imshow(x_test[88].reshape(28, 28), cmap=cm.Greys)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/2-3_tree.py b/Trees/2-3_tree.py similarity index 96% rename from 2-3_tree.py rename to Trees/2-3_tree.py index f78fae13..9dc3bf5b 100644 --- a/2-3_tree.py +++ b/Trees/2-3_tree.py @@ -1,144 +1,144 @@ -# 2-3 Tree -# balanced tree data structure with up to 2 data items per node - -class Node: - def __init__(self, data, par = None): - #print ("Node __init__: " + str(data)) - self.data = list([data]) - self.parent = par - self.child = list() - - def __str__(self): - if self.parent: - return str(self.parent.data) + ' : ' + str(self.data) - return 'Root : ' + str(self.data) - - def __lt__(self, node): - return self.data[0] < node.data[0] - - def _isLeaf(self): - return len(self.child) == 0 - - # merge new_node sub-tree into self node - def _add(self, new_node): - # print ("Node _add: " + str(new_node.data) + ' to ' + str(self.data)) - for child in new_node.child: - child.parent = self - self.data.extend(new_node.data) - self.data.sort() - self.child.extend(new_node.child) - if len(self.child) > 1: - self.child.sort() - if len(self.data) > 2: - self._split() - - # find correct node to insert new node into tree - def _insert(self, new_node): - # print ('Node _insert: ' + str(new_node.data) + ' into ' + str(self.data)) - - # leaf node - add data to leaf and 
rebalance tree - if self._isLeaf(): - self._add(new_node) - - # not leaf - find correct child to descend, and do recursive insert - elif new_node.data[0] > self.data[-1]: - self.child[-1]._insert(new_node) - else: - for i in range(0, len(self.data)): - if new_node.data[0] < self.data[i]: - self.child[i]._insert(new_node) - break - - # 3 items in node, split into new sub-tree and add to parent - def _split(self): - # print("Node _split: " + str(self.data)) - left_child = Node(self.data[0], self) - right_child = Node(self.data[2], self) - if self.child: - self.child[0].parent = left_child - self.child[1].parent = left_child - self.child[2].parent = right_child - self.child[3].parent = right_child - left_child.child = [self.child[0], self.child[1]] - right_child.child = [self.child[2], self.child[3]] - - self.child = [left_child] - self.child.append(right_child) - self.data = [self.data[1]] - - # now have new sub-tree, self. need to add self to its parent node - if self.parent: - if self in self.parent.child: - self.parent.child.remove(self) - self.parent._add(self) - else: - left_child.parent = self - right_child.parent = self - - # find an item in the tree; return item, or False if not found - def _find(self, item): - # print ("Find " + str(item)) - if item in self.data: - return item - elif self._isLeaf(): - return False - elif item > self.data[-1]: - return self.child[-1]._find(item) - else: - for i in range(len(self.data)): - if item < self.data[i]: - return self.child[i]._find(item) - - def _remove(self, item): - pass - - # print preorder traversal - def _preorder(self): - print (self) - for child in self.child: - child._preorder() - -class Tree: - def __init__(self): - print("Tree __init__") - self.root = None - - def insert(self, item): - print("Tree insert: " + str(item)) - if self.root is None: - self.root = Node(item) - else: - self.root._insert(Node(item)) - while self.root.parent: - self.root = self.root.parent - return True - - def find(self, item): - 
return self.root._find(item) - - def remove(self, item): - self.root.remove(item) - - def printTop2Tiers(self): - print ('----Top 2 Tiers----') - print (str(self.root.data)) - for child in self.root.child: - print (str(child.data), end=' ') - print(' ') - - def preorder(self): - print ('----Preorder----') - self.root._preorder() - -tree = Tree() - -lst = [13, 7, 24, 15, 4, 29, 20, 16, 19, 1, 5, 22, 17] -for item in lst: - tree.insert(item) -tree.printTop2Tiers() - -# for i in range (25): - # tree.insert(i) - # tree.printTop2Tiers() -# tree.preorder() -# print (tree.find(16)) +# 2-3 Tree +# balanced tree data structure with up to 2 data items per node + +class Node: + def __init__(self, data, par = None): + #print ("Node __init__: " + str(data)) + self.data = list([data]) + self.parent = par + self.child = list() + + def __str__(self): + if self.parent: + return str(self.parent.data) + ' : ' + str(self.data) + return 'Root : ' + str(self.data) + + def __lt__(self, node): + return self.data[0] < node.data[0] + + def _isLeaf(self): + return len(self.child) == 0 + + # merge new_node sub-tree into self node + def _add(self, new_node): + # print ("Node _add: " + str(new_node.data) + ' to ' + str(self.data)) + for child in new_node.child: + child.parent = self + self.data.extend(new_node.data) + self.data.sort() + self.child.extend(new_node.child) + if len(self.child) > 1: + self.child.sort() + if len(self.data) > 2: + self._split() + + # find correct node to insert new node into tree + def _insert(self, new_node): + # print ('Node _insert: ' + str(new_node.data) + ' into ' + str(self.data)) + + # leaf node - add data to leaf and rebalance tree + if self._isLeaf(): + self._add(new_node) + + # not leaf - find correct child to descend, and do recursive insert + elif new_node.data[0] > self.data[-1]: + self.child[-1]._insert(new_node) + else: + for i in range(0, len(self.data)): + if new_node.data[0] < self.data[i]: + self.child[i]._insert(new_node) + break + + # 3 items in 
node, split into new sub-tree and add to parent + def _split(self): + # print("Node _split: " + str(self.data)) + left_child = Node(self.data[0], self) + right_child = Node(self.data[2], self) + if self.child: + self.child[0].parent = left_child + self.child[1].parent = left_child + self.child[2].parent = right_child + self.child[3].parent = right_child + left_child.child = [self.child[0], self.child[1]] + right_child.child = [self.child[2], self.child[3]] + + self.child = [left_child] + self.child.append(right_child) + self.data = [self.data[1]] + + # now have new sub-tree, self. need to add self to its parent node + if self.parent: + if self in self.parent.child: + self.parent.child.remove(self) + self.parent._add(self) + else: + left_child.parent = self + right_child.parent = self + + # find an item in the tree; return item, or False if not found + def _find(self, item): + # print ("Find " + str(item)) + if item in self.data: + return item + elif self._isLeaf(): + return False + elif item > self.data[-1]: + return self.child[-1]._find(item) + else: + for i in range(len(self.data)): + if item < self.data[i]: + return self.child[i]._find(item) + + def _remove(self, item): + pass + + # print preorder traversal + def _preorder(self): + print (self) + for child in self.child: + child._preorder() + +class Tree: + def __init__(self): + print("Tree __init__") + self.root = None + + def insert(self, item): + print("Tree insert: " + str(item)) + if self.root is None: + self.root = Node(item) + else: + self.root._insert(Node(item)) + while self.root.parent: + self.root = self.root.parent + return True + + def find(self, item): + return self.root._find(item) + + def remove(self, item): + self.root.remove(item) + + def printTop2Tiers(self): + print ('----Top 2 Tiers----') + print (str(self.root.data)) + for child in self.root.child: + print (str(child.data), end=' ') + print(' ') + + def preorder(self): + print ('----Preorder----') + self.root._preorder() + +tree = Tree() + 
+lst = [13, 7, 24, 15, 4, 29, 20, 16, 19, 1, 5, 22, 17] +for item in lst: + tree.insert(item) +tree.printTop2Tiers() + +# for i in range (25): + # tree.insert(i) + # tree.printTop2Tiers() +# tree.preorder() +# print (tree.find(16)) diff --git a/Trees/BST_Height_Size.pptx b/Trees/BST_Height_Size.pptx new file mode 100644 index 00000000..572bb448 Binary files /dev/null and b/Trees/BST_Height_Size.pptx differ diff --git a/BinarySearchTree.py b/Trees/BinarySearchTree.py similarity index 95% rename from BinarySearchTree.py rename to Trees/BinarySearchTree.py index 40d6ee8f..c1e34b82 100644 --- a/BinarySearchTree.py +++ b/Trees/BinarySearchTree.py @@ -1,214 +1,214 @@ -# Binary Search Tree in Python - -class Node: - def __init__(self, val): - self.value = val - self.leftChild = None - self.rightChild = None - - def insert(self, data): - if self.value == data: - return False - - elif self.value > data: - if self.leftChild: - return self.leftChild.insert(data) - else: - self.leftChild = Node(data) - return True - - else: - if self.rightChild: - return self.rightChild.insert(data) - else: - self.rightChild = Node(data) - return True - - def find(self, data): - if(self.value == data): - return True - elif self.value > data: - if self.leftChild: - return self.leftChild.find(data) - else: - return False - else: - if self.rightChild: - return self.rightChild.find(data) - else: - return False - - def getHeight(self): - if self.leftChild and self.rightChild: - return 1 + max(self.leftChild.getHeight(), self.rightChild.getHeight()) - elif self.leftChild: - return 1 + self.leftChild.getHeight() - elif self.rightChild: - return 1 + self.rightChild.getHeight() - else: - return 1 - - def preorder(self): - if self: - print (str(self.value)) - if self.leftChild: - self.leftChild.preorder() - if self.rightChild: - self.rightChild.preorder() - - def postorder(self): - if self: - if self.leftChild: - self.leftChild.postorder() - if self.rightChild: - self.rightChild.postorder() - print 
(str(self.value)) - - def inorder(self): - if self: - if self.leftChild: - self.leftChild.inorder() - print (str(self.value)) - if self.rightChild: - self.rightChild.inorder() - -class Tree: - def __init__(self): - self.root = None - - def insert(self, data): - if self.root: - return self.root.insert(data) - else: - self.root = Node(data) - return True - - def find(self, data): - if self.root: - return self.root.find(data) - else: - return False - - def getHeight(self): - if self.root: - return self.root.getHeight() - else: - return -1 - - def remove(self, data): - # empty tree - if self.root is None: - return False - - # data is in root node - elif self.root.value == data: - if self.root.leftChild is None and self.root.rightChild is None: - self.root = None - elif self.root.leftChild and self.root.rightChild is None: - self.root = self.root.leftChild - elif self.root.leftChild is None and self.root.rightChild: - self.root = self.root.rightChild - elif self.root.leftChild and self.root.rightChild: - delNodeParent = self.root - delNode = self.root.rightChild - while delNode.leftChild: - delNodeParent = delNode - delNode = delNode.leftChild - - self.root.value = delNode.value - if delNode.rightChild: - if delNodeParent.value > delNode.value: - delNodeParent.leftChild = delNode.rightChild - elif delNodeParent.value < delNode.value: - delNodeParent.rightChild = delNode.rightChild - else: - if delNode.value < delNodeParent.value: - delNodeParent.leftChild = None - else: - delNodeParent.rightChild = None - - return True - - parent = None - node = self.root - - # find node to remove - while node and node.value != data: - parent = node - if data < node.value: - node = node.leftChild - elif data > node.value: - node = node.rightChild - - # case 1: data not found - if node is None or node.value != data: - return False - - # case 2: remove-node has no children - elif node.leftChild is None and node.rightChild is None: - if data < parent.value: - parent.leftChild = None - 
else: - parent.rightChild = None - return True - - # case 3: remove-node has left child only - elif node.leftChild and node.rightChild is None: - if data < parent.value: - parent.leftChild = node.leftChild - else: - parent.rightChild = node.leftChild - return True - - # case 4: remove-node has right child only - elif node.leftChild is None and node.rightChild: - if data < parent.value: - parent.leftChild = node.rightChild - else: - parent.rightChild = node.rightChild - return True - - # case 5: remove-node has left and right children - else: - delNodeParent = node - delNode = node.rightChild - while delNode.leftChild: - delNodeParent = delNode - delNode = delNode.leftChild - - node.value = delNode.value - if delNode.rightChild: - if delNodeParent.value > delNode.value: - delNodeParent.leftChild = delNode.rightChild - elif delNodeParent.value < delNode.value: - delNodeParent.rightChild = delNode.rightChild - else: - if delNode.value < delNodeParent.value: - delNodeParent.leftChild = None - else: - delNodeParent.rightChild = None - - def preorder(self): - if self.root is not None: - print("PreOrder") - self.root.preorder() - - def postorder(self): - if self.root is not None: - print("PostOrder") - self.root.postorder() - - def inorder(self): - if self.root is not None: - print("InOrder") - self.root.inorder() - -bst = Tree() -print(bst.insert(10)) -#print(bst.insert(5)) -bst.preorder() -print(bst.getHeight()) -#bst.postorder() -#bst.inorder() -print(bst.remove(10)) +# Binary Search Tree in Python + +class Node: + def __init__(self, val): + self.value = val + self.leftChild = None + self.rightChild = None + + def insert(self, data): + if self.value == data: + return False + + elif self.value > data: + if self.leftChild: + return self.leftChild.insert(data) + else: + self.leftChild = Node(data) + return True + + else: + if self.rightChild: + return self.rightChild.insert(data) + else: + self.rightChild = Node(data) + return True + + def find(self, data): + 
if(self.value == data): + return True + elif self.value > data: + if self.leftChild: + return self.leftChild.find(data) + else: + return False + else: + if self.rightChild: + return self.rightChild.find(data) + else: + return False + + def getHeight(self): + if self.leftChild and self.rightChild: + return 1 + max(self.leftChild.getHeight(), self.rightChild.getHeight()) + elif self.leftChild: + return 1 + self.leftChild.getHeight() + elif self.rightChild: + return 1 + self.rightChild.getHeight() + else: + return 1 + + def preorder(self): + if self: + print (str(self.value)) + if self.leftChild: + self.leftChild.preorder() + if self.rightChild: + self.rightChild.preorder() + + def postorder(self): + if self: + if self.leftChild: + self.leftChild.postorder() + if self.rightChild: + self.rightChild.postorder() + print (str(self.value)) + + def inorder(self): + if self: + if self.leftChild: + self.leftChild.inorder() + print (str(self.value)) + if self.rightChild: + self.rightChild.inorder() + +class Tree: + def __init__(self): + self.root = None + + def insert(self, data): + if self.root: + return self.root.insert(data) + else: + self.root = Node(data) + return True + + def find(self, data): + if self.root: + return self.root.find(data) + else: + return False + + def getHeight(self): + if self.root: + return self.root.getHeight() + else: + return -1 + + def remove(self, data): + # empty tree + if self.root is None: + return False + + # data is in root node + elif self.root.value == data: + if self.root.leftChild is None and self.root.rightChild is None: + self.root = None + elif self.root.leftChild and self.root.rightChild is None: + self.root = self.root.leftChild + elif self.root.leftChild is None and self.root.rightChild: + self.root = self.root.rightChild + elif self.root.leftChild and self.root.rightChild: + delNodeParent = self.root + delNode = self.root.rightChild + while delNode.leftChild: + delNodeParent = delNode + delNode = delNode.leftChild + + 
self.root.value = delNode.value + if delNode.rightChild: + if delNodeParent.value > delNode.value: + delNodeParent.leftChild = delNode.rightChild + elif delNodeParent.value < delNode.value: + delNodeParent.rightChild = delNode.rightChild + else: + if delNode.value < delNodeParent.value: + delNodeParent.leftChild = None + else: + delNodeParent.rightChild = None + + return True + + parent = None + node = self.root + + # find node to remove + while node and node.value != data: + parent = node + if data < node.value: + node = node.leftChild + elif data > node.value: + node = node.rightChild + + # case 1: data not found + if node is None or node.value != data: + return False + + # case 2: remove-node has no children + elif node.leftChild is None and node.rightChild is None: + if data < parent.value: + parent.leftChild = None + else: + parent.rightChild = None + return True + + # case 3: remove-node has left child only + elif node.leftChild and node.rightChild is None: + if data < parent.value: + parent.leftChild = node.leftChild + else: + parent.rightChild = node.leftChild + return True + + # case 4: remove-node has right child only + elif node.leftChild is None and node.rightChild: + if data < parent.value: + parent.leftChild = node.rightChild + else: + parent.rightChild = node.rightChild + return True + + # case 5: remove-node has left and right children + else: + delNodeParent = node + delNode = node.rightChild + while delNode.leftChild: + delNodeParent = delNode + delNode = delNode.leftChild + + node.value = delNode.value + if delNode.rightChild: + if delNodeParent.value > delNode.value: + delNodeParent.leftChild = delNode.rightChild + elif delNodeParent.value < delNode.value: + delNodeParent.rightChild = delNode.rightChild + else: + if delNode.value < delNodeParent.value: + delNodeParent.leftChild = None + else: + delNodeParent.rightChild = None + + def preorder(self): + if self.root is not None: + print("PreOrder") + self.root.preorder() + + def postorder(self): + 
if self.root is not None: + print("PostOrder") + self.root.postorder() + + def inorder(self): + if self.root is not None: + print("InOrder") + self.root.inorder() + +bst = Tree() +print(bst.insert(10)) +#print(bst.insert(5)) +bst.preorder() +print(bst.getHeight()) +#bst.postorder() +#bst.inorder() +print(bst.remove(10)) bst.preorder() \ No newline at end of file diff --git a/Trees/bst.py b/Trees/bst.py new file mode 100644 index 00000000..f45b2f4f --- /dev/null +++ b/Trees/bst.py @@ -0,0 +1,236 @@ +# Binary Search Tree in Python + +class Node: + def __init__(self, val): + self.value = val + self.leftChild = None + self.rightChild = None + + def insert(self, data): + if self.value == data: + return False + + elif self.value > data: + if self.leftChild: + return self.leftChild.insert(data) + else: + self.leftChild = Node(data) + return True + + else: + if self.rightChild: + return self.rightChild.insert(data) + else: + self.rightChild = Node(data) + return True + + def find(self, data): + if(self.value == data): + return True + elif self.value > data: + if self.leftChild: + return self.leftChild.find(data) + else: + return False + else: + if self.rightChild: + return self.rightChild.find(data) + else: + return False + + def getSize(self): + if self.leftChild and self.rightChild: + return 1 + self.leftChild.getSize() + self.rightChild.getSize() + elif self.leftChild: + return 1 + self.leftChild.getSize() + elif self.rightChild: + return 1 + self.rightChild.getSize() + else: + return 1 + + def getHeight(self): + if self.leftChild and self.rightChild: + return 1 + max(self.leftChild.getHeight(), self.rightChild.getHeight()) + elif self.leftChild: + return 1 + self.leftChild.getHeight() + elif self.rightChild: + return 1 + self.rightChild.getHeight() + else: + return 1 + + def preorder(self): + if self: + print (str(self.value)) + if self.leftChild: + self.leftChild.preorder() + if self.rightChild: + self.rightChild.preorder() + + def postorder(self): + if self: + if 
self.leftChild: + self.leftChild.postorder() + if self.rightChild: + self.rightChild.postorder() + print (str(self.value)) + + def inorder(self): + if self: + if self.leftChild: + self.leftChild.inorder() + print (str(self.value)) + if self.rightChild: + self.rightChild.inorder() + +class Tree: + def __init__(self): + self.root = None + + def insert(self, data): + if self.root: + return self.root.insert(data) + else: + self.root = Node(data) + return True + + def find(self, data): + if self.root: + return self.root.find(data) + else: + return False + + def getHeight(self): + if self.root: + return self.root.getHeight() + else: + return 0 + + def getSize(self): + if self.root: + return self.root.getSize() + else: + return 0 + + def remove(self, data): + # empty tree + if self.root is None: + return False + + # data is in root node + elif self.root.value == data: + if self.root.leftChild is None and self.root.rightChild is None: + self.root = None + elif self.root.leftChild and self.root.rightChild is None: + self.root = self.root.leftChild + elif self.root.leftChild is None and self.root.rightChild: + self.root = self.root.rightChild + elif self.root.leftChild and self.root.rightChild: + delNodeParent = self.root + delNode = self.root.rightChild + while delNode.leftChild: + delNodeParent = delNode + delNode = delNode.leftChild + + if delNode.rightChild: + if delNodeParent.value > delNode.value: + delNodeParent.leftChild = delNode.rightChild + elif delNodeParent.value < delNode.value: + delNodeParent.rightChild = delNode.rightChild + else: + if delNode.value < delNodeParent.value: + delNodeParent.leftChild = None + else: + delNodeParent.rightChild = None + self.root.value = delNode.value + + return True + + parent = None + node = self.root + + # find node to remove + while node and node.value != data: + parent = node + if data < node.value: + node = node.leftChild + elif data > node.value: + node = node.rightChild + + # case 1: data not found + if node is None or 
node.value != data: + return False + + # case 2: remove-node has no children + elif node.leftChild is None and node.rightChild is None: + if data < parent.value: + parent.leftChild = None + else: + parent.rightChild = None + return True + + # case 3: remove-node has left child only + elif node.leftChild and node.rightChild is None: + if data < parent.value: + parent.leftChild = node.leftChild + else: + parent.rightChild = node.leftChild + return True + + # case 4: remove-node has right child only + elif node.leftChild is None and node.rightChild: + if data < parent.value: + parent.leftChild = node.rightChild + else: + parent.rightChild = node.rightChild + return True + + # case 5: remove-node has left and right children + else: + delNodeParent = node + delNode = node.rightChild + while delNode.leftChild: + delNodeParent = delNode + delNode = delNode.leftChild + + node.value = delNode.value + if delNode.rightChild: + if delNodeParent.value > delNode.value: + delNodeParent.leftChild = delNode.rightChild + elif delNodeParent.value < delNode.value: + delNodeParent.rightChild = delNode.rightChild + else: + if delNode.value < delNodeParent.value: + delNodeParent.leftChild = None + else: + delNodeParent.rightChild = None + + def preorder(self): + if self.root is not None: + print("PreOrder") + self.root.preorder() + + def postorder(self): + if self.root is not None: + print("PostOrder") + self.root.postorder() + + def inorder(self): + if self.root is not None: + print("InOrder") + self.root.inorder() + +def main(): + bst = Tree() + print(bst.insert(10)) + print(bst.insert(5)) + bst.insert(2) + bst.insert(7) + bst.preorder() + print('Height = ', bst.getHeight()) + print('Size = ', bst.getSize()) + #bst.postorder() + #bst.inorder() + print(bst.remove(10)) + bst.preorder() + +main() diff --git a/Unpacking Variables.ipynb b/Unpacking Variables.ipynb new file mode 100644 index 00000000..3b1f8d31 --- /dev/null +++ b/Unpacking Variables.ipynb @@ -0,0 +1,360 @@ +{ + "cells": [ + 
{ + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Packing & Unpacking Variables" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assign variables\n", + "In Python you can assign multiple variables at a time using commas." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "2\n", + "3\n" + ] + } + ], + "source": [ + "a, b, c = 1, 2, 3\n", + "print(a)\n", + "print(b)\n", + "print(c)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Swap a pair of Variables in Python\n", + "x,y = y,x" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "x=12, y=5\n" + ] + } + ], + "source": [ + "x = 5; y = 12\n", + "x, y = y, x\n", + "print('x=' + str(x) + ', y=' + str(y))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Swap a trio of Variables in Python\n", + "Yes, this trick works for 3 variables too." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "88 99 77\n" + ] + } + ], + "source": [ + "x, y, z = 77, 88, 99\n", + "z, x, y = x, y, z\n", + "print(x, y, z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split String into multiple variables\n", + "But be careful because the number of variables must match the number of substrings from the split." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4\n", + "5\n", + "6\n" + ] + } + ], + "source": [ + "a, b, c = '4 5 6'.split()\n", + "print(a)\n", + "print(b)\n", + "print(c)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Splitting a List into variables is magically easy" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "8\n", + "9\n", + "10\n" + ] + } + ], + "source": [ + "my_list = [8, 9, 10]\n", + "a, b, c = my_list\n", + "print(a)\n", + "print(b)\n", + "print(c)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Split Tuple into variables" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "25 26 27\n" + ] + } + ], + "source": [ + "tup = (25,26,27)\n", + "x, y, z = tup\n", + "print(x, y, z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### This gives you a Tuple, not a List" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8, 9, 10)\n", + "\n" + ] + } + ], + "source": [ + "var = a, b, c\n", + "print(var)\n", + "print(type(var))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### *args for Functions\n", + "Used for passing a non-keyworded, variable-length argument list to a function. \n", + "Received as a Tuple." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "('Forest', 'Hill', 'High')\n", + "\n" + ] + } + ], + "source": [ + "def pack_it(*args):\n", + " print(args)\n", + " print(type(args))\n", + " \n", + "x = 'Forest'; y = 'Hill'; z = 'High'\n", + "pack_it(x, y, z)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This unpacks the List before sending it, so it can be received by the function as separate variables." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cullen\n", + "McDonough\n" + ] + } + ], + "source": [ + "def unpack_it(x, y):\n", + " print(x)\n", + " print(y)\n", + " \n", + "args = ['Cullen', 'McDonough']\n", + "unpack_it(*args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **kwargs for Functions\n", + "Used for passing keyworded, variable-length argument dictionary to functions. \n", + "This works, but it's kinda annoying because some normal Python dictionaries fail. \n", + "func (1:'Edsel', 2:'Betamax') does not work." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'a': 'Edsel', 'b': 'Betamax', 'c': 'mGaetz'}\n", + "Edsel\n", + "\n" + ] + } + ], + "source": [ + "def func(**losers):\n", + " print(losers)\n", + " print(losers['a'])\n", + " print(type(losers))\n", + " \n", + "func(a='Edsel', b='Betamax', c='mGaetz')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This works, but it's kinda annoying because you have to use strings for the keys, so some normal Python dictionaries will give you an error. {1:'Edsel', 2:'Betamax'} fails." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Edsel\n" + ] + } + ], + "source": [ + "def func(a, b, c):\n", + " print(a)\n", + "\n", + "losers = {'a':'Edsel', 'b':'Betamax', 'c':'mGaetz'}\n", + "func(**losers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Web Data Mining/Python BeautifulSoup Web Scraping Tutorial.ipynb b/Web Data Mining/Python BeautifulSoup Web Scraping Tutorial.ipynb new file mode 100644 index 00000000..f7d55aa9 --- /dev/null +++ b/Web Data Mining/Python BeautifulSoup Web Scraping Tutorial.ipynb @@ -0,0 +1,514 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python BeautifulSoup Web Scraping Tutorial\n", + "Learn to scrape data from the web using the Python BeautifulSoup bs4 library. \n", + "BeautifulSoup makes it easy to parse useful data out of an HTML page. \n", + "First install the bs4 library on your system by running at the command line, \n", + "*pip install beautifulsoup4* or *easy_install beautifulsoup4* (or bs4) \n", + "See [BeautifulSoup official documentation](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) for the complete set of functions.\n", + "\n", + "### Import requests so we can fetch the html content of the webpage\n", + "You can see our example page has about 28k characters." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "28556\n" + ] + } + ], + "source": [ + "import requests\n", + "r = requests.get('https://www.usclimatedata.com/climate/united-states/us')\n", + "print(len(r.text))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Import BeautifulSoup, and convert your HTML into a bs4 object\n", + "Now we can access specific HTML tags on the page using dot, just like a JSON object." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Climate United States - normals and averages\n", + "Climate United States - normals and averages\n" + ] + } + ], + "source": [ + "from bs4 import BeautifulSoup\n", + "soup = BeautifulSoup(r.text)\n", + "print(soup.title)\n", + "print(soup.title.string)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Drill into the bs4 object to access page contents\n", + "soup.p will give you the contents of the first paragraph tag on the page. \n", + "soup.a gives you anchors / links on the page. \n", + "Get contents of an attribute inside an HTML tag using square brackets and parentheses. \n", + "Use .parent to get the parent object, and .next_sibling to get the next peer object. \n", + "**Use your browser's *inspect element* feature to find the tag for the data you want.**" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "

You are here: United States

\n", + "You are here: United States\n", + "\n", + "\"US\n", + "\n", + "US Climate Data on Facebook\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(soup.p)\n", + "print(soup.p.text)\n", + "print(soup.a)\n", + "print(soup.a['title'])\n", + "print()\n", + "print(soup.p.parent)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prettify() is handy for formatted printing \n", + "but note this works only on bs4 objects, not on strings, dicts or lists. For those you need to import pprint." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "
\n", + "\n" + ] + } + ], + "source": [ + "print(soup.p.parent.prettify())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### We need all the state links on this page\n", + "First we find_all anchor tags, and print out the href attribute, which is the actual link url. \n", + "But we see the result includes some links we don't want, so we need to filter those out." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://www.facebook.com/yourweatherservice/n", + "https://twitter.com/usclimatedata/n", + "http://www.usclimatedata.com/n", + "/climate/united-states/us\n", + "#summary\n", + "/climate/united-states/us\n", + "#\n", + "#\n", + "/climate/alabama/united-states/3170\n", + "/climate/kentucky/united-states/3187\n", + "/climate/north-dakota/united-states/3204\n", + "/climate/alaska/united-states/3171\n", + "/climate/louisiana/united-states/3188\n", + "/climate/ohio/united-states/3205\n", + "/climate/arizona/united-states/3172\n", + "/climate/maine/united-states/3189\n", + "/climate/oklahoma/united-states/3206\n", + "/climate/arkansas/united-states/3173\n", + "/climate/maryland/united-states/1872\n", + "/climate/oregon/united-states/3207\n", + "/climate/california/united-states/3174\n", + "/climate/massachusetts/united-states/3191\n", + "/climate/pennsylvania/united-states/3208\n", + "/climate/colorado/united-states/3175\n", + "/climate/michigan/united-states/3192\n", + "/climate/rhode-island/united-states/3209\n", + "/climate/connecticut/united-states/3176\n", + "/climate/minnesota/united-states/3193\n", + "/climate/south-carolina/united-states/3210\n", + "/climate/delaware/united-states/3177\n", + "/climate/mississippi/united-states/3194\n", + "/climate/south-dakota/united-states/3211\n", + "/climate/district-of-columbia/united-states/3178\n", + "/climate/missouri/united-states/3195\n", + 
"/climate/tennessee/united-states/3212\n", + "/climate/florida/united-states/3179\n", + "/climate/montana/united-states/919\n", + "/climate/texas/united-states/3213\n", + "/climate/georgia/united-states/3180\n", + "/climate/nebraska/united-states/3197\n", + "/climate/utah/united-states/3214\n", + "/climate/hawaii/united-states/3181\n", + "/climate/nevada/united-states/3198\n", + "/climate/vermont/united-states/3215\n", + "/climate/idaho/united-states/3182\n", + "/climate/new-hampshire/united-states/3199\n", + "/climate/virginia/united-states/3216\n", + "/climate/illinois/united-states/3183\n", + "/climate/new-jersey/united-states/3200\n", + "/climate/washington/united-states/3217\n", + "/climate/indiana/united-states/3184\n", + "/climate/new-mexico/united-states/3201\n", + "/climate/west-virginia/united-states/3218\n", + "/climate/iowa/united-states/3185\n", + "/climate/new-york/united-states/3202\n", + "/climate/wisconsin/united-states/3219\n", + "/climate/kansas/united-states/3186\n", + "/climate/north-carolina/united-states/3203\n", + "/climate/wyoming/united-states/3220\n", + "https://www.yourweatherservice.com/n", + "https://www.climatedata.eu/n", + "https://www.weernetwerk.nl/n", + "/about-us.php\n", + "/disclaimer.php\n", + "/cookies.php\n" + ] + } + ], + "source": [ + "for link in soup.find_all('a'):\n", + " print(link.get('href'))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Filter urls using string functions\n", + "We just add an *if* to check conditions, then add the good ones to a list. \n", + "In the end we get 51 state links, including Washington DC." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "51\n" + ] + } + ], + "source": [ + "base_url = 'https://www.usclimatedata.com/'\n", + "state_links = []\n", + "for link in soup.find_all('a'):\n", + " url = link.get('href')\n", + " if url and '/climate/' in url and '/climate/united-states/us' not in url:\n", + " state_links.append(url)\n", + "print(len(state_links))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test getting the data for one state\n", + "then print the title for that page." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Climate Ohio - temperature, rainfall and average\n" + ] + } + ], + "source": [ + "r = requests.get(base_url + state_links[5])\n", + "soup = BeautifulSoup(r.text)\n", + "print(soup.title.string)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The data we need is in *tr* tags\n", + "But look, there are 58 tr tags on the page, and we only want 2 of them - the *Average high* rows." + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "58\n" + ] + } + ], + "source": [ + "rows = soup.find_all('tr')\n", + "print(len(rows))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Filter rows, and add temp data to a list\n", + "We use a list comprehension to filter the rows. \n", + "Then we have only 2 rows left. \n", + "We iterate through those 2 rows, and add all the temps from data cells (td) into a list." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2\n", + "['36', '40', '52', '63', '73', '82', '85', '84', '77', '65', '52', '41']\n" + ] + } + ], + "source": [ + "rows = [row for row in rows if 'Average high' in str(row)]\n", + "print(len(rows))\n", + "\n", + "high_temps = []\n", + "for row in rows:\n", + " tds = row.find_all('td')\n", + " for i in range(1,7):\n", + " high_temps.append(tds[i].text)\n", + "print(high_temps)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get the name of the State\n", + "First attempt we just split the title string into a list, and grab the second word. \n", + "But that doesn't work for 2-word states like New York and North Carolina. \n", + "So instead we slice the string from first blank to the hyphen. " + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wyoming\n", + "Wyoming\n" + ] + } + ], + "source": [ + "state = soup.title.string.split()[1]\n", + "print(state)\n", + "s = soup.title.string\n", + "state = s[s.find(' '):s.find('-')].strip()\n", + "print(state)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Add state name and temp list to the data dictionary\n", + "For a single state, this is what our scraped data looks like. \n", + "In this example we only got monthly highs by state, but you could drill into cities, and could get lows and precipitation. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'Ohio': ['36', '40', '52', '63', '73', '82', '85', '84', '77', '65', '52', '41']}\n" + ] + } + ], + "source": [ + "data = {}\n", + "data[state] = high_temps\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Put it all together and iterate 51 states\n", + "We loop through our 51-state list, and get high temp data for each state, and add it to the data dict. \n", + "This combines all our work above into a single for loop. \n", + "The result is a dict with 51 states and a list of monthly highs for each." + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'Alabama': ['57', '62', '70', '77', '84', '90', '92', '92', '87', '78', '69', '60'], 'Kentucky': ['40', '45', '55', '66', '75', '83', '87', '86', '79', '68', '55', '44'], 'North Dakota': ['23', '28', '40', '57', '68', '77', '85', '83', '72', '58', '40', '26'], 'Alaska': ['23', '27', '34', '44', '56', '63', '65', '64', '55', '40', '28', '25'], 'Louisiana': ['62', '65', '72', '78', '85', '89', '91', '91', '87', '80', '72', '64'], 'Ohio': ['36', '40', '52', '63', '73', '82', '85', '84', '77', '65', '52', '41'], 'Arizona': ['67', '71', '77', '85', '95', '104', '106', '104', '100', '89', '76', '66'], 'Maine': ['28', '32', '40', '53', '65', '74', '79', '78', '70', '57', '45', '33'], 'Oklahoma': ['50', '55', '63', '72', '80', '88', '94', '93', '85', '73', '62', '51'], 'Arkansas': ['51', '55', '64', '73', '81', '89', '92', '93', '86', '75', '63', '52'], 'Maryland': ['42', '46', '54', '65', '75', '85', '89', '87', '80', '68', '58', '46'], 'Oregon': ['48', '52', '56', '61', '68', '74', '82', '82', '77', '64', '53', '46'], 'California': ['54', '60', '65', '71', '80', '87', '92', '91', '87', '78', '64', '54'], 
'Massachusetts': ['36', '39', '45', '56', '66', '76', '81', '80', '72', '61', '51', '41'], 'Pennsylvania': ['40', '44', '53', '64', '74', '83', '87', '85', '78', '67', '56', '45'], 'Colorado': ['45', '46', '54', '61', '72', '82', '90', '88', '79', '66', '52', '45'], 'Michigan': ['30', '33', '44', '58', '69', '78', '82', '80', '73', '60', '47', '34'], 'Rhode Island': ['37', '40', '48', '59', '68', '78', '83', '81', '74', '63', '53', '42'], 'Connecticut': ['37', '40', '47', '58', '68', '77', '82', '81', '74', '63', '53', '42'], 'Minnesota': ['26', '31', '43', '58', '71', '80', '85', '82', '73', '59', '42', '29'], 'South Carolina': ['56', '60', '68', '76', '84', '90', '93', '91', '85', '76', '67', '58'], 'Delaware': ['43', '47', '55', '66', '75', '83', '87', '85', '79', '69', '58', '47'], 'Mississippi': ['56', '60', '69', '76', '83', '89', '92', '92', '87', '77', '67', '58'], 'South Dakota': ['22', '27', '39', '57', '69', '78', '84', '82', '72', '58', '39', '25'], 'District of Columbia': ['42', '44', '53', '64', '75', '83', '87', '84', '78', '67', '55', '45'], 'Missouri': ['40', '45', '56', '67', '75', '83', '88', '88', '80', '69', '56', '43'], 'Tennessee': ['50', '55', '64', '73', '81', '89', '92', '91', '85', '74', '63', '52'], 'Florida': ['64', '67', '74', '80', '87', '91', '92', '92', '88', '81', '73', '65'], 'Montana': ['33', '39', '48', '58', '67', '76', '86', '85', '73', '59', '43', '32'], 'Texas': ['62', '65', '72', '80', '87', '92', '96', '97', '91', '82', '71', '63'], 'Georgia': ['52', '57', '64', '72', '81', '86', '90', '88', '82', '73', '64', '54'], 'Nebraska': ['32', '37', '50', '63', '73', '84', '88', '86', '77', '64', '48', '36'], 'Utah': ['38', '44', '53', '61', '71', '82', '90', '89', '78', '65', '50', '40'], 'Hawaii': ['80', '80', '81', '83', '85', '87', '88', '89', '89', '87', '84', '81'], 'Nevada': ['45', '50', '57', '63', '71', '81', '90', '88', '80', '68', '54', '45'], 'Vermont': ['27', '31', '40', '55', '67', '76', '81', '79', '70', '57', '46', 
'33'], 'Idaho': ['38', '45', '55', '62', '72', '81', '91', '90', '79', '65', '48', '38'], 'New Hampshire': ['31', '35', '44', '57', '69', '77', '82', '81', '73', '60', '48', '36'], 'Virginia': ['47', '51', '60', '70', '78', '86', '90', '88', '81', '71', '61', '51'], 'Illinois': ['32', '36', '46', '59', '70', '81', '84', '82', '75', '63', '48', '36'], 'New Jersey': ['39', '42', '51', '62', '72', '82', '86', '84', '77', '65', '55', '44'], 'Washington': ['47', '50', '54', '58', '65', '70', '76', '76', '71', '60', '51', '46'], 'Indiana': ['35', '40', '51', '63', '73', '82', '85', '83', '77', '65', '52', '39'], 'New Mexico': ['44', '48', '56', '65', '74', '83', '86', '83', '78', '67', '53', '43'], 'West Virginia': ['42', '47', '56', '68', '75', '82', '85', '84', '78', '68', '57', '46'], 'Iowa': ['31', '36', '49', '62', '72', '82', '86', '84', '76', '63', '48', '34'], 'New York': ['39', '42', '50', '60', '71', '79', '85', '83', '76', '65', '54', '44'], 'Wisconsin': ['29', '33', '42', '54', '65', '75', '80', '78', '71', '59', '46', '33'], 'Kansas': ['40', '45', '56', '67', '76', '85', '89', '89', '80', '68', '55', '42'], 'North Carolina': ['51', '55', '63', '72', '79', '86', '89', '87', '81', '72', '62', '53'], 'Wyoming': ['40', '40', '47', '55', '65', '75', '83', '81', '72', '59', '47', '38']}\n" + ] + } + ], + "source": [ + "data = {}\n", + "for state_link in state_links:\n", + " url = base_url + state_link\n", + " r = requests.get(base_url + state_link)\n", + " soup = BeautifulSoup(r.text)\n", + " rows = soup.find_all('tr')\n", + " rows = [row for row in rows if 'Average high' in str(row)]\n", + " high_temps = []\n", + " for row in rows:\n", + " tds = row.find_all('td')\n", + " for i in range(1,7):\n", + " high_temps.append(tds[i].text)\n", + " s = soup.title.string\n", + " state = s[s.find(' '):s.find('-')].strip()\n", + " data[state] = high_temps\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Save to CSV file\n", + 
"Lastly, we might want to write all this data to a CSV file. \n", + "Here's a quick easy way to do that." + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [], + "source": [ + "import csv\n", + "\n", + "with open('high_temps.csv','w') as f:\n", + " w = csv.writer(f)\n", + " w.writerows(data.items())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Web Data Mining/Python Requests.ipynb b/Web Data Mining/Python Requests.ipynb new file mode 100644 index 00000000..66f27fed --- /dev/null +++ b/Web Data Mining/Python Requests.ipynb @@ -0,0 +1,419 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python Requests\n", + "(c) 2019, Joe James.\n", + "MIT License.\n", + "\n", + "Tutorial on using the [Requests](http://docs.python-requests.org/en/master/user/quickstart/) library to access HTTP requests, GET, POST, PUT, DELETE, HEAD, OPTIONS. \n", + "This notebook also covers how to use the Python [JSON](https://docs.python.org/3/library/json.html) library to parse values out of a GET response. \n", + "If you don't have the requests library installed you can run 'pip install requests' or some equivalent command for your system in the console window. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "import json" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [], + "source": [ + "r = requests.get('https://api.github.com/events')\n", + "r = requests.post('https://httpbin.org/post', data = {'name':'Joe'})\n", + "r = requests.put('https://httpbin.org/put', data = {'name':'Joe'})\n", + "r = requests.delete('https://httpbin.org/delete')\n", + "r = requests.head('https://httpbin.org/get')\n", + "r = requests.options('https://httpbin.org/get')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### GET Requests - Passing Parameters in URLs\n", + "A URL that returns an HTTP response in JSON format is called an API endpoint. \n", + "Here's an example, https://httpbin.org/get \n", + "\n", + "With GET requests we can add parameters onto the URL to retrieve specific data. \n", + "We define the params as a dictionary, and add params=payload to the Request. \n", + "The Requests library builds the whole URL for us." + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://httpbin.org/get?key1=value1&key2=value2\n" + ] + } + ], + "source": [ + "payload = {'key1': 'value1', 'key2': 'value2'}\n", + "r = requests.get('https://httpbin.org/get', params=payload)\n", + "print(r.url)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Passing a List as a parameter** \n", + "Still use key:value pairs, with the list as the value. \n", + "You can see here all the different attributes included in an HTTP Request response." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "URL: https://httpbin.org/get?key1=value1&key2=value2&key2=value3\n", + "ENCODING: None\n", + "STATUS_CODE: 200\n", + "HEADERS: {'Access-Control-Allow-Credentials': 'true', 'Access-Control-Allow-Origin': '*', 'Content-Encoding': 'gzip', 'Content-Type': 'application/json', 'Date': 'Tue, 26 Feb 2019 18:13:35 GMT', 'Server': 'nginx', 'Content-Length': '229', 'Connection': 'keep-alive'}\n", + "TEXT: {\n", + " \"args\": {\n", + " \"key1\": \"value1\", \n", + " \"key2\": [\n", + " \"value2\", \n", + " \"value3\"\n", + " ]\n", + " }, \n", + " \"headers\": {\n", + " \"Accept\": \"*/*\", \n", + " \"Accept-Encoding\": \"gzip, deflate\", \n", + " \"Host\": \"httpbin.org\", \n", + " \"User-Agent\": \"python-requests/2.21.0\"\n", + " }, \n", + " \"origin\": \"99.99.39.153, 99.99.39.153\", \n", + " \"url\": \"https://httpbin.org/get?key1=value1&key2=value2&key2=value3\"\n", + "}\n", + "\n", + "CONTENT: b'{\\n \"args\": {\\n \"key1\": \"value1\", \\n \"key2\": [\\n \"value2\", \\n \"value3\"\\n ]\\n }, \\n \"headers\": {\\n \"Accept\": \"*/*\", \\n \"Accept-Encoding\": \"gzip, deflate\", \\n \"Host\": \"httpbin.org\", \\n \"User-Agent\": \"python-requests/2.21.0\"\\n }, \\n \"origin\": \"99.99.39.153, 99.99.39.153\", \\n \"url\": \"https://httpbin.org/get?key1=value1&key2=value2&key2=value3\"\\n}\\n'\n", + "JSON: >\n" + ] + } + ], + "source": [ + "payload = {'key1': 'value1', 'key2': ['value2', 'value3']}\n", + "r = requests.get('https://httpbin.org/get', params=payload)\n", + "print('URL: ', r.url)\n", + "print('ENCODING: ', r.encoding)\n", + "print('STATUS_CODE: ', r.status_code)\n", + "print('HEADERS: ', r.headers)\n", + "print('TEXT: ', r.text)\n", + "print('CONTENT: ', r.content)\n", + "print('JSON: ', r.json)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 
POST Requests\n", + "We can add parameters to a POST request in Dictionary format, but we use data=payload. \n", + "POST requests are used to upload new records to the server. \n", + "POST would typically be used to get data from a web form and submit it to the server. " + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"args\": {}, \n", + " \"data\": \"\", \n", + " \"files\": {}, \n", + " \"form\": {\n", + " \"key1\": \"value1\", \n", + " \"key2\": \"value2\"\n", + " }, \n", + " \"headers\": {\n", + " \"Accept\": \"*/*\", \n", + " \"Accept-Encoding\": \"gzip, deflate\", \n", + " \"Content-Length\": \"23\", \n", + " \"Content-Type\": \"application/x-www-form-urlencoded\", \n", + " \"Host\": \"httpbin.org\", \n", + " \"User-Agent\": \"python-requests/2.21.0\"\n", + " }, \n", + " \"json\": null, \n", + " \"origin\": \"99.99.39.153, 99.99.39.153\", \n", + " \"url\": \"https://httpbin.org/post/"\n", + "}\n", + "\n" + ] + } + ], + "source": [ + "r = requests.post('https://httpbin.org/post', data = {'name':'Joe'})\n", + "\n", + "payload = {'key1': 'value1', 'key2': 'value2'}\n", + "r = requests.post(\"https://httpbin.org/post/", data=payload)\n", + "print(r.text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Requests to GET Currency Exchange Data\n", + "Here's a handy endpoint where we can GET foreign currency exchange rates in JSON format, https://api.exchangeratesapi.io/latest" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"{\"rates\":{\"MXN\":21.7145,\"AUD\":1.5897,\"HKD\":8.9178,\"RON\":4.7626,\"HRK\":7.4275,\"CHF\":1.1371,\"IDR\":15917.9,\"CAD\":1.5024,\"USD\":1.1361,\"ZAR\":15.752,\"JPY\":125.93,\"BRL\":4.2574,\"HUF\":317.06,\"CZK\":25.663,\"NOK\":9.7725,\"INR\":80.853,\"PLN\":4.3282,\"ISK\":136.1,\"PHP\":59.144,\"SEK\":10.5858,\"ILS\":4.1148,\"GBP\":0.86055,\"SGD\":1.5332,\"CNY\":7.6077,\"TRY\":6.0254,\"MYR\":4.6157,\"RUB\":74.6158,\"NZD\":1.652,\"KRW\":1270.0,\"THB\":35.583,\"BGN\":1.9558,\"DKK\":7.4616},\"base\":\"EUR\",\"date\":\"2019-02-26\"}\n" + ] + } + ], + "source": [ + "url = 'https://api.exchangeratesapi.io/latest'\n", + "r = requests.get(url)\n", + "print(r.text)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**It looks like the default is base:EUR, but we want exchange rates for USD, so we can pass in a parameter for base. \n", + "We can also put in any date we want.**" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"rates\":{\"MXN\":18.8315549401,\"AUD\":1.2571475116,\"HKD\":7.823572534,\"RON\":3.7694876599,\"HRK\":6.0552252179,\"CHF\":0.9610654069,\"IDR\":13307.03754989,\"CAD\":1.2432190274,\"USD\":1.0,\"JPY\":110.621487334,\"BRL\":3.1959762157,\"PHP\":50.2997474953,\"CZK\":20.7957970188,\"NOK\":7.8771686894,\"INR\":63.5175531482,\"PLN\":3.3954549157,\"MYR\":3.9560153132,\"ZAR\":12.302191089,\"ILS\":3.399609025,\"GBP\":0.7252830496,\"SGD\":1.3214140262,\"HUF\":251.6086991936,\"EUR\":0.8145312373,\"CNY\":6.4380548994,\"TRY\":3.7828459721,\"SEK\":8.0096929217,\"RUB\":56.4333306182,\"NZD\":1.3706931661,\"KRW\":1063.5660177568,\"THB\":31.9247373137,\"BGN\":1.5930601939,\"DKK\":6.0679319052},\"base\":\"USD\",\"date\":\"2018-01-15\"}\n" + ] + } + ], + "source": [ + "url = 'https://api.exchangeratesapi.io/2018-01-15'\n", + "r = requests.get(url, params={'base':'USD'})\n", + "print(r.text)" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "### Decoding JSON data\n", + "Now we have the rates in JSON format. We need to convert that to usable data. \n", + "The JSON library basically has two functions: \n", + "- json.loads( ) converts a text string into Python dict/list objects. \n", + "- json.dumps( ) converts dict/list objects into a string. \n", + "\n", + "We need to decode the JSON data into a dictionary, then get the rate for GBP, convert it to a float, and do a conversion." + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'MXN': 18.8315549401, 'AUD': 1.2571475116, 'HKD': 7.823572534, 'RON': 3.7694876599, 'HRK': 6.0552252179, 'CHF': 0.9610654069, 'IDR': 13307.03754989, 'CAD': 1.2432190274, 'USD': 1.0, 'JPY': 110.621487334, 'BRL': 3.1959762157, 'PHP': 50.2997474953, 'CZK': 20.7957970188, 'NOK': 7.8771686894, 'INR': 63.5175531482, 'PLN': 3.3954549157, 'MYR': 3.9560153132, 'ZAR': 12.302191089, 'ILS': 3.399609025, 'GBP': 0.7252830496, 'SGD': 1.3214140262, 'HUF': 251.6086991936, 'EUR': 0.8145312373, 'CNY': 6.4380548994, 'TRY': 3.7828459721, 'SEK': 8.0096929217, 'RUB': 56.4333306182, 'NZD': 1.3706931661, 'KRW': 1063.5660177568, 'THB': 31.9247373137, 'BGN': 1.5930601939, 'DKK': 6.0679319052}\n", + "0.7252830496\n", + "200USD = 145.05660992 GBP\n" + ] + } + ], + "source": [ + "rates_json = json.loads(r.text)['rates']\n", + "print(rates_json)\n", + "print(rates_json['GBP'])\n", + "gbp = float(rates_json['GBP'])\n", + "print('200USD = ', str(gbp * 200), 'GBP')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Requests to GET Song Data\n", + "Every API has documentation on how to use it. 
\n", + "You can find the docs for this Song Data API [here.](https://documenter.getpostman.com/view/3719697/RzfarXB4)" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[{\"id\":12,\"name\":\"Beatles\",\"year_started\":1960,\"year_quit\":1970,\"text\":\"Beatles\"},{\"id\":14,\"name\":\"Dario G\",\"year_started\":1997,\"year_quit\":null,\"text\":\"Dario G\"},{\"id\":16,\"name\":\"Fleetwood Mac\",\"year_started\":1967,\"year_quit\":null,\"text\":\"Fleetwood Mac\"},{\"id\":17,\"name\":\"Blink 182\",\"year_started\":1992,\"year_quit\":null,\"text\":\"Blink 182\"},{\"id\":18,\"name\":\"Bloc Party\",\"year_started\":2002,\"year_quit\":null,\"text\":\"Bloc Party\"},{\"id\":19,\"name\":\"The Temper Trap\",\"year_started\":2005,\"year_quit\":null,\"text\":\"The Temper Trap\"},{\"id\":20,\"name\":\"MGMT\",\"year_started\":2002,\"year_quit\":null,\"text\":\"MGMT\"},{\"id\":21,\"name\":\"Coldplay\",\"year_started\":1996,\"year_quit\":null,\"text\":\"Coldplay\"},{\"id\":22,\"name\":\"\n" + ] + } + ], + "source": [ + "url = 'https://musicdemons.com/api/v1/artist'\n", + "r = requests.get(url)\n", + "print(r.text[:700])" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"id\":21,\"name\":\"Coldplay\",\"year_started\":1996,\"year_quit\":null,\"text\":\"Coldplay\"}\n" + ] + } + ], + "source": [ + "url = 'https://musicdemons.com/api/v1/artist/21'\n", + "r = requests.get(url)\n", + "print(r.text)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"id\":21,\"name\":\"Coldplay\",\"year_started\":1996,\"year_quit\":null,\"text\":\"Coldplay\",\"songs\":[{\"id\":1,\"title\":\"Something Just Like 
This\",\"released\":\"02\\/22\\/2017\",\"text\":\"Something Just Like This\",\"youtube_id\":\"FM7MFYoylVs\",\"pivot\":{\"artist_id\":21,\"song_id\":1},\"subject\":{\"id\":226,\"subjectable_id\":1,\"subjectable_type\":\"App\\\\Entities\\\\MusicDemons\\\\Song\"}},{\"id\":11,\"title\":\"Hymn For The Weekend\",\"released\":\"01\\/25\\/2016\",\"text\":\"Hymn For The Weekend\",\"youtube_id\":\"YykjpeuMNEk\",\"pivot\":{\"artist_id\":21,\"song_id\":11},\"subject\":{\"id\":233,\"subjectable_id\":11,\"subjectable_type\":\"App\\\\Entities\\\\MusicDemons\\\\Song\"}},{\"id\":78,\"title\":\"Sky Full Of Stars\",\"released\":\"05\\/02\\/2014\",\"text\":\"Sky Full Of Stars\",\n" + ] + } + ], + "source": [ + "url = 'https://musicdemons.com/api/v1/artist/21'\n", + "headers = {'with': 'songs,members'}\n", + "r = requests.get(url, headers=headers)\n", + "print(r.text[:700])" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Coldplay\n", + "Something Just Like This\n", + "Hymn For The Weekend\n", + "Sky Full Of Stars\n", + "Fix You\n", + "Brothers & Sisters\n", + "Shiver\n", + "The Scientist\n", + "Yellow\n", + "Trouble\n", + "Every Teardrop Is a Waterfall\n", + "Life in Technicolor ii\n", + "Adventure Of A Lifetime\n", + "Magic\n", + "The Hardest Part\n", + "Viva la Vida\n", + "1.36\n", + "42\n", + "A Head Full of Dreams\n", + "A Hopeful Transmission\n", + "A Message\n", + "A Rush of Blood to the Head\n", + "Princess of China\n" + ] + } + ], + "source": [ + "import json\n", + "text_json = json.loads(r.text)\n", + "print(text_json['name'])\n", + "for song in text_json['songs']:\n", + " print(song['title'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tips on breaking down JSON \n", + "To get data out of a JSON object, which is a combination of lists and dictionaries, \n", + "just remember for lists you need a numerical index, and for 
key-value pairs you need a text index. \n", + "So if the object looks like this, {\"cars\":[\"id\":1,\"model\":\"Camry\"... you can access the model of the first car with text['cars'][0]['model']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/addition of two number b/addition of two number new file mode 100644 index 00000000..d31335e3 --- /dev/null +++ b/addition of two number @@ -0,0 +1,9 @@ +# Store input numbers +num1 = input('Enter first number: ') +num2 = input('Enter second number: ') + +# Add two numbers +sum = float(num1) + float(num2) + +# Display the sum +print('The sum of {0} and {1} is {2}'.format(num1, num2, sum)) diff --git a/deep_copy.ipynb b/deep_copy.ipynb new file mode 100644 index 00000000..a11d7052 --- /dev/null +++ b/deep_copy.ipynb @@ -0,0 +1,248 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python: how to Copy and Deep Copy Python Lists \n", + "(c) Joe James 2023" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assignment is not a Copy\n", + "listA = listB does not create a copy. Changes to one list will be reflected in the other.\n", + "listA and listB both reference the exact same list." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2, 44, 6, [1, 3]]\n", + "140554034568968\n", + "140554034568968\n" + ] + } + ], + "source": [ + "listA = [2, 4, 6, [1, 3]]\n", + "listB = listA\n", + "listB[1] = 44\n", + "print(listA)\n", + "print(id(listA))\n", + "print(id(listB))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Shallow copy using the list() constructor\n", + "Shallow copy only works for 1D lists of native data types. \n", + "Sublists, dicts, and other objects will retain the same referece to those objects." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2, 4, 6, [55, 3]]\n" + ] + } + ], + "source": [ + "listA = [2, 4, 6, [1, 3]]\n", + "listB = list(listA)\n", + "listB[1] = 44\n", + "listB[3][0] = 55\n", + "print(listA)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other ways to make a Shallow copy\n", + "List comprehensions, list.copy(), or copy.copy() can also be used to make *shallow* copies" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2, 4, 6, [55, 3]]\n" + ] + } + ], + "source": [ + "listA = [2, 4, 6, [1, 3]]\n", + "listB = [x for x in listA]\n", + "listB[1] = 44\n", + "listB[3][0] = 55\n", + "print(listA)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2, 4, 6, [55, 3]]\n" + ] + } + ], + "source": [ + "listA = [2, 4, 6, [1, 3]]\n", + "listB = listA.copy()\n", + "listB[1] = 44\n", + "listB[3][0] = 55\n", + "print(listA)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", 
+ "output_type": "stream", + "text": [ + "[2, 4, 6, [55, 3]]\n" + ] + } + ], + "source": [ + "import copy\n", + "listA = [2, 4, 6, [1, 3]]\n", + "listB = copy.copy(listA)\n", + "listB[1] = 44\n", + "listB[3][0] = 55\n", + "print(listA)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### How to Deep Copy a Python List\n", + "use copy.deepcopy()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2, 4, 6, [1, 3]]\n" + ] + } + ], + "source": [ + "import copy\n", + "listA = [2, 4, 6, [1, 3]]\n", + "listB = copy.deepcopy(listA)\n", + "listB[1] = 44\n", + "listB[3][0] = 55\n", + "print(listA)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Deepcopy with Objects" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "140554035637216 140554035637104\n", + "140554035637216 140554035637216\n", + "140554035637216 140554035637048\n" + ] + } + ], + "source": [ + "class Pony():\n", + " def __init__(self, n):\n", + " self.name = n\n", + " \n", + "# copy.copy on an object gives you 2 unique objects (with same attributes)\n", + "pony1 = Pony('Pinto')\n", + "pony2 = copy.copy(pony1)\n", + "print(id(pony1), id(pony2))\n", + "\n", + "# copy.copy on a list of objects gives you 2 unique lists containing the exact same objects \n", + "# (ie. 
new list is a shallow copy)\n", + "m = [pony1, pony2]\n", + "n = copy.copy (m)\n", + "print(id(m[0]), id(n[0]))\n", + "\n", + "# use copy.deepcopy to deep copy a list of objects\n", + "n = copy.deepcopy (m)\n", + "print(id(m[0]), id(n[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/dict_comprehensions.py b/dict_comprehensions.py new file mode 100644 index 00000000..890c7221 --- /dev/null +++ b/dict_comprehensions.py @@ -0,0 +1,54 @@ +# Python Dictionary Comprehensions +# (c) Joe James 2023 + +# 1. math function to compute values using list +dict1 = {x: 2*x for x in [0, 2, 4, 6]} +print ('1. ', dict1) + +# 2. math function to compute values using range +dict2 = {x: x**2 for x in range(0, 7, 2)} +print ('2. ', dict2) + +# 3. from chars in a string +dict3 = {x: ord(x) for x in 'Kumar'} +print ('3. ', dict3) + +# 4. given lists of keys & values +x = ['Aditii', 'Brandon', 'Clumley', 'Magomed', 'Rishi'] +y = [1, 2, 3, 13, 18] +dict4 = {i: j for (i,j) in zip(x,y)} +print ('4. ', dict4) + +# 5. from chars in a string +x = "python" +dict5 = {i: 3*i.upper() for i in x} +print('5. ', dict5) + +# 6. list comprehension for the value +x = [2, 4, 6, 8] +y = [5, 10, 15, 20] +dict6 = {i: [i + 2*j for j in y] for i in x} +print('6. ', dict6) + +#7. using items +x = {'A':10, 'B':20, 'C':30} +dict7 = {i: j*2 for (i,j) in x.items()} +print('7. ', dict7) + +# 8. conditional comprehension +dict8 = {i: i**3 for i in range(10) if i%2 == 0} +print('8. ', dict8) + +# 9. 
if-else conditional comprehension +x = {'A':10, 'B':20, 'C':30} +dict9 = {i: (j if j < 15 else j+100) for (i,j) in x.items()} +print('9. ', dict9) + +# 10. transformation from an existing dict +x = {'A':10, 'B':20, 'C':30} +dict10 = {i: x[i]+1 for i in x} +print('10. ', dict10) + + + + diff --git a/exception-handling.py b/exception-handling.py new file mode 100644 index 00000000..57ddf118 --- /dev/null +++ b/exception-handling.py @@ -0,0 +1,66 @@ +# something more about try except +# basic syntax +''' +try: + code1 + +except: + some code that will execute if code 1 fails or raise some error + +else: + this code is executed only if try was succesful i.e no error in code1 + +finally: + + this code will execute in every situation if try fails or not +''' + +filename = 'exception_data.txt' +# Outer try block catches file name or file doesn't exist errors. +try: + with open(filename) as fin: + for line in fin: + # print(line) + items = line.split(',') + total = 0 + + # Inner try bock catches integer conversion errors. + try: + for item in items: + num = int(item) + total += num + print('Total = ' + str(total)) + except: + print('Error converting to integer. ', items) +except: + print('Error opening file. ' + filename) + +finally: + print('This is our optional finally block. Code here will execute no matter what.') + + +# Second example: name Error type in except block; call function from try block. 
+def this_fails(): + x = 1/0 +try: + this_fails() +except ZeroDivisionError as err: + print('Handling run-time error:', err) + + +def divide_me(n): + x = 1/n + +i = int(input('enter a number ')) +try: + divide_me(i) + +except Exception as e: + print(e) # this will print the error msg but don't kill the execution of program + +else: + print('Your Code Run Successfully') # this will execute if divide_me(i) run sucessfully without an error + +finally: + print('thanks') # this will execute in every condition + diff --git a/exception_data.txt b/exception_data.txt new file mode 100644 index 00000000..bea828db --- /dev/null +++ b/exception_data.txt @@ -0,0 +1,5 @@ +5,7,28,35,42 +2.9,15,4,80,36 +16,25,7,11,19 +18,15,19,x,10 +27,39,12,21,122 \ No newline at end of file diff --git a/factorial.py b/factorial.py new file mode 100644 index 00000000..8e4a65ea --- /dev/null +++ b/factorial.py @@ -0,0 +1,19 @@ +def get_recursive_factorial(n): + if n < 0: + return -1 + elif n < 2: + return 1 + else: + return n * get_recursive_factorial(n-1) + +def get_iterative_factorial(n): + if n < 0: + return -1 + else: + fact = 1 + for i in range(1, n+1): + fact *= i + return fact +print("input should be an integer") +print(get_recursive_factorial(6)) +print(get_iterative_factorial(6)) diff --git a/flatten_list.py b/flatten_list.py new file mode 100644 index 00000000..3f3c57df --- /dev/null +++ b/flatten_list.py @@ -0,0 +1,27 @@ +# Python Flatten Nested Lists +# (c) Joe James 2023 + +# list comprehension method +def flatten1 (myList): + return [i for j in myList for i in j] + +# recursive method +def flatten2 (myList): + flatList = [] + for item in myList: + if isinstance(item, list): + flatList.extend(flatten2(item)) + else: + flatList.append(item) + return flatList + +list1 = [[0], [1, 2], [3, [4, 5]], [6], [7]] +list2 = [0, [1, 2], [3, [4, 5]], [6], 7] + +print("flatten1(list1): ", flatten1(list1)) # works, but only flattens 1 layer of sublists +# print(flatten1(list2)) # error - can't work 
with list of ints and sublists of ints + +print("flatten2(list1): ", flatten2(list1)) +print("flatten2(list2): ", flatten2(list2)) + + diff --git a/graph_adjacency-list.py b/graph_adjacency-list.py index fec2f958..ebc3f47c 100644 --- a/graph_adjacency-list.py +++ b/graph_adjacency-list.py @@ -4,9 +4,9 @@ def __init__(self, n): self.name = n self.neighbors = list() - def add_neighbor(self, v): + def add_neighbor(self, v, weight): if v not in self.neighbors: - self.neighbors.append(v) + self.neighbors.append((v, weight)) self.neighbors.sort() class Graph: @@ -19,11 +19,11 @@ def add_vertex(self, vertex): else: return False - def add_edge(self, u, v): + def add_edge(self, u, v, weight=0): if u in self.vertices and v in self.vertices: # my YouTube video shows a silly for loop here, but this is a much faster way to do it - self.vertices[u].add_neighbor(v) - self.vertices[v].add_neighbor(u) + self.vertices[u].add_neighbor(v, weight) + self.vertices[v].add_neighbor(u, weight) return True else: return False diff --git a/graph_adjacency-matrix.py b/graph_adjacency-matrix.py index b6d05589..3f315001 100644 --- a/graph_adjacency-matrix.py +++ b/graph_adjacency-matrix.py @@ -1,4 +1,5 @@ # implementation of an undirected graph using Adjacency Matrix, with weighted or unweighted edges +# its definitely work class Vertex: def __init__(self, n): self.name = n @@ -46,4 +47,4 @@ def print_graph(self): for edge in edges: g.add_edge(edge[:1], edge[1:]) -g.print_graph() \ No newline at end of file +g.print_graph() diff --git a/lcm.py b/lcm.py index 8d584ab7..a308141e 100644 --- a/lcm.py +++ b/lcm.py @@ -1,4 +1,4 @@ -# computes Lowest Common Multiple LCM / Least Common Denominator LCD +# computes Lowest Common Multiple (LCM) / Least Common Denominator (LCD) # useful for adding and subtracting fractions # 2 numbers @@ -21,4 +21,4 @@ def lcm3(nums): print(str(lcm(7, 12))) nums = [3, 2, 16] -print(str(lcm3(nums))) \ No newline at end of file +print(str(lcm3(nums))) diff --git 
a/list_comprehensions.py b/list_comprehensions.py new file mode 100644 index 00000000..6e003514 --- /dev/null +++ b/list_comprehensions.py @@ -0,0 +1,42 @@ +# list comprehensions +# basic format: new_list = [transform sequence [filter] ] +import random + +under_10 = [x for x in range(10)] +print('under_10: ' + str(under_10)) + +squares = [x**2 for x in under_10] +print('squares: ' + str(squares)) + +odds = [x for x in range(10) if x%2 == 1] +print('odds: ' + str(odds)) + +ten_x = [x * 10 for x in range(10)] +print('ten_x: ' + str(ten_x)) + +# get all numbers from a string +s = 'I love 2 go t0 the store 7 times a w3ek.' +nums = [x for x in s if x.isnumeric()] +print('nums: ' + ''.join(nums)) + +# get index of a list item +names = ['Cosmo', 'Pedro', 'Anu', 'Ray'] +idx = [k for k, v in enumerate(names) if v == 'Anu'] +print('index = ' + str(idx[0])) + +# delete an item from a list +letters = [x for x in 'ABCDEF'] +random.shuffle(letters) +letrs = [a for a in letters if a != 'C'] +print(letters, letrs) + +# if-else condition in a comprehension (must come before iteration) +nums = [5, 3, 10, 18, 6, 7] +new_list = [x if x%2 == 0 else 10*x for x in nums] +print('new list: ' + str(new_list)) + +# nested loop iteration for 2D list +a = [[1,2],[3,4]] +new_list = [x for b in a for x in b] +print(new_list) + diff --git a/match statements.ipynb b/match statements.ipynb new file mode 100644 index 00000000..a8fc422d --- /dev/null +++ b/match statements.ipynb @@ -0,0 +1,327 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Python 10 - Structural Pattern Matching\n", + "### match statements \n", + "Very similar to switch/case statements in C, Java, and Javascript. \n", + "Can be used in lieu of if/elif/else blocks. \n", + "[documentation](https://www.python.org/dev/peps/pep-0622/)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Can use integer for match variable..." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "large\n" + ] + } + ], + "source": [ + "var = 3\n", + "\n", + "match var:\n", + " case 1:\n", + " print('small')\n", + " case 2:\n", + " print('medium')\n", + " case 3:\n", + " print('large')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ...or floating point..." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "large\n" + ] + } + ], + "source": [ + "var = 1.5\n", + "\n", + "match var:\n", + " case 1.3:\n", + " print('small')\n", + " case 1.4:\n", + " print('medium')\n", + " case 1.5:\n", + " print('large')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ...or Tuple...\n", + "Note here we also use a variable to receive *any* value." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "on x-axis\n" + ] + } + ], + "source": [ + "var = (8,0)\n", + "\n", + "match var:\n", + " case (0,x):\n", + " print('on y-axis')\n", + " case (x,0):\n", + " print('on x-axis')\n", + " case (x,y):\n", + " print('not on axis')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ...or String" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "small\n" + ] + } + ], + "source": [ + "var = \"S\"\n", + "\n", + "match var:\n", + " case \"S\":\n", + " print('small')\n", + " case \"Med\":\n", + " print('medium')\n", + " case \"Lg\":\n", + " print('large')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### The Default case _ \n", + "The default case, using underscore, is optional. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "large\n" + ] + } + ], + "source": [ + "var = 4\n", + "\n", + "match var:\n", + " case 1:\n", + " print('small')\n", + " case 2:\n", + " print('medium')\n", + " case _:\n", + " print('large')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Conditionals in case \n", + "*or* conditions (using bar) are supported in case statements." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "small\n" + ] + } + ], + "source": [ + "var = 2\n", + "\n", + "match var:\n", + " case 2 | 3:\n", + " print('small')\n", + " case 4 | 5 | 6:\n", + " print('medium')\n", + " case _:\n", + " print('large')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### No breaks needed\n", + "*if* statements are supported, but must follow syntax, case var if (inequality expression). \n", + "\n", + "Note that you do not need break statements. The match block will automatically end execution after one case is executed." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "A\n", + "F\n" + ] + } + ], + "source": [ + "def print_grade(score):\n", + " match score:\n", + " # case score > 90 this does not work!\n", + " case score if score >= 90:\n", + " print('A')\n", + " case score if score >= 80:\n", + " print('B')\n", + " case score if score >= 70:\n", + " print('C')\n", + " case score if score >= 60:\n", + " print('D')\n", + " case _:\n", + " print('F')\n", + " \n", + "print_grade(94)\n", + "print_grade(48)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Python Objects \n", + "Match statements can also use Python objects and instance variables. 
\n", + "In the final case here we could have used _ default case, but instead used x so that we could use the value of x in our print statement." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "medium\n", + "Size XL is not recognized.\n" + ] + } + ], + "source": [ + "class T_shirt:\n", + " def __init__(self, s):\n", + " self.size = s\n", + "\n", + " def order(self):\n", + " match self.size:\n", + " case 'S' | 'Sm':\n", + " print('small')\n", + " case 'M' | 'Med':\n", + " print('medium')\n", + " case 'L' | 'Lg':\n", + " print('large')\n", + " case x:\n", + " print(f'Size {x} is not recognized.')\n", + " \n", + "shirt1 = T_shirt('Med')\n", + "shirt1.order()\n", + "\n", + "shirt2 = T_shirt('XL')\n", + "shirt2.order()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/python oriented programming b/python oriented programming new file mode 100644 index 00000000..8e3a4499 --- /dev/null +++ b/python oriented programming @@ -0,0 +1,34 @@ +class Mobile: + def make_call(self): + print("i am making a call") + def play_game(self): + print("i am playing games") + +m1=Mobile() + +m1.make_call() + +m1.play_game() + +class Mobile: + def set_color(self,color): + self.color=color + def set_cost(self,cost): + self.cost=cost + def show_color(self): + print("black") + def show_price(self): + print("5000") + def make_call(self): + print("i am making a call") + def 
# Python: del vs pop vs remove from a list
# (c) Joe James 2023

def get_dogs():
    """Return a fresh list of dog names (note that 'Spot' appears twice)."""
    return ['Fido', 'Rover', 'Spot', 'Duke', 'Chip', 'Spot']

dogs = get_dogs()
print(dogs)

# Use pop() to remove last item or an item by index and get the returned value.
print('1. pop last item from list:')
myDog = dogs.pop()
print(myDog, dogs)

dogs = get_dogs()
print('2. pop item with index 1:')
myDog = dogs.pop(1)
print(myDog, dogs)

# Use remove() to delete an item by value. (raises ValueError if value not found)
dogs = get_dogs()
print('3. remove first Spot from list:')
dogs.remove('Spot')
print(dogs)

# Use del to remove an item or range of items by index. Or delete entire list.
# del is a statement, not a function, so it is written without parentheses.
dogs = get_dogs()
print('4. del item with index 3:')
del dogs[3]
print(dogs)

dogs = get_dogs()
print('5. del items [1:3] from list:')
del dogs[1:3]
print(dogs)

dogs = get_dogs()
print('6. del entire list:')
del dogs
# After `del dogs` the name itself is unbound, so reading it raises
# NameError. Catch it so the demo shows the effect without ending in a
# traceback (and so the module can be imported).
try:
    print(dogs)
except NameError as err:
    print(f'NameError: {err}')