From ef5d393092b07de17c29684357ad6bf8c8722408 Mon Sep 17 00:00:00 2001
From: Muralidharan <114425694+murale1@users.noreply.github.com>
Date: Sun, 10 Aug 2025 20:18:44 +0530
Subject: [PATCH] Created using Colab
---
booksource/exercises/PandasExercises.ipynb | 11278 +++++++++++++++++++
1 file changed, 11278 insertions(+)
create mode 100644 booksource/exercises/PandasExercises.ipynb
diff --git a/booksource/exercises/PandasExercises.ipynb b/booksource/exercises/PandasExercises.ipynb
new file mode 100644
index 0000000..b3ca85e
--- /dev/null
+++ b/booksource/exercises/PandasExercises.ipynb
@@ -0,0 +1,11278 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "e79a44bb-42c8-4ace-8d71-770c06522bfc",
+ "metadata": {
+ "id": "e79a44bb-42c8-4ace-8d71-770c06522bfc"
+ },
+ "source": [
+ "# Pandas Exercises"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1d26ef1e-3c49-4a46-a4c5-b68825d5a953",
+ "metadata": {
+ "id": "1d26ef1e-3c49-4a46-a4c5-b68825d5a953"
+ },
+ "source": [
+ "## Creating DataFrames and Using Sample Data Sets"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bc5dd17f-4431-4655-b2c5-ea537e23223c",
+ "metadata": {
+ "id": "bc5dd17f-4431-4655-b2c5-ea537e23223c"
+ },
+ "source": [
+ "This is the Jupyter Notebook runnable exercises version of the article, [Pandas Practice Questions – Fifty-Two Examples to Make You an Expert](https://codesolid.com/pandas-practice-questions-twenty-one-examples-to-make-you-an-expert/)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "df91e92f-5f2b-4e98-81f7-84e7478ed432",
+ "metadata": {
+ "id": "df91e92f-5f2b-4e98-81f7-84e7478ed432"
+ },
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import seaborn as sb"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "tdkheqKk6_4s"
+ },
+ "id": "tdkheqKk6_4s",
+ "execution_count": 2,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6950cc47-3cc0-4565-8df1-d01741b7ea5f",
+ "metadata": {
+ "id": "6950cc47-3cc0-4565-8df1-d01741b7ea5f"
+ },
+ "source": [
+ "**1.** Using NumPy, create a Pandas DataFrame with five rows and three columns:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "dd1a5014-d17e-4fd0-b7a2-db261a53a02a",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 143
+ },
+ "id": "dd1a5014-d17e-4fd0-b7a2-db261a53a02a",
+ "outputId": "45ab68cd-eb73-4ff3-9fdd-5580c11d1ccd"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " 0 1 2 3\n",
+ "0 2 8 8 7\n",
+ "1 5 7 7 9\n",
+ "2 8 5 9 7"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2 | \n",
+ " 8 | \n",
+ " 8 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 5 | \n",
+ " 7 | \n",
+ " 7 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 8 | \n",
+ " 5 | \n",
+ " 9 | \n",
+ " 7 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": 0,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 2,\n \"max\": 8,\n \"num_unique_values\": 3,\n \"samples\": [\n 2,\n 5,\n 8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 1,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 5,\n \"max\": 8,\n \"num_unique_values\": 3,\n \"samples\": [\n 8,\n 7,\n 5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 2,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 7,\n \"max\": 9,\n \"num_unique_values\": 3,\n \"samples\": [\n 8,\n 7,\n 9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 3,\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 7,\n \"max\": 9,\n \"num_unique_values\": 2,\n \"samples\": [\n 9,\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ],
+ "source": [
+ "array = np.random.randint(1, 10, (5, 3))  # size is (rows, columns): five rows, three columns as the exercise asks\n",
+ "df = pd.DataFrame(array)  # no index/columns given, so both axes get default integer labels\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9c74f94d-4adf-403d-a2c4-9efd200ba5b2",
+ "metadata": {
+ "id": "9c74f94d-4adf-403d-a2c4-9efd200ba5b2"
+ },
+ "source": [
+ "**2.** For a Pandas DataFrame created from a NumPy array, what is the default behavior for the labels for the columns? For the rows?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e7508b50-d724-47a7-8272-9ba040cd9c65",
+ "metadata": {
+ "id": "e7508b50-d724-47a7-8272-9ba040cd9c65"
+ },
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1a20442a-ea2c-4d32-877b-bfe5ae349318",
+ "metadata": {
+ "id": "1a20442a-ea2c-4d32-877b-bfe5ae349318"
+ },
+ "source": [
+ "**3.** Create a second DataFrame as above with five rows and three columns, setting the row labels to the names of any five major US cities and the column labels to the first three months of the year."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "02686eae-cba2-4180-94ff-4dc85eca0731",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "02686eae-cba2-4180-94ff-4dc85eca0731",
+ "outputId": "ecb1448f-599a-4af1-ed05-a03a58bd7150"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Jan Feb Mar\n",
+ "Newyork 1 7 1\n",
+ "Washington 6 4 9\n",
+ "Florida 2 1 4\n",
+ "Texas 4 4 6\n",
+ "Ohio 5 6 3"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Jan | \n",
+ " Feb | \n",
+ " Mar | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Newyork | \n",
+ " 1 | \n",
+ " 7 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " | Washington | \n",
+ " 6 | \n",
+ " 4 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " | Florida | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ "
\n",
+ " \n",
+ " | Texas | \n",
+ " 4 | \n",
+ " 4 | \n",
+ " 6 | \n",
+ "
\n",
+ " \n",
+ " | Ohio | \n",
+ " 5 | \n",
+ " 6 | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "array2",
+ "summary": "{\n \"name\": \"array2\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"Jan\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 6,\n \"num_unique_values\": 5,\n \"samples\": [\n 6,\n 5,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Feb\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2,\n \"min\": 1,\n \"max\": 7,\n \"num_unique_values\": 4,\n \"samples\": [\n 4,\n 6,\n 7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Mar\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3,\n \"min\": 1,\n \"max\": 9,\n \"num_unique_values\": 5,\n \"samples\": [\n 9,\n 3,\n 4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 4
+ }
+ ],
+ "source": [
+ "indices = [\"New York\", \"Los Angeles\", \"Chicago\", \"Houston\", \"Phoenix\"]  # row labels: cities, not states\n",
+ "columns_name = [\"Jan\", \"Feb\", \"Mar\"]  # column labels: first three months of the year\n",
+ "array2 = pd.DataFrame(np.random.randint(1, 10, (5, 3)), index=indices, columns=columns_name)\n",
+ "array2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f6c2f45b-0fea-49bf-98a0-572d594a0680",
+ "metadata": {
+ "id": "f6c2f45b-0fea-49bf-98a0-572d594a0680"
+ },
+ "source": [
+ "**4.** You recall that the Seaborn package has some data sets built in, but can't remember how to list and load them. Assuming the functions to do so have \"data\" in the name, how might you locate them? You can assume a Jupyter Notebook / IPython environment and explain the process, or write the code to do it in Python."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8afae59d-eb5a-4692-9924-b54d49942769",
+ "metadata": {
+ "id": "8afae59d-eb5a-4692-9924-b54d49942769"
+ },
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "ce709b0a-cb22-4f33-9492-62f8346d0ef4",
+ "metadata": {
+ "id": "ce709b0a-cb22-4f33-9492-62f8346d0ef4"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "70efcde8-cd73-4d61-8534-e6a3c469dbff",
+ "metadata": {
+ "id": "70efcde8-cd73-4d61-8534-e6a3c469dbff"
+ },
+ "source": [
+ "## Loading data from CSV"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "00214582-1340-4700-99b5-9f36afce3e16",
+ "metadata": {
+ "id": "00214582-1340-4700-99b5-9f36afce3e16"
+ },
+ "source": [
+ "**5.** Zillow home data is available at this URL: https://files.zillowstatic.com/research/public_csvs/zhvi/Metro_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv\n",
+ "\n",
+ "Open this file as a DataFrame named df_homes in Pandas."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "2838d3d2-5f1c-4e97-aab6-c7236ced82ad",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 530
+ },
+ "id": "2838d3d2-5f1c-4e97-aab6-c7236ced82ad",
+ "outputId": "625ff4d0-c62c-44c9-89d8-27cfb7d72c95"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " RegionID SizeRank RegionName RegionType StateName 2000-01-31 \\\n",
+ "0 102001 0 United States country NaN 124952.583264 \n",
+ "1 394913 1 New York, NY msa NY 224104.107786 \n",
+ "2 753899 2 Los Angeles, CA msa CA 228441.264620 \n",
+ "3 394463 3 Chicago, IL msa IL 159204.293651 \n",
+ "4 394514 4 Dallas, TX msa TX 130776.117982 \n",
+ ".. ... ... ... ... ... ... \n",
+ "890 753929 935 Zapata, TX msa TX NaN \n",
+ "891 394743 936 Ketchikan, AK msa AK NaN \n",
+ "892 753874 937 Craig, CO msa CO 99313.814620 \n",
+ "893 395188 938 Vernon, TX msa TX NaN \n",
+ "894 394767 939 Lamesa, TX msa TX NaN \n",
+ "\n",
+ " 2000-02-29 2000-03-31 2000-04-30 2000-05-31 ... \\\n",
+ "0 125172.414915 125445.038530 126029.762672 126702.710055 ... \n",
+ "1 225056.423800 226017.559977 227965.193784 229982.036039 ... \n",
+ "2 229291.355968 230423.669537 232676.593166 235140.585322 ... \n",
+ "3 159351.670450 159632.352157 160330.618493 161170.548906 ... \n",
+ "4 130834.366121 130901.328321 131075.222120 131304.496332 ... \n",
+ ".. ... ... ... ... ... \n",
+ "890 NaN NaN NaN NaN ... \n",
+ "891 NaN NaN NaN NaN ... \n",
+ "892 99567.766989 100040.559776 100713.860374 101496.281760 ... \n",
+ "893 NaN NaN NaN NaN ... \n",
+ "894 NaN NaN NaN NaN ... \n",
+ "\n",
+ " 2024-09-30 2024-10-31 2024-11-30 2024-12-31 \\\n",
+ "0 368342.550149 368944.348980 369448.501052 370184.720224 \n",
+ "1 694463.939098 698112.907260 700981.582357 702828.956760 \n",
+ "2 979263.294758 984668.937931 989326.464086 993489.580771 \n",
+ "3 336401.662700 337417.542386 338496.548214 339792.563198 \n",
+ "4 386210.886078 386101.104828 385802.176509 385258.927239 \n",
+ ".. ... ... ... ... \n",
+ "890 142519.346796 141543.788987 140694.812207 140081.758149 \n",
+ "891 384645.541128 386527.958063 387033.635531 387415.739710 \n",
+ "892 290338.697990 292045.315574 293275.268519 294341.950896 \n",
+ "893 113880.850440 111889.316393 109490.077942 107891.774574 \n",
+ "894 109418.481275 109429.692091 108856.803440 108480.184567 \n",
+ "\n",
+ " 2025-01-31 2025-02-28 2025-03-31 2025-04-30 \\\n",
+ "0 370828.567353 371298.260498 371016.982722 370477.532421 \n",
+ "1 703938.611027 705266.111170 707009.440660 709510.127516 \n",
+ "2 994044.173013 991951.863479 986697.459794 982246.652053 \n",
+ "3 341188.253115 342585.506509 343463.210750 344086.738417 \n",
+ "4 384716.914401 383910.093012 382370.526648 380191.067291 \n",
+ ".. ... ... ... ... \n",
+ "890 139591.033021 139060.037085 138296.346480 136113.475153 \n",
+ "891 387896.552064 389139.536436 389956.139487 391704.282777 \n",
+ "892 294625.438545 295400.972873 295720.152107 297001.710359 \n",
+ "893 106725.247139 105661.977276 104405.236538 102838.777165 \n",
+ "894 107779.740122 108144.520025 108602.309747 108934.388219 \n",
+ "\n",
+ " 2025-05-31 2025-06-30 \n",
+ "0 369777.238792 369146.758903 \n",
+ "1 711326.861714 712842.475773 \n",
+ "2 977325.621835 972836.618922 \n",
+ "3 344388.252982 344762.755307 \n",
+ "4 377715.452411 375293.228730 \n",
+ ".. ... ... \n",
+ "890 133730.331027 131478.742029 \n",
+ "891 394878.913726 399233.112133 \n",
+ "892 298402.180459 299467.780061 \n",
+ "893 101577.252143 100654.840043 \n",
+ "894 108796.035251 108656.736812 \n",
+ "\n",
+ "[895 rows x 311 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " RegionID | \n",
+ " SizeRank | \n",
+ " RegionName | \n",
+ " RegionType | \n",
+ " StateName | \n",
+ " 2000-01-31 | \n",
+ " 2000-02-29 | \n",
+ " 2000-03-31 | \n",
+ " 2000-04-30 | \n",
+ " 2000-05-31 | \n",
+ " ... | \n",
+ " 2024-09-30 | \n",
+ " 2024-10-31 | \n",
+ " 2024-11-30 | \n",
+ " 2024-12-31 | \n",
+ " 2025-01-31 | \n",
+ " 2025-02-28 | \n",
+ " 2025-03-31 | \n",
+ " 2025-04-30 | \n",
+ " 2025-05-31 | \n",
+ " 2025-06-30 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 102001 | \n",
+ " 0 | \n",
+ " United States | \n",
+ " country | \n",
+ " NaN | \n",
+ " 124952.583264 | \n",
+ " 125172.414915 | \n",
+ " 125445.038530 | \n",
+ " 126029.762672 | \n",
+ " 126702.710055 | \n",
+ " ... | \n",
+ " 368342.550149 | \n",
+ " 368944.348980 | \n",
+ " 369448.501052 | \n",
+ " 370184.720224 | \n",
+ " 370828.567353 | \n",
+ " 371298.260498 | \n",
+ " 371016.982722 | \n",
+ " 370477.532421 | \n",
+ " 369777.238792 | \n",
+ " 369146.758903 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 394913 | \n",
+ " 1 | \n",
+ " New York, NY | \n",
+ " msa | \n",
+ " NY | \n",
+ " 224104.107786 | \n",
+ " 225056.423800 | \n",
+ " 226017.559977 | \n",
+ " 227965.193784 | \n",
+ " 229982.036039 | \n",
+ " ... | \n",
+ " 694463.939098 | \n",
+ " 698112.907260 | \n",
+ " 700981.582357 | \n",
+ " 702828.956760 | \n",
+ " 703938.611027 | \n",
+ " 705266.111170 | \n",
+ " 707009.440660 | \n",
+ " 709510.127516 | \n",
+ " 711326.861714 | \n",
+ " 712842.475773 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 753899 | \n",
+ " 2 | \n",
+ " Los Angeles, CA | \n",
+ " msa | \n",
+ " CA | \n",
+ " 228441.264620 | \n",
+ " 229291.355968 | \n",
+ " 230423.669537 | \n",
+ " 232676.593166 | \n",
+ " 235140.585322 | \n",
+ " ... | \n",
+ " 979263.294758 | \n",
+ " 984668.937931 | \n",
+ " 989326.464086 | \n",
+ " 993489.580771 | \n",
+ " 994044.173013 | \n",
+ " 991951.863479 | \n",
+ " 986697.459794 | \n",
+ " 982246.652053 | \n",
+ " 977325.621835 | \n",
+ " 972836.618922 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 394463 | \n",
+ " 3 | \n",
+ " Chicago, IL | \n",
+ " msa | \n",
+ " IL | \n",
+ " 159204.293651 | \n",
+ " 159351.670450 | \n",
+ " 159632.352157 | \n",
+ " 160330.618493 | \n",
+ " 161170.548906 | \n",
+ " ... | \n",
+ " 336401.662700 | \n",
+ " 337417.542386 | \n",
+ " 338496.548214 | \n",
+ " 339792.563198 | \n",
+ " 341188.253115 | \n",
+ " 342585.506509 | \n",
+ " 343463.210750 | \n",
+ " 344086.738417 | \n",
+ " 344388.252982 | \n",
+ " 344762.755307 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 394514 | \n",
+ " 4 | \n",
+ " Dallas, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " 130776.117982 | \n",
+ " 130834.366121 | \n",
+ " 130901.328321 | \n",
+ " 131075.222120 | \n",
+ " 131304.496332 | \n",
+ " ... | \n",
+ " 386210.886078 | \n",
+ " 386101.104828 | \n",
+ " 385802.176509 | \n",
+ " 385258.927239 | \n",
+ " 384716.914401 | \n",
+ " 383910.093012 | \n",
+ " 382370.526648 | \n",
+ " 380191.067291 | \n",
+ " 377715.452411 | \n",
+ " 375293.228730 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 890 | \n",
+ " 753929 | \n",
+ " 935 | \n",
+ " Zapata, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 142519.346796 | \n",
+ " 141543.788987 | \n",
+ " 140694.812207 | \n",
+ " 140081.758149 | \n",
+ " 139591.033021 | \n",
+ " 139060.037085 | \n",
+ " 138296.346480 | \n",
+ " 136113.475153 | \n",
+ " 133730.331027 | \n",
+ " 131478.742029 | \n",
+ "
\n",
+ " \n",
+ " | 891 | \n",
+ " 394743 | \n",
+ " 936 | \n",
+ " Ketchikan, AK | \n",
+ " msa | \n",
+ " AK | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 384645.541128 | \n",
+ " 386527.958063 | \n",
+ " 387033.635531 | \n",
+ " 387415.739710 | \n",
+ " 387896.552064 | \n",
+ " 389139.536436 | \n",
+ " 389956.139487 | \n",
+ " 391704.282777 | \n",
+ " 394878.913726 | \n",
+ " 399233.112133 | \n",
+ "
\n",
+ " \n",
+ " | 892 | \n",
+ " 753874 | \n",
+ " 937 | \n",
+ " Craig, CO | \n",
+ " msa | \n",
+ " CO | \n",
+ " 99313.814620 | \n",
+ " 99567.766989 | \n",
+ " 100040.559776 | \n",
+ " 100713.860374 | \n",
+ " 101496.281760 | \n",
+ " ... | \n",
+ " 290338.697990 | \n",
+ " 292045.315574 | \n",
+ " 293275.268519 | \n",
+ " 294341.950896 | \n",
+ " 294625.438545 | \n",
+ " 295400.972873 | \n",
+ " 295720.152107 | \n",
+ " 297001.710359 | \n",
+ " 298402.180459 | \n",
+ " 299467.780061 | \n",
+ "
\n",
+ " \n",
+ " | 893 | \n",
+ " 395188 | \n",
+ " 938 | \n",
+ " Vernon, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 113880.850440 | \n",
+ " 111889.316393 | \n",
+ " 109490.077942 | \n",
+ " 107891.774574 | \n",
+ " 106725.247139 | \n",
+ " 105661.977276 | \n",
+ " 104405.236538 | \n",
+ " 102838.777165 | \n",
+ " 101577.252143 | \n",
+ " 100654.840043 | \n",
+ "
\n",
+ " \n",
+ " | 894 | \n",
+ " 394767 | \n",
+ " 939 | \n",
+ " Lamesa, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 109418.481275 | \n",
+ " 109429.692091 | \n",
+ " 108856.803440 | \n",
+ " 108480.184567 | \n",
+ " 107779.740122 | \n",
+ " 108144.520025 | \n",
+ " 108602.309747 | \n",
+ " 108934.388219 | \n",
+ " 108796.035251 | \n",
+ " 108656.736812 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
895 rows × 311 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_homes"
+ }
+ },
+ "metadata": {},
+ "execution_count": 5
+ }
+ ],
+ "source": [
+ "df_homes = pd.read_csv(\"https://files.zillowstatic.com/research/public_csvs/zhvi/Metro_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv\")  # read the CSV straight from the Zillow URL\n",
+ "df_homes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4476553a-1f93-4259-87df-69670ef486c0",
+ "metadata": {
+ "id": "4476553a-1f93-4259-87df-69670ef486c0"
+ },
+ "source": [
+ "**6.** Save the DataFrame, df_homes, to a local CSV file, \"zillow_home_data.csv\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "1d237615-d23c-4f62-a0b8-c709deb95647",
+ "metadata": {
+ "id": "1d237615-d23c-4f62-a0b8-c709deb95647"
+ },
+ "outputs": [],
+ "source": [
+ "df_homes.to_csv(\"zillow_home_data.csv\", index=True)  # index=True is the default: the row labels are written as an extra unnamed first column"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "182e0039-63dc-49df-9413-7eede1453fe0",
+ "metadata": {
+ "id": "182e0039-63dc-49df-9413-7eede1453fe0"
+ },
+ "source": [
+ "**7.** Load zillow_home_data.csv back into a new DataFrame, df_homes_2."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "53665adb-adb2-462f-a244-01665182e870",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 547
+ },
+ "id": "53665adb-adb2-462f-a244-01665182e870",
+ "outputId": "c29f8c39-d52f-44df-b4c5-109989fe8fc9"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " Unnamed: 0 RegionID SizeRank RegionName RegionType StateName \\\n",
+ "0 0 102001 0 United States country NaN \n",
+ "1 1 394913 1 New York, NY msa NY \n",
+ "2 2 753899 2 Los Angeles, CA msa CA \n",
+ "3 3 394463 3 Chicago, IL msa IL \n",
+ "4 4 394514 4 Dallas, TX msa TX \n",
+ ".. ... ... ... ... ... ... \n",
+ "890 890 753929 935 Zapata, TX msa TX \n",
+ "891 891 394743 936 Ketchikan, AK msa AK \n",
+ "892 892 753874 937 Craig, CO msa CO \n",
+ "893 893 395188 938 Vernon, TX msa TX \n",
+ "894 894 394767 939 Lamesa, TX msa TX \n",
+ "\n",
+ " 2000-01-31 2000-02-29 2000-03-31 2000-04-30 ... \\\n",
+ "0 124952.583264 125172.414915 125445.038530 126029.762672 ... \n",
+ "1 224104.107786 225056.423800 226017.559977 227965.193784 ... \n",
+ "2 228441.264620 229291.355968 230423.669537 232676.593166 ... \n",
+ "3 159204.293651 159351.670450 159632.352157 160330.618493 ... \n",
+ "4 130776.117982 130834.366121 130901.328321 131075.222120 ... \n",
+ ".. ... ... ... ... ... \n",
+ "890 NaN NaN NaN NaN ... \n",
+ "891 NaN NaN NaN NaN ... \n",
+ "892 99313.814620 99567.766989 100040.559776 100713.860374 ... \n",
+ "893 NaN NaN NaN NaN ... \n",
+ "894 NaN NaN NaN NaN ... \n",
+ "\n",
+ " 2024-09-30 2024-10-31 2024-11-30 2024-12-31 \\\n",
+ "0 368342.550149 368944.348980 369448.501052 370184.720224 \n",
+ "1 694463.939098 698112.907260 700981.582357 702828.956760 \n",
+ "2 979263.294758 984668.937931 989326.464086 993489.580771 \n",
+ "3 336401.662700 337417.542386 338496.548214 339792.563198 \n",
+ "4 386210.886078 386101.104828 385802.176509 385258.927239 \n",
+ ".. ... ... ... ... \n",
+ "890 142519.346796 141543.788987 140694.812207 140081.758149 \n",
+ "891 384645.541128 386527.958063 387033.635531 387415.739710 \n",
+ "892 290338.697990 292045.315574 293275.268519 294341.950896 \n",
+ "893 113880.850440 111889.316393 109490.077942 107891.774574 \n",
+ "894 109418.481275 109429.692091 108856.803440 108480.184567 \n",
+ "\n",
+ " 2025-01-31 2025-02-28 2025-03-31 2025-04-30 \\\n",
+ "0 370828.567353 371298.260498 371016.982722 370477.532421 \n",
+ "1 703938.611027 705266.111170 707009.440660 709510.127516 \n",
+ "2 994044.173013 991951.863479 986697.459794 982246.652053 \n",
+ "3 341188.253115 342585.506509 343463.210750 344086.738417 \n",
+ "4 384716.914401 383910.093012 382370.526648 380191.067291 \n",
+ ".. ... ... ... ... \n",
+ "890 139591.033021 139060.037085 138296.346480 136113.475153 \n",
+ "891 387896.552064 389139.536436 389956.139487 391704.282777 \n",
+ "892 294625.438545 295400.972873 295720.152107 297001.710359 \n",
+ "893 106725.247139 105661.977276 104405.236538 102838.777165 \n",
+ "894 107779.740122 108144.520025 108602.309747 108934.388219 \n",
+ "\n",
+ " 2025-05-31 2025-06-30 \n",
+ "0 369777.238792 369146.758903 \n",
+ "1 711326.861714 712842.475773 \n",
+ "2 977325.621835 972836.618922 \n",
+ "3 344388.252982 344762.755307 \n",
+ "4 377715.452411 375293.228730 \n",
+ ".. ... ... \n",
+ "890 133730.331027 131478.742029 \n",
+ "891 394878.913726 399233.112133 \n",
+ "892 298402.180459 299467.780061 \n",
+ "893 101577.252143 100654.840043 \n",
+ "894 108796.035251 108656.736812 \n",
+ "\n",
+ "[895 rows x 312 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " RegionID | \n",
+ " SizeRank | \n",
+ " RegionName | \n",
+ " RegionType | \n",
+ " StateName | \n",
+ " 2000-01-31 | \n",
+ " 2000-02-29 | \n",
+ " 2000-03-31 | \n",
+ " 2000-04-30 | \n",
+ " ... | \n",
+ " 2024-09-30 | \n",
+ " 2024-10-31 | \n",
+ " 2024-11-30 | \n",
+ " 2024-12-31 | \n",
+ " 2025-01-31 | \n",
+ " 2025-02-28 | \n",
+ " 2025-03-31 | \n",
+ " 2025-04-30 | \n",
+ " 2025-05-31 | \n",
+ " 2025-06-30 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " 102001 | \n",
+ " 0 | \n",
+ " United States | \n",
+ " country | \n",
+ " NaN | \n",
+ " 124952.583264 | \n",
+ " 125172.414915 | \n",
+ " 125445.038530 | \n",
+ " 126029.762672 | \n",
+ " ... | \n",
+ " 368342.550149 | \n",
+ " 368944.348980 | \n",
+ " 369448.501052 | \n",
+ " 370184.720224 | \n",
+ " 370828.567353 | \n",
+ " 371298.260498 | \n",
+ " 371016.982722 | \n",
+ " 370477.532421 | \n",
+ " 369777.238792 | \n",
+ " 369146.758903 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " 394913 | \n",
+ " 1 | \n",
+ " New York, NY | \n",
+ " msa | \n",
+ " NY | \n",
+ " 224104.107786 | \n",
+ " 225056.423800 | \n",
+ " 226017.559977 | \n",
+ " 227965.193784 | \n",
+ " ... | \n",
+ " 694463.939098 | \n",
+ " 698112.907260 | \n",
+ " 700981.582357 | \n",
+ " 702828.956760 | \n",
+ " 703938.611027 | \n",
+ " 705266.111170 | \n",
+ " 707009.440660 | \n",
+ " 709510.127516 | \n",
+ " 711326.861714 | \n",
+ " 712842.475773 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2 | \n",
+ " 753899 | \n",
+ " 2 | \n",
+ " Los Angeles, CA | \n",
+ " msa | \n",
+ " CA | \n",
+ " 228441.264620 | \n",
+ " 229291.355968 | \n",
+ " 230423.669537 | \n",
+ " 232676.593166 | \n",
+ " ... | \n",
+ " 979263.294758 | \n",
+ " 984668.937931 | \n",
+ " 989326.464086 | \n",
+ " 993489.580771 | \n",
+ " 994044.173013 | \n",
+ " 991951.863479 | \n",
+ " 986697.459794 | \n",
+ " 982246.652053 | \n",
+ " 977325.621835 | \n",
+ " 972836.618922 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 3 | \n",
+ " 394463 | \n",
+ " 3 | \n",
+ " Chicago, IL | \n",
+ " msa | \n",
+ " IL | \n",
+ " 159204.293651 | \n",
+ " 159351.670450 | \n",
+ " 159632.352157 | \n",
+ " 160330.618493 | \n",
+ " ... | \n",
+ " 336401.662700 | \n",
+ " 337417.542386 | \n",
+ " 338496.548214 | \n",
+ " 339792.563198 | \n",
+ " 341188.253115 | \n",
+ " 342585.506509 | \n",
+ " 343463.210750 | \n",
+ " 344086.738417 | \n",
+ " 344388.252982 | \n",
+ " 344762.755307 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 4 | \n",
+ " 394514 | \n",
+ " 4 | \n",
+ " Dallas, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " 130776.117982 | \n",
+ " 130834.366121 | \n",
+ " 130901.328321 | \n",
+ " 131075.222120 | \n",
+ " ... | \n",
+ " 386210.886078 | \n",
+ " 386101.104828 | \n",
+ " 385802.176509 | \n",
+ " 385258.927239 | \n",
+ " 384716.914401 | \n",
+ " 383910.093012 | \n",
+ " 382370.526648 | \n",
+ " 380191.067291 | \n",
+ " 377715.452411 | \n",
+ " 375293.228730 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 890 | \n",
+ " 890 | \n",
+ " 753929 | \n",
+ " 935 | \n",
+ " Zapata, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 142519.346796 | \n",
+ " 141543.788987 | \n",
+ " 140694.812207 | \n",
+ " 140081.758149 | \n",
+ " 139591.033021 | \n",
+ " 139060.037085 | \n",
+ " 138296.346480 | \n",
+ " 136113.475153 | \n",
+ " 133730.331027 | \n",
+ " 131478.742029 | \n",
+ "
\n",
+ " \n",
+ " | 891 | \n",
+ " 891 | \n",
+ " 394743 | \n",
+ " 936 | \n",
+ " Ketchikan, AK | \n",
+ " msa | \n",
+ " AK | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 384645.541128 | \n",
+ " 386527.958063 | \n",
+ " 387033.635531 | \n",
+ " 387415.739710 | \n",
+ " 387896.552064 | \n",
+ " 389139.536436 | \n",
+ " 389956.139487 | \n",
+ " 391704.282777 | \n",
+ " 394878.913726 | \n",
+ " 399233.112133 | \n",
+ "
\n",
+ " \n",
+ " | 892 | \n",
+ " 892 | \n",
+ " 753874 | \n",
+ " 937 | \n",
+ " Craig, CO | \n",
+ " msa | \n",
+ " CO | \n",
+ " 99313.814620 | \n",
+ " 99567.766989 | \n",
+ " 100040.559776 | \n",
+ " 100713.860374 | \n",
+ " ... | \n",
+ " 290338.697990 | \n",
+ " 292045.315574 | \n",
+ " 293275.268519 | \n",
+ " 294341.950896 | \n",
+ " 294625.438545 | \n",
+ " 295400.972873 | \n",
+ " 295720.152107 | \n",
+ " 297001.710359 | \n",
+ " 298402.180459 | \n",
+ " 299467.780061 | \n",
+ "
\n",
+ " \n",
+ " | 893 | \n",
+ " 893 | \n",
+ " 395188 | \n",
+ " 938 | \n",
+ " Vernon, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 113880.850440 | \n",
+ " 111889.316393 | \n",
+ " 109490.077942 | \n",
+ " 107891.774574 | \n",
+ " 106725.247139 | \n",
+ " 105661.977276 | \n",
+ " 104405.236538 | \n",
+ " 102838.777165 | \n",
+ " 101577.252143 | \n",
+ " 100654.840043 | \n",
+ "
\n",
+ " \n",
+ " | 894 | \n",
+ " 894 | \n",
+ " 394767 | \n",
+ " 939 | \n",
+ " Lamesa, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 109418.481275 | \n",
+ " 109429.692091 | \n",
+ " 108856.803440 | \n",
+ " 108480.184567 | \n",
+ " 107779.740122 | \n",
+ " 108144.520025 | \n",
+ " 108602.309747 | \n",
+ " 108934.388219 | \n",
+ " 108796.035251 | \n",
+ " 108656.736812 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
895 rows × 312 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_homes_2"
+ }
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ],
+ "source": [
+ "df_homes_2 = pd.read_csv(\"zillow_home_data.csv\")  # reload the local CSV into a second frame\n",
+ "df_homes_2"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eaaa6d3b-21e6-4e4a-916a-d522f1fd60c6",
+ "metadata": {
+ "id": "eaaa6d3b-21e6-4e4a-916a-d522f1fd60c6"
+ },
+ "source": [
+ "**8.** Compare the dimensions of the two DataFrames, df_homes and df_homes_2. Are they equal? If not, how can you fix it?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "5c0a7f30-3d6a-4283-9efb-c1dc1d5d7d42",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "5c0a7f30-3d6a-4283-9efb-c1dc1d5d7d42",
+ "outputId": "d137ccb5-0de3-4c7d-fe12-299022cda9ba"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "False"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 8
+ }
+ ],
+ "source": [
+ "df_homes_2.shape == df_homes.shape  # compare the (rows, columns) tuples of both frames"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_homes_2 = df_homes_2.drop(columns=\"Unnamed: 0\", errors=\"ignore\")  # rebind instead of inplace; errors=\"ignore\" keeps the cell re-runnable once the column is gone\n",
+ "df_homes_2"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 530
+ },
+ "id": "TBJG6UW1-3RS",
+ "outputId": "975c870e-7dbc-4f06-b2e4-d8003207d7e4"
+ },
+ "id": "TBJG6UW1-3RS",
+ "execution_count": 9,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " RegionID SizeRank RegionName RegionType StateName 2000-01-31 \\\n",
+ "0 102001 0 United States country NaN 124952.583264 \n",
+ "1 394913 1 New York, NY msa NY 224104.107786 \n",
+ "2 753899 2 Los Angeles, CA msa CA 228441.264620 \n",
+ "3 394463 3 Chicago, IL msa IL 159204.293651 \n",
+ "4 394514 4 Dallas, TX msa TX 130776.117982 \n",
+ ".. ... ... ... ... ... ... \n",
+ "890 753929 935 Zapata, TX msa TX NaN \n",
+ "891 394743 936 Ketchikan, AK msa AK NaN \n",
+ "892 753874 937 Craig, CO msa CO 99313.814620 \n",
+ "893 395188 938 Vernon, TX msa TX NaN \n",
+ "894 394767 939 Lamesa, TX msa TX NaN \n",
+ "\n",
+ " 2000-02-29 2000-03-31 2000-04-30 2000-05-31 ... \\\n",
+ "0 125172.414915 125445.038530 126029.762672 126702.710055 ... \n",
+ "1 225056.423800 226017.559977 227965.193784 229982.036039 ... \n",
+ "2 229291.355968 230423.669537 232676.593166 235140.585322 ... \n",
+ "3 159351.670450 159632.352157 160330.618493 161170.548906 ... \n",
+ "4 130834.366121 130901.328321 131075.222120 131304.496332 ... \n",
+ ".. ... ... ... ... ... \n",
+ "890 NaN NaN NaN NaN ... \n",
+ "891 NaN NaN NaN NaN ... \n",
+ "892 99567.766989 100040.559776 100713.860374 101496.281760 ... \n",
+ "893 NaN NaN NaN NaN ... \n",
+ "894 NaN NaN NaN NaN ... \n",
+ "\n",
+ " 2024-09-30 2024-10-31 2024-11-30 2024-12-31 \\\n",
+ "0 368342.550149 368944.348980 369448.501052 370184.720224 \n",
+ "1 694463.939098 698112.907260 700981.582357 702828.956760 \n",
+ "2 979263.294758 984668.937931 989326.464086 993489.580771 \n",
+ "3 336401.662700 337417.542386 338496.548214 339792.563198 \n",
+ "4 386210.886078 386101.104828 385802.176509 385258.927239 \n",
+ ".. ... ... ... ... \n",
+ "890 142519.346796 141543.788987 140694.812207 140081.758149 \n",
+ "891 384645.541128 386527.958063 387033.635531 387415.739710 \n",
+ "892 290338.697990 292045.315574 293275.268519 294341.950896 \n",
+ "893 113880.850440 111889.316393 109490.077942 107891.774574 \n",
+ "894 109418.481275 109429.692091 108856.803440 108480.184567 \n",
+ "\n",
+ " 2025-01-31 2025-02-28 2025-03-31 2025-04-30 \\\n",
+ "0 370828.567353 371298.260498 371016.982722 370477.532421 \n",
+ "1 703938.611027 705266.111170 707009.440660 709510.127516 \n",
+ "2 994044.173013 991951.863479 986697.459794 982246.652053 \n",
+ "3 341188.253115 342585.506509 343463.210750 344086.738417 \n",
+ "4 384716.914401 383910.093012 382370.526648 380191.067291 \n",
+ ".. ... ... ... ... \n",
+ "890 139591.033021 139060.037085 138296.346480 136113.475153 \n",
+ "891 387896.552064 389139.536436 389956.139487 391704.282777 \n",
+ "892 294625.438545 295400.972873 295720.152107 297001.710359 \n",
+ "893 106725.247139 105661.977276 104405.236538 102838.777165 \n",
+ "894 107779.740122 108144.520025 108602.309747 108934.388219 \n",
+ "\n",
+ " 2025-05-31 2025-06-30 \n",
+ "0 369777.238792 369146.758903 \n",
+ "1 711326.861714 712842.475773 \n",
+ "2 977325.621835 972836.618922 \n",
+ "3 344388.252982 344762.755307 \n",
+ "4 377715.452411 375293.228730 \n",
+ ".. ... ... \n",
+ "890 133730.331027 131478.742029 \n",
+ "891 394878.913726 399233.112133 \n",
+ "892 298402.180459 299467.780061 \n",
+ "893 101577.252143 100654.840043 \n",
+ "894 108796.035251 108656.736812 \n",
+ "\n",
+ "[895 rows x 311 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " RegionID | \n",
+ " SizeRank | \n",
+ " RegionName | \n",
+ " RegionType | \n",
+ " StateName | \n",
+ " 2000-01-31 | \n",
+ " 2000-02-29 | \n",
+ " 2000-03-31 | \n",
+ " 2000-04-30 | \n",
+ " 2000-05-31 | \n",
+ " ... | \n",
+ " 2024-09-30 | \n",
+ " 2024-10-31 | \n",
+ " 2024-11-30 | \n",
+ " 2024-12-31 | \n",
+ " 2025-01-31 | \n",
+ " 2025-02-28 | \n",
+ " 2025-03-31 | \n",
+ " 2025-04-30 | \n",
+ " 2025-05-31 | \n",
+ " 2025-06-30 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 102001 | \n",
+ " 0 | \n",
+ " United States | \n",
+ " country | \n",
+ " NaN | \n",
+ " 124952.583264 | \n",
+ " 125172.414915 | \n",
+ " 125445.038530 | \n",
+ " 126029.762672 | \n",
+ " 126702.710055 | \n",
+ " ... | \n",
+ " 368342.550149 | \n",
+ " 368944.348980 | \n",
+ " 369448.501052 | \n",
+ " 370184.720224 | \n",
+ " 370828.567353 | \n",
+ " 371298.260498 | \n",
+ " 371016.982722 | \n",
+ " 370477.532421 | \n",
+ " 369777.238792 | \n",
+ " 369146.758903 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 394913 | \n",
+ " 1 | \n",
+ " New York, NY | \n",
+ " msa | \n",
+ " NY | \n",
+ " 224104.107786 | \n",
+ " 225056.423800 | \n",
+ " 226017.559977 | \n",
+ " 227965.193784 | \n",
+ " 229982.036039 | \n",
+ " ... | \n",
+ " 694463.939098 | \n",
+ " 698112.907260 | \n",
+ " 700981.582357 | \n",
+ " 702828.956760 | \n",
+ " 703938.611027 | \n",
+ " 705266.111170 | \n",
+ " 707009.440660 | \n",
+ " 709510.127516 | \n",
+ " 711326.861714 | \n",
+ " 712842.475773 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 753899 | \n",
+ " 2 | \n",
+ " Los Angeles, CA | \n",
+ " msa | \n",
+ " CA | \n",
+ " 228441.264620 | \n",
+ " 229291.355968 | \n",
+ " 230423.669537 | \n",
+ " 232676.593166 | \n",
+ " 235140.585322 | \n",
+ " ... | \n",
+ " 979263.294758 | \n",
+ " 984668.937931 | \n",
+ " 989326.464086 | \n",
+ " 993489.580771 | \n",
+ " 994044.173013 | \n",
+ " 991951.863479 | \n",
+ " 986697.459794 | \n",
+ " 982246.652053 | \n",
+ " 977325.621835 | \n",
+ " 972836.618922 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 394463 | \n",
+ " 3 | \n",
+ " Chicago, IL | \n",
+ " msa | \n",
+ " IL | \n",
+ " 159204.293651 | \n",
+ " 159351.670450 | \n",
+ " 159632.352157 | \n",
+ " 160330.618493 | \n",
+ " 161170.548906 | \n",
+ " ... | \n",
+ " 336401.662700 | \n",
+ " 337417.542386 | \n",
+ " 338496.548214 | \n",
+ " 339792.563198 | \n",
+ " 341188.253115 | \n",
+ " 342585.506509 | \n",
+ " 343463.210750 | \n",
+ " 344086.738417 | \n",
+ " 344388.252982 | \n",
+ " 344762.755307 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 394514 | \n",
+ " 4 | \n",
+ " Dallas, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " 130776.117982 | \n",
+ " 130834.366121 | \n",
+ " 130901.328321 | \n",
+ " 131075.222120 | \n",
+ " 131304.496332 | \n",
+ " ... | \n",
+ " 386210.886078 | \n",
+ " 386101.104828 | \n",
+ " 385802.176509 | \n",
+ " 385258.927239 | \n",
+ " 384716.914401 | \n",
+ " 383910.093012 | \n",
+ " 382370.526648 | \n",
+ " 380191.067291 | \n",
+ " 377715.452411 | \n",
+ " 375293.228730 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 890 | \n",
+ " 753929 | \n",
+ " 935 | \n",
+ " Zapata, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 142519.346796 | \n",
+ " 141543.788987 | \n",
+ " 140694.812207 | \n",
+ " 140081.758149 | \n",
+ " 139591.033021 | \n",
+ " 139060.037085 | \n",
+ " 138296.346480 | \n",
+ " 136113.475153 | \n",
+ " 133730.331027 | \n",
+ " 131478.742029 | \n",
+ "
\n",
+ " \n",
+ " | 891 | \n",
+ " 394743 | \n",
+ " 936 | \n",
+ " Ketchikan, AK | \n",
+ " msa | \n",
+ " AK | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 384645.541128 | \n",
+ " 386527.958063 | \n",
+ " 387033.635531 | \n",
+ " 387415.739710 | \n",
+ " 387896.552064 | \n",
+ " 389139.536436 | \n",
+ " 389956.139487 | \n",
+ " 391704.282777 | \n",
+ " 394878.913726 | \n",
+ " 399233.112133 | \n",
+ "
\n",
+ " \n",
+ " | 892 | \n",
+ " 753874 | \n",
+ " 937 | \n",
+ " Craig, CO | \n",
+ " msa | \n",
+ " CO | \n",
+ " 99313.814620 | \n",
+ " 99567.766989 | \n",
+ " 100040.559776 | \n",
+ " 100713.860374 | \n",
+ " 101496.281760 | \n",
+ " ... | \n",
+ " 290338.697990 | \n",
+ " 292045.315574 | \n",
+ " 293275.268519 | \n",
+ " 294341.950896 | \n",
+ " 294625.438545 | \n",
+ " 295400.972873 | \n",
+ " 295720.152107 | \n",
+ " 297001.710359 | \n",
+ " 298402.180459 | \n",
+ " 299467.780061 | \n",
+ "
\n",
+ " \n",
+ " | 893 | \n",
+ " 395188 | \n",
+ " 938 | \n",
+ " Vernon, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 113880.850440 | \n",
+ " 111889.316393 | \n",
+ " 109490.077942 | \n",
+ " 107891.774574 | \n",
+ " 106725.247139 | \n",
+ " 105661.977276 | \n",
+ " 104405.236538 | \n",
+ " 102838.777165 | \n",
+ " 101577.252143 | \n",
+ " 100654.840043 | \n",
+ "
\n",
+ " \n",
+ " | 894 | \n",
+ " 394767 | \n",
+ " 939 | \n",
+ " Lamesa, TX | \n",
+ " msa | \n",
+ " TX | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " ... | \n",
+ " 109418.481275 | \n",
+ " 109429.692091 | \n",
+ " 108856.803440 | \n",
+ " 108480.184567 | \n",
+ " 107779.740122 | \n",
+ " 108144.520025 | \n",
+ " 108602.309747 | \n",
+ " 108934.388219 | \n",
+ " 108796.035251 | \n",
+ " 108656.736812 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
895 rows × 311 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_homes_2"
+ }
+ },
+ "metadata": {},
+ "execution_count": 9
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0f34b104-fc3a-492c-964b-45daa12850b0",
+ "metadata": {
+ "id": "0f34b104-fc3a-492c-964b-45daa12850b0"
+ },
+ "source": [
+ "**9.** A remote spreadsheet showing a snapshot of how traffic increased for a hypothetical website is available here: https://github.com/CodeSolid/CodeSolid.github.io/raw/main/booksource/data/AnalyticsSnapshot.xlsx. Load the worksheet of the spreadsheet labelled \"February 2022\" as a DataFrame named \"feb\". Note: the leftmost column in the spreadsheet is the index column."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "7b8772dd-5d19-4510-a725-80c7273ce61b",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 143
+ },
+ "id": "7b8772dd-5d19-4510-a725-80c7273ce61b",
+ "outputId": "1122f1b2-d026-4d18-e6f1-dfc9d4640380"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " This Month Last Month Month to Month Increase\n",
+ "Users 1800 280 5.428571\n",
+ "New Users 1700 298 4.704698\n",
+ "Page Views 2534 436 4.811927"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " This Month | \n",
+ " Last Month | \n",
+ " Month to Month Increase | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Users | \n",
+ " 1800 | \n",
+ " 280 | \n",
+ " 5.428571 | \n",
+ "
\n",
+ " \n",
+ " | New Users | \n",
+ " 1700 | \n",
+ " 298 | \n",
+ " 4.704698 | \n",
+ "
\n",
+ " \n",
+ " | Page Views | \n",
+ " 2534 | \n",
+ " 436 | \n",
+ " 4.811927 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "feb",
+ "summary": "{\n \"name\": \"feb\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"This Month\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 455,\n \"min\": 1700,\n \"max\": 2534,\n \"num_unique_values\": 3,\n \"samples\": [\n 1800,\n 1700,\n 2534\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Last Month\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 85,\n \"min\": 280,\n \"max\": 436,\n \"num_unique_values\": 3,\n \"samples\": [\n 280,\n 298,\n 436\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Month to Month Increase\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39067069988637804,\n \"min\": 4.704697987,\n \"max\": 5.428571429,\n \"num_unique_values\": 3,\n \"samples\": [\n 5.428571429,\n 4.704697987,\n 4.811926606\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 10
+ }
+ ],
+ "source": [
+ "# Load only the \"February 2022\" worksheet; the leftmost spreadsheet column becomes the index.\n",
+ "feb = pd.read_excel(\n",
+ "    \"https://github.com/CodeSolid/CodeSolid.github.io/raw/main/booksource/data/AnalyticsSnapshot.xlsx\",\n",
+ "    sheet_name=\"February 2022\",\n",
+ "    index_col=0,\n",
+ ")\n",
+ "feb"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ad366534-b168-490c-a55d-654f3ef44288",
+ "metadata": {
+ "id": "ad366534-b168-490c-a55d-654f3ef44288"
+ },
+ "source": [
+ "**10.** The \"Month to Month Increase\" column is a bit hard to understand, so ignore it for now. Given the values for \"This Month\" and \"Last Month\", create a new column, \"Percentage Increase\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "d053f773-c9a9-4592-aaaa-660ae8e189e5",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 143
+ },
+ "id": "d053f773-c9a9-4592-aaaa-660ae8e189e5",
+ "outputId": "18c91f8f-49fd-4776-b36c-7901e93b9809"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " This Month Last Month Month to Month Increase \\\n",
+ "Users 1800 280 5.428571 \n",
+ "New Users 1700 298 4.704698 \n",
+ "Page Views 2534 436 4.811927 \n",
+ "\n",
+ " Percentage Increase \n",
+ "Users 542.857143 \n",
+ "New Users 470.469799 \n",
+ "Page Views 481.192661 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " This Month | \n",
+ " Last Month | \n",
+ " Month to Month Increase | \n",
+ " Percentage Increase | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Users | \n",
+ " 1800 | \n",
+ " 280 | \n",
+ " 5.428571 | \n",
+ " 542.857143 | \n",
+ "
\n",
+ " \n",
+ " | New Users | \n",
+ " 1700 | \n",
+ " 298 | \n",
+ " 4.704698 | \n",
+ " 470.469799 | \n",
+ "
\n",
+ " \n",
+ " | Page Views | \n",
+ " 2534 | \n",
+ " 436 | \n",
+ " 4.811927 | \n",
+ " 481.192661 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "feb",
+ "summary": "{\n \"name\": \"feb\",\n \"rows\": 3,\n \"fields\": [\n {\n \"column\": \"This Month\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 455,\n \"min\": 1700,\n \"max\": 2534,\n \"num_unique_values\": 3,\n \"samples\": [\n 1800,\n 1700,\n 2534\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Last Month\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 85,\n \"min\": 280,\n \"max\": 436,\n \"num_unique_values\": 3,\n \"samples\": [\n 280,\n 298,\n 436\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Month to Month Increase\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.39067069988637804,\n \"min\": 4.704697987,\n \"max\": 5.428571429,\n \"num_unique_values\": 3,\n \"samples\": [\n 5.428571429,\n 4.704697987,\n 4.811926606\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Percentage Increase\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 39.067069989886505,\n \"min\": 470.4697986577181,\n \"max\": 542.8571428571429,\n \"num_unique_values\": 3,\n \"samples\": [\n 542.8571428571429,\n 470.4697986577181,\n 481.19266055045875\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 11
+ }
+ ],
+ "source": [
+ "# Snapshot of feb taken before the new column is added.\n",
+ "febf = feb.copy()\n",
+ "# Percentage increase = (this - last) / last * 100, computed column-wise.\n",
+ "feb[\"Percentage Increase\"] = feb[\"This Month\"].sub(feb[\"Last Month\"]).div(feb[\"Last Month\"]).mul(100)\n",
+ "feb"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8a71222f-2ab4-47bb-806d-2610e25b3a91",
+ "metadata": {
+ "id": "8a71222f-2ab4-47bb-806d-2610e25b3a91"
+ },
+ "source": [
+ "## Basic Operations on Data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0b9cf32b-3132-40cc-a5ca-26d6e6a36e4b",
+ "metadata": {
+ "id": "0b9cf32b-3132-40cc-a5ca-26d6e6a36e4b"
+ },
+ "source": [
+ "**11.** Using Seaborn, get a dataset about penguins into a dataframe named \"df_penguins\". Note that because all of the following questions depend on this example, we'll provide the solution here so no one gets stuck:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "a8b68caf-a998-414a-9de7-899eae7213c7",
+ "metadata": {
+ "id": "a8b68caf-a998-414a-9de7-899eae7213c7"
+ },
+ "outputs": [],
+ "source": [
+ "df_penguins = sb.load_dataset('penguins')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f7170135-17bd-45e2-9239-dad647ed6eaf",
+ "metadata": {
+ "id": "f7170135-17bd-45e2-9239-dad647ed6eaf"
+ },
+ "source": [
+ "**12.** Write the code to show the number of rows and columns in df_penguins."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "6f565afb-d5f4-462d-b3e5-946bc91bd83e",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6f565afb-d5f4-462d-b3e5-946bc91bd83e",
+ "outputId": "9fec84d2-7019-4e57-d864-f237226132d1"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "(344, 7)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 13
+ }
+ ],
+ "source": [
+ "df_penguins.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9d3396f3-3fe6-4571-83b2-1e70f8a11513",
+ "metadata": {
+ "id": "9d3396f3-3fe6-4571-83b2-1e70f8a11513"
+ },
+ "source": [
+ "**13.** How might you show the first few rows of df_penguins?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "68d5946e-5011-4735-983e-7485f44a60f2",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "68d5946e-5011-4735-983e-7485f44a60f2",
+ "outputId": "815d0f37-803a-44aa-b87e-8dd985d1831d"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "0 Adelie Torgersen 39.1 18.7 181.0 \n",
+ "1 Adelie Torgersen 39.5 17.4 186.0 \n",
+ "2 Adelie Torgersen 40.3 18.0 195.0 \n",
+ "3 Adelie Torgersen NaN NaN NaN \n",
+ "4 Adelie Torgersen 36.7 19.3 193.0 \n",
+ "\n",
+ " body_mass_g sex \n",
+ "0 3750.0 Male \n",
+ "1 3800.0 Female \n",
+ "2 3250.0 Female \n",
+ "3 NaN NaN \n",
+ "4 3450.0 Female "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_penguins",
+ "summary": "{\n \"name\": \"df_penguins\",\n \"rows\": 344,\n \"fields\": [\n {\n \"column\": \"species\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Adelie\",\n \"Chinstrap\",\n \"Gentoo\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"island\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Torgersen\",\n \"Biscoe\",\n \"Dream\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.459583713926532,\n \"min\": 32.1,\n \"max\": 59.6,\n \"num_unique_values\": 164,\n \"samples\": [\n 48.2,\n 49.8,\n 45.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_depth_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9747931568167816,\n \"min\": 13.1,\n \"max\": 21.5,\n \"num_unique_values\": 80,\n \"samples\": [\n 16.9,\n 18.7,\n 18.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flipper_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.061713679356894,\n \"min\": 172.0,\n \"max\": 231.0,\n \"num_unique_values\": 55,\n \"samples\": [\n 201.0,\n 180.0,\n 212.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body_mass_g\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 801.9545356980954,\n \"min\": 2700.0,\n \"max\": 6300.0,\n \"num_unique_values\": 94,\n \"samples\": [\n 4350.0,\n 4150.0,\n 3525.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Female\",\n \"Male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 14
+ }
+ ],
+ "source": [
+ "df_penguins.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c841930d-e235-468a-809d-8427f8739076",
+ "metadata": {
+ "id": "c841930d-e235-468a-809d-8427f8739076"
+ },
+ "source": [
+ "**14.** How can you return the unique species of penguins from df_penguins? How many unique species are there?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "130ed9bd-397b-4504-bf26-6b0dc716ba8b",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "130ed9bd-397b-4504-bf26-6b0dc716ba8b",
+ "outputId": "82368d39-5c72-4251-9a97-43ccb3022c61"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "array(['Adelie', 'Chinstrap', 'Gentoo'], dtype=object)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 15
+ }
+ ],
+ "source": [
+ "df_penguins[\"species\"].unique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3912a3df-d931-4d74-a6fa-34ad11f83f94",
+ "metadata": {
+ "id": "3912a3df-d931-4d74-a6fa-34ad11f83f94"
+ },
+ "source": [
+ "**15.** What function can we use to drop the rows that have missing data?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "436cf3d3-db4a-42e3-81d3-9f2df28265f1",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "436cf3d3-db4a-42e3-81d3-9f2df28265f1",
+ "outputId": "2518df98-db8e-4634-f3ca-d059f2a11321"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "0 Adelie Torgersen 39.1 18.7 181.0 \n",
+ "1 Adelie Torgersen 39.5 17.4 186.0 \n",
+ "2 Adelie Torgersen 40.3 18.0 195.0 \n",
+ "4 Adelie Torgersen 36.7 19.3 193.0 \n",
+ "5 Adelie Torgersen 39.3 20.6 190.0 \n",
+ ".. ... ... ... ... ... \n",
+ "338 Gentoo Biscoe 47.2 13.7 214.0 \n",
+ "340 Gentoo Biscoe 46.8 14.3 215.0 \n",
+ "341 Gentoo Biscoe 50.4 15.7 222.0 \n",
+ "342 Gentoo Biscoe 45.2 14.8 212.0 \n",
+ "343 Gentoo Biscoe 49.9 16.1 213.0 \n",
+ "\n",
+ " body_mass_g sex \n",
+ "0 3750.0 Male \n",
+ "1 3800.0 Female \n",
+ "2 3250.0 Female \n",
+ "4 3450.0 Female \n",
+ "5 3650.0 Male \n",
+ ".. ... ... \n",
+ "338 4925.0 Female \n",
+ "340 4850.0 Female \n",
+ "341 5750.0 Male \n",
+ "342 5200.0 Female \n",
+ "343 5400.0 Male \n",
+ "\n",
+ "[333 rows x 7 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.3 | \n",
+ " 20.6 | \n",
+ " 190.0 | \n",
+ " 3650.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 338 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 47.2 | \n",
+ " 13.7 | \n",
+ " 214.0 | \n",
+ " 4925.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 340 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 46.8 | \n",
+ " 14.3 | \n",
+ " 215.0 | \n",
+ " 4850.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 341 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 50.4 | \n",
+ " 15.7 | \n",
+ " 222.0 | \n",
+ " 5750.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 342 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 45.2 | \n",
+ " 14.8 | \n",
+ " 212.0 | \n",
+ " 5200.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 343 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 49.9 | \n",
+ " 16.1 | \n",
+ " 213.0 | \n",
+ " 5400.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
333 rows × 7 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_penguins\",\n \"rows\": 333,\n \"fields\": [\n {\n \"column\": \"species\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Adelie\",\n \"Chinstrap\",\n \"Gentoo\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"island\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Torgersen\",\n \"Biscoe\",\n \"Dream\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.468668342647562,\n \"min\": 32.1,\n \"max\": 59.6,\n \"num_unique_values\": 163,\n \"samples\": [\n 59.6,\n 48.1,\n 46.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_depth_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9692354633199,\n \"min\": 13.1,\n \"max\": 21.5,\n \"num_unique_values\": 79,\n \"samples\": [\n 19.5,\n 18.7,\n 17.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flipper_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.015765288287882,\n \"min\": 172.0,\n \"max\": 231.0,\n \"num_unique_values\": 54,\n \"samples\": [\n 188.0,\n 231.0,\n 224.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body_mass_g\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 805.2158019428966,\n \"min\": 2700.0,\n \"max\": 6300.0,\n \"num_unique_values\": 93,\n \"samples\": [\n 4725.0,\n 3100.0,\n 3575.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Female\",\n \"Male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 16
+ }
+ ],
+ "source": [
+ "df_penguins.dropna()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_penguins"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "DLTClqrvA5W3",
+ "outputId": "2ba7c998-9955-4e08-cf5a-60700cad0859"
+ },
+ "id": "DLTClqrvA5W3",
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "0 Adelie Torgersen 39.1 18.7 181.0 \n",
+ "1 Adelie Torgersen 39.5 17.4 186.0 \n",
+ "2 Adelie Torgersen 40.3 18.0 195.0 \n",
+ "3 Adelie Torgersen NaN NaN NaN \n",
+ "4 Adelie Torgersen 36.7 19.3 193.0 \n",
+ ".. ... ... ... ... ... \n",
+ "339 Gentoo Biscoe NaN NaN NaN \n",
+ "340 Gentoo Biscoe 46.8 14.3 215.0 \n",
+ "341 Gentoo Biscoe 50.4 15.7 222.0 \n",
+ "342 Gentoo Biscoe 45.2 14.8 212.0 \n",
+ "343 Gentoo Biscoe 49.9 16.1 213.0 \n",
+ "\n",
+ " body_mass_g sex \n",
+ "0 3750.0 Male \n",
+ "1 3800.0 Female \n",
+ "2 3250.0 Female \n",
+ "3 NaN NaN \n",
+ "4 3450.0 Female \n",
+ ".. ... ... \n",
+ "339 NaN NaN \n",
+ "340 4850.0 Female \n",
+ "341 5750.0 Male \n",
+ "342 5200.0 Female \n",
+ "343 5400.0 Male \n",
+ "\n",
+ "[344 rows x 7 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 339 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " | 340 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 46.8 | \n",
+ " 14.3 | \n",
+ " 215.0 | \n",
+ " 4850.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 341 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 50.4 | \n",
+ " 15.7 | \n",
+ " 222.0 | \n",
+ " 5750.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 342 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 45.2 | \n",
+ " 14.8 | \n",
+ " 212.0 | \n",
+ " 5200.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 343 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 49.9 | \n",
+ " 16.1 | \n",
+ " 213.0 | \n",
+ " 5400.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
344 rows × 7 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_penguins",
+ "summary": "{\n \"name\": \"df_penguins\",\n \"rows\": 344,\n \"fields\": [\n {\n \"column\": \"species\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Adelie\",\n \"Chinstrap\",\n \"Gentoo\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"island\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Torgersen\",\n \"Biscoe\",\n \"Dream\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.459583713926532,\n \"min\": 32.1,\n \"max\": 59.6,\n \"num_unique_values\": 164,\n \"samples\": [\n 48.2,\n 49.8,\n 45.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_depth_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9747931568167816,\n \"min\": 13.1,\n \"max\": 21.5,\n \"num_unique_values\": 80,\n \"samples\": [\n 16.9,\n 18.7,\n 18.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flipper_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.061713679356894,\n \"min\": 172.0,\n \"max\": 231.0,\n \"num_unique_values\": 55,\n \"samples\": [\n 201.0,\n 180.0,\n 212.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body_mass_g\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 801.9545356980954,\n \"min\": 2700.0,\n \"max\": 6300.0,\n \"num_unique_values\": 94,\n \"samples\": [\n 4350.0,\n 4150.0,\n 3525.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Female\",\n \"Male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 17
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f1feab44-7136-4736-a11c-ee005fdb0698",
+ "metadata": {
+ "id": "f1feab44-7136-4736-a11c-ee005fdb0698"
+ },
+ "source": [
+ "**16.** By default, will this modify df_penguins or will it return a copy?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8b0981bc-d8b1-42a5-9df6-e2723f61013c",
+ "metadata": {
+ "id": "8b0981bc-d8b1-42a5-9df6-e2723f61013c"
+ },
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7f35c024-ac5c-43e0-8ea7-3e03953f644b",
+ "metadata": {
+ "id": "7f35c024-ac5c-43e0-8ea7-3e03953f644b"
+ },
+ "source": [
+ "**17.** How can we override the default?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3f62e100-1000-40df-b21b-6af919fbb945",
+ "metadata": {
+ "id": "3f62e100-1000-40df-b21b-6af919fbb945"
+ },
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a17f9c69-c8e2-4331-b9c7-c5be6a13a968",
+ "metadata": {
+ "id": "a17f9c69-c8e2-4331-b9c7-c5be6a13a968"
+ },
+ "source": [
+ "**18.** Create a new DataFrame, df_penguins_full, with the missing data deleted."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "f160094c-ba0c-4fe1-96f0-d6b23e8162c9",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "f160094c-ba0c-4fe1-96f0-d6b23e8162c9",
+ "outputId": "75a67717-d06a-47e6-d654-88cb75753985"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "0 Adelie Torgersen 39.1 18.7 181.0 \n",
+ "1 Adelie Torgersen 39.5 17.4 186.0 \n",
+ "2 Adelie Torgersen 40.3 18.0 195.0 \n",
+ "4 Adelie Torgersen 36.7 19.3 193.0 \n",
+ "5 Adelie Torgersen 39.3 20.6 190.0 \n",
+ ".. ... ... ... ... ... \n",
+ "338 Gentoo Biscoe 47.2 13.7 214.0 \n",
+ "340 Gentoo Biscoe 46.8 14.3 215.0 \n",
+ "341 Gentoo Biscoe 50.4 15.7 222.0 \n",
+ "342 Gentoo Biscoe 45.2 14.8 212.0 \n",
+ "343 Gentoo Biscoe 49.9 16.1 213.0 \n",
+ "\n",
+ " body_mass_g sex \n",
+ "0 3750.0 Male \n",
+ "1 3800.0 Female \n",
+ "2 3250.0 Female \n",
+ "4 3450.0 Female \n",
+ "5 3650.0 Male \n",
+ ".. ... ... \n",
+ "338 4925.0 Female \n",
+ "340 4850.0 Female \n",
+ "341 5750.0 Male \n",
+ "342 5200.0 Female \n",
+ "343 5400.0 Male \n",
+ "\n",
+ "[333 rows x 7 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.1 | \n",
+ " 18.7 | \n",
+ " 181.0 | \n",
+ " 3750.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.5 | \n",
+ " 17.4 | \n",
+ " 186.0 | \n",
+ " 3800.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 40.3 | \n",
+ " 18.0 | \n",
+ " 195.0 | \n",
+ " 3250.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 36.7 | \n",
+ " 19.3 | \n",
+ " 193.0 | \n",
+ " 3450.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 39.3 | \n",
+ " 20.6 | \n",
+ " 190.0 | \n",
+ " 3650.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 338 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 47.2 | \n",
+ " 13.7 | \n",
+ " 214.0 | \n",
+ " 4925.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 340 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 46.8 | \n",
+ " 14.3 | \n",
+ " 215.0 | \n",
+ " 4850.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 341 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 50.4 | \n",
+ " 15.7 | \n",
+ " 222.0 | \n",
+ " 5750.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 342 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 45.2 | \n",
+ " 14.8 | \n",
+ " 212.0 | \n",
+ " 5200.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 343 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 49.9 | \n",
+ " 16.1 | \n",
+ " 213.0 | \n",
+ " 5400.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
333 rows × 7 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_penguins_full",
+ "summary": "{\n \"name\": \"df_penguins_full\",\n \"rows\": 333,\n \"fields\": [\n {\n \"column\": \"species\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Adelie\",\n \"Chinstrap\",\n \"Gentoo\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"island\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Torgersen\",\n \"Biscoe\",\n \"Dream\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.468668342647562,\n \"min\": 32.1,\n \"max\": 59.6,\n \"num_unique_values\": 163,\n \"samples\": [\n 59.6,\n 48.1,\n 46.3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_depth_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9692354633199,\n \"min\": 13.1,\n \"max\": 21.5,\n \"num_unique_values\": 79,\n \"samples\": [\n 19.5,\n 18.7,\n 17.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flipper_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.015765288287882,\n \"min\": 172.0,\n \"max\": 231.0,\n \"num_unique_values\": 54,\n \"samples\": [\n 188.0,\n 231.0,\n 224.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body_mass_g\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 805.2158019428966,\n \"min\": 2700.0,\n \"max\": 6300.0,\n \"num_unique_values\": 93,\n \"samples\": [\n 4725.0,\n 3100.0,\n 3575.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Female\",\n \"Male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 18
+ }
+ ],
+ "source": [
+ "# Keep only the rows that have no missing value in any column; .copy() makes\n",
+ "# the result an independent frame, so df_penguins itself is left unchanged.\n",
+ "df_penguins_full = df_penguins.dropna(axis=0, how=\"any\").copy()\n",
+ "df_penguins_full"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c34afa01-c0c4-40fa-b3a6-3b7c6abc58e1",
+ "metadata": {
+ "id": "c34afa01-c0c4-40fa-b3a6-3b7c6abc58e1"
+ },
+ "source": [
+ "**19.** What is the average bill length of a penguin, in millimeters, in this (df_penguins_full) data set?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "6012e669-3c97-4949-be33-3e44daf9534a",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "6012e669-3c97-4949-be33-3e44daf9534a",
+ "outputId": "3a277681-0f75-4655-adb4-32f48e18d916"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "np.float64(43.99279279279279)"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 19
+ }
+ ],
+ "source": [
+ "# Average bill length (mm) over the rows of df_penguins_full.\n",
+ "bill_lengths_mm = df_penguins_full[\"bill_length_mm\"]\n",
+ "bill_lengths_mm.mean()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "38076367-ae05-4567-8963-12b5e0d77214",
+ "metadata": {
+ "id": "38076367-ae05-4567-8963-12b5e0d77214"
+ },
+ "source": [
+ "**20.** Which of the following is most strongly correlated with bill length? a) Body mass? b) Flipper length? c) Bill depth? Show how you arrived at the answer."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2edbbf08-8f1d-40ea-b137-cdcdbfe02787",
+ "metadata": {
+ "id": "2edbbf08-8f1d-40ea-b137-cdcdbfe02787"
+ },
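+ "source": [
+ "**Answer:** b) Flipper length. In the `bill_length_mm` row of the correlation matrix computed in the next cell, `flipper_length_mm` has the largest absolute correlation (about 0.65), ahead of `body_mass_g` (about 0.59) and `bill_depth_mm` (about -0.23)."
+ ]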
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "93840bc0-eda0-48fd-b14d-4405a484c547",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 174
+ },
+ "id": "93840bc0-eda0-48fd-b14d-4405a484c547",
+ "outputId": "2eed1bff-226e-4bcb-c80b-8546b6f7856c"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "bill_length_mm 1.000000 -0.228626 0.653096 \n",
+ "bill_depth_mm -0.228626 1.000000 -0.577792 \n",
+ "flipper_length_mm 0.653096 -0.577792 1.000000 \n",
+ "body_mass_g 0.589451 -0.472016 0.872979 \n",
+ "\n",
+ " body_mass_g \n",
+ "bill_length_mm 0.589451 \n",
+ "bill_depth_mm -0.472016 \n",
+ "flipper_length_mm 0.872979 \n",
+ "body_mass_g 1.000000 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | bill_length_mm | \n",
+ " 1.000000 | \n",
+ " -0.228626 | \n",
+ " 0.653096 | \n",
+ " 0.589451 | \n",
+ "
\n",
+ " \n",
+ " | bill_depth_mm | \n",
+ " -0.228626 | \n",
+ " 1.000000 | \n",
+ " -0.577792 | \n",
+ " -0.472016 | \n",
+ "
\n",
+ " \n",
+ " | flipper_length_mm | \n",
+ " 0.653096 | \n",
+ " -0.577792 | \n",
+ " 1.000000 | \n",
+ " 0.872979 | \n",
+ "
\n",
+ " \n",
+ " | body_mass_g | \n",
+ " 0.589451 | \n",
+ " -0.472016 | \n",
+ " 0.872979 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_penguins_full\",\n \"rows\": 4,\n \"fields\": [\n {\n \"column\": \"bill_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.5203481666647599,\n \"min\": -0.22862563591303017,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n -0.22862563591303017,\n 0.5894511101769501,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_depth_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7279034909476336,\n \"min\": -0.5777916963366738,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 1.0,\n -0.4720156601951401,\n -0.22862563591303017\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flipper_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7242278204887708,\n \"min\": -0.5777916963366738,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n -0.5777916963366738,\n 0.8729788985653616,\n 0.6530956386670871\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body_mass_g\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.6688064573983192,\n \"min\": -0.4720156601951401,\n \"max\": 1.0,\n \"num_unique_values\": 4,\n \"samples\": [\n -0.4720156601951401,\n 1.0,\n 0.5894511101769501\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 21
+ }
+ ],
+ "source": [
+ "# Pairwise Pearson correlations between the numeric columns.\n",
+ "# numeric_only=True is required on pandas >= 2.0, where corr() no longer drops\n",
+ "# non-numeric columns (species, island, sex) silently and raises instead.\n",
+ "# Read the bill_length_mm row/column to compare it against the other measures.\n",
+ "df_penguins_full.corr(numeric_only=True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "538ed22f-f2c0-46df-bf41-6a27246e356d",
+ "metadata": {
+ "id": "538ed22f-f2c0-46df-bf41-6a27246e356d"
+ },
+ "source": [
+ "**21.** How could you show the median flipper length, grouped by species?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "a39321c8-1822-4078-91f9-6f9a36a3bab7",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 209
+ },
+ "id": "a39321c8-1822-4078-91f9-6f9a36a3bab7",
+ "outputId": "340d18dd-28c9-46f1-e145-45ef4b990fda"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "species\n",
+ "Adelie 190.0\n",
+ "Chinstrap 196.0\n",
+ "Gentoo 216.0\n",
+ "Name: flipper_length_mm, dtype: float64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " flipper_length_mm | \n",
+ "
\n",
+ " \n",
+ " | species | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Adelie | \n",
+ " 190.0 | \n",
+ "
\n",
+ " \n",
+ " | Chinstrap | \n",
+ " 196.0 | \n",
+ "
\n",
+ " \n",
+ " | Gentoo | \n",
+ " 216.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 22
+ }
+ ],
+ "source": [
+ "# Median flipper length (mm) within each species group.\n",
+ "flipper_by_species = df_penguins_full.groupby(\"species\")[\"flipper_length_mm\"]\n",
+ "flipper_by_species.median()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0d6ff0a0-f72b-47a0-a8c5-50cf2e9c8ef4",
+ "metadata": {
+ "id": "0d6ff0a0-f72b-47a0-a8c5-50cf2e9c8ef4"
+ },
+ "source": [
+ "**22.** Which species has the longest flippers?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "20f5ec8f-1580-438d-8ddb-ad2eeabed1ba",
+ "metadata": {
+ "id": "20f5ec8f-1580-438d-8ddb-ad2eeabed1ba"
+ },
+ "source": [
+ "Gentoo\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f50dacfd-a59d-4763-b8bf-004a02d8f05f",
+ "metadata": {
+ "id": "f50dacfd-a59d-4763-b8bf-004a02d8f05f"
+ },
+ "source": [
+ "**23.** Which two species have the most similar mean weight? Show how you arrived at the answer."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "be5582aa-447d-4645-85cd-072894fcbb82",
+ "metadata": {
+ "id": "be5582aa-447d-4645-85cd-072894fcbb82"
+ },
+ "source": [
+ "Adelie and Chinstrap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "c61f152b-d401-4c28-aea1-014d01100136",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 209
+ },
+ "id": "c61f152b-d401-4c28-aea1-014d01100136",
+ "outputId": "2ed6ab74-fafb-47c8-dd4b-2ce48a697db1"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "species\n",
+ "Adelie 3706.164384\n",
+ "Chinstrap 3733.088235\n",
+ "Gentoo 5092.436975\n",
+ "Name: body_mass_g, dtype: float64"
+ ],
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " body_mass_g | \n",
+ "
\n",
+ " \n",
+ " | species | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | Adelie | \n",
+ " 3706.164384 | \n",
+ "
\n",
+ " \n",
+ " | Chinstrap | \n",
+ " 3733.088235 | \n",
+ "
\n",
+ " \n",
+ " | Gentoo | \n",
+ " 5092.436975 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 23
+ }
+ ],
+ "source": [
+ "# Mean body mass (g) per species; the two closest means identify the most similar pair.\n",
+ "df_penguins_full.groupby(by=\"species\")[\"body_mass_g\"].agg(\"mean\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ddca88e5-374f-4c8a-bf61-26b70a8b3a90",
+ "metadata": {
+ "id": "ddca88e5-374f-4c8a-bf61-26b70a8b3a90"
+ },
+ "source": [
+ "**24.** How could you sort the rows by bill length?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "4faabd67-0417-45ca-b907-7a170f4e9b71",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "4faabd67-0417-45ca-b907-7a170f4e9b71",
+ "outputId": "8bc736c3-4dfe-4ddb-8eca-971651052528"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "142 Adelie Dream 32.1 15.5 188.0 \n",
+ "98 Adelie Dream 33.1 16.1 178.0 \n",
+ "70 Adelie Torgersen 33.5 19.0 190.0 \n",
+ "92 Adelie Dream 34.0 17.1 185.0 \n",
+ "18 Adelie Torgersen 34.4 18.4 184.0 \n",
+ ".. ... ... ... ... ... \n",
+ "335 Gentoo Biscoe 55.1 16.0 230.0 \n",
+ "215 Chinstrap Dream 55.8 19.8 207.0 \n",
+ "321 Gentoo Biscoe 55.9 17.0 228.0 \n",
+ "169 Chinstrap Dream 58.0 17.8 181.0 \n",
+ "253 Gentoo Biscoe 59.6 17.0 230.0 \n",
+ "\n",
+ " body_mass_g sex \n",
+ "142 3050.0 Female \n",
+ "98 2900.0 Female \n",
+ "70 3600.0 Female \n",
+ "92 3400.0 Female \n",
+ "18 3325.0 Female \n",
+ ".. ... ... \n",
+ "335 5850.0 Male \n",
+ "215 4000.0 Male \n",
+ "321 5600.0 Male \n",
+ "169 3700.0 Female \n",
+ "253 6050.0 Male \n",
+ "\n",
+ "[333 rows x 7 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 142 | \n",
+ " Adelie | \n",
+ " Dream | \n",
+ " 32.1 | \n",
+ " 15.5 | \n",
+ " 188.0 | \n",
+ " 3050.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 98 | \n",
+ " Adelie | \n",
+ " Dream | \n",
+ " 33.1 | \n",
+ " 16.1 | \n",
+ " 178.0 | \n",
+ " 2900.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 70 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 33.5 | \n",
+ " 19.0 | \n",
+ " 190.0 | \n",
+ " 3600.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 92 | \n",
+ " Adelie | \n",
+ " Dream | \n",
+ " 34.0 | \n",
+ " 17.1 | \n",
+ " 185.0 | \n",
+ " 3400.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 34.4 | \n",
+ " 18.4 | \n",
+ " 184.0 | \n",
+ " 3325.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 335 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 55.1 | \n",
+ " 16.0 | \n",
+ " 230.0 | \n",
+ " 5850.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 215 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 55.8 | \n",
+ " 19.8 | \n",
+ " 207.0 | \n",
+ " 4000.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 321 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 55.9 | \n",
+ " 17.0 | \n",
+ " 228.0 | \n",
+ " 5600.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 169 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 58.0 | \n",
+ " 17.8 | \n",
+ " 181.0 | \n",
+ " 3700.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 253 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 59.6 | \n",
+ " 17.0 | \n",
+ " 230.0 | \n",
+ " 6050.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
333 rows × 7 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_penguins_full\",\n \"rows\": 333,\n \"fields\": [\n {\n \"column\": \"species\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Adelie\",\n \"Chinstrap\",\n \"Gentoo\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"island\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Dream\",\n \"Torgersen\",\n \"Biscoe\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.468668342647561,\n \"min\": 32.1,\n \"max\": 59.6,\n \"num_unique_values\": 163,\n \"samples\": [\n 50.4,\n 48.2,\n 50.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_depth_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9692354633199005,\n \"min\": 13.1,\n \"max\": 21.5,\n \"num_unique_values\": 79,\n \"samples\": [\n 16.0,\n 15.5,\n 19.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flipper_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.01576528828788,\n \"min\": 172.0,\n \"max\": 231.0,\n \"num_unique_values\": 54,\n \"samples\": [\n 180.0,\n 229.0,\n 224.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body_mass_g\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 805.2158019428965,\n \"min\": 2700.0,\n \"max\": 6300.0,\n \"num_unique_values\": 93,\n \"samples\": [\n 4650.0,\n 3900.0,\n 4775.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Male\",\n \"Female\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 24
+ }
+ ],
+ "source": [
+ "# sort_values returns a new frame sorted in ascending order by default.\n",
+ "df_penguins_full.sort_values(\"bill_length_mm\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "56d12fe3-805d-4f79-bf83-e515b5070229",
+ "metadata": {
+ "id": "56d12fe3-805d-4f79-bf83-e515b5070229"
+ },
+ "source": [
+ "**25.** How could you run the same sort in descending order?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "02544f08-974e-463c-a2da-ac0b3ba3ff0d",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "02544f08-974e-463c-a2da-ac0b3ba3ff0d",
+ "outputId": "ef3a3fa4-1079-4472-9a0a-24003e0a2028"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "253 Gentoo Biscoe 59.6 17.0 230.0 \n",
+ "169 Chinstrap Dream 58.0 17.8 181.0 \n",
+ "321 Gentoo Biscoe 55.9 17.0 228.0 \n",
+ "215 Chinstrap Dream 55.8 19.8 207.0 \n",
+ "335 Gentoo Biscoe 55.1 16.0 230.0 \n",
+ ".. ... ... ... ... ... \n",
+ "18 Adelie Torgersen 34.4 18.4 184.0 \n",
+ "92 Adelie Dream 34.0 17.1 185.0 \n",
+ "70 Adelie Torgersen 33.5 19.0 190.0 \n",
+ "98 Adelie Dream 33.1 16.1 178.0 \n",
+ "142 Adelie Dream 32.1 15.5 188.0 \n",
+ "\n",
+ " body_mass_g sex \n",
+ "253 6050.0 Male \n",
+ "169 3700.0 Female \n",
+ "321 5600.0 Male \n",
+ "215 4000.0 Male \n",
+ "335 5850.0 Male \n",
+ ".. ... ... \n",
+ "18 3325.0 Female \n",
+ "92 3400.0 Female \n",
+ "70 3600.0 Female \n",
+ "98 2900.0 Female \n",
+ "142 3050.0 Female \n",
+ "\n",
+ "[333 rows x 7 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 253 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 59.6 | \n",
+ " 17.0 | \n",
+ " 230.0 | \n",
+ " 6050.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 169 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 58.0 | \n",
+ " 17.8 | \n",
+ " 181.0 | \n",
+ " 3700.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 321 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 55.9 | \n",
+ " 17.0 | \n",
+ " 228.0 | \n",
+ " 5600.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 215 | \n",
+ " Chinstrap | \n",
+ " Dream | \n",
+ " 55.8 | \n",
+ " 19.8 | \n",
+ " 207.0 | \n",
+ " 4000.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 335 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 55.1 | \n",
+ " 16.0 | \n",
+ " 230.0 | \n",
+ " 5850.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 18 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 34.4 | \n",
+ " 18.4 | \n",
+ " 184.0 | \n",
+ " 3325.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 92 | \n",
+ " Adelie | \n",
+ " Dream | \n",
+ " 34.0 | \n",
+ " 17.1 | \n",
+ " 185.0 | \n",
+ " 3400.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 70 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 33.5 | \n",
+ " 19.0 | \n",
+ " 190.0 | \n",
+ " 3600.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 98 | \n",
+ " Adelie | \n",
+ " Dream | \n",
+ " 33.1 | \n",
+ " 16.1 | \n",
+ " 178.0 | \n",
+ " 2900.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 142 | \n",
+ " Adelie | \n",
+ " Dream | \n",
+ " 32.1 | \n",
+ " 15.5 | \n",
+ " 188.0 | \n",
+ " 3050.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
333 rows × 7 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_penguins_full\",\n \"rows\": 333,\n \"fields\": [\n {\n \"column\": \"species\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Gentoo\",\n \"Chinstrap\",\n \"Adelie\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"island\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Biscoe\",\n \"Dream\",\n \"Torgersen\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.468668342647561,\n \"min\": 32.1,\n \"max\": 59.6,\n \"num_unique_values\": 163,\n \"samples\": [\n 37.5,\n 39.8,\n 37.9\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_depth_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9692354633199003,\n \"min\": 13.1,\n \"max\": 21.5,\n \"num_unique_values\": 79,\n \"samples\": [\n 18.4,\n 17.0,\n 16.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flipper_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.015765288287882,\n \"min\": 172.0,\n \"max\": 231.0,\n \"num_unique_values\": 54,\n \"samples\": [\n 203.0,\n 183.0,\n 184.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body_mass_g\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 805.2158019428964,\n \"min\": 2700.0,\n \"max\": 6300.0,\n \"num_unique_values\": 93,\n \"samples\": [\n 3600.0,\n 5300.0,\n 4725.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Female\",\n \"Male\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 25
+ }
+ ],
+ "source": [
+ "# Same sort key as before, longest bills first.\n",
+ "df_penguins_full.sort_values(\"bill_length_mm\", ascending=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b69d24f-cd5b-41c9-a4eb-b8325251fe7b",
+ "metadata": {
+ "id": "3b69d24f-cd5b-41c9-a4eb-b8325251fe7b"
+ },
+ "source": [
+ "**26.** How could you sort by species first, then by body mass?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "bda80b1d-2773-493f-b13d-7b2fa60e6849",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "bda80b1d-2773-493f-b13d-7b2fa60e6849",
+ "outputId": "c7f1a2fa-7d6b-4b18-d1fa-fd8c130f8333"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "58 Adelie Biscoe 36.5 16.6 181.0 \n",
+ "64 Adelie Biscoe 36.4 17.1 184.0 \n",
+ "54 Adelie Biscoe 34.5 18.1 187.0 \n",
+ "98 Adelie Dream 33.1 16.1 178.0 \n",
+ "116 Adelie Torgersen 38.6 17.0 188.0 \n",
+ ".. ... ... ... ... ... \n",
+ "331 Gentoo Biscoe 49.8 15.9 229.0 \n",
+ "297 Gentoo Biscoe 51.1 16.3 220.0 \n",
+ "337 Gentoo Biscoe 48.8 16.2 222.0 \n",
+ "253 Gentoo Biscoe 59.6 17.0 230.0 \n",
+ "237 Gentoo Biscoe 49.2 15.2 221.0 \n",
+ "\n",
+ " body_mass_g sex \n",
+ "58 2850.0 Female \n",
+ "64 2850.0 Female \n",
+ "54 2900.0 Female \n",
+ "98 2900.0 Female \n",
+ "116 2900.0 Female \n",
+ ".. ... ... \n",
+ "331 5950.0 Male \n",
+ "297 6000.0 Male \n",
+ "337 6000.0 Male \n",
+ "253 6050.0 Male \n",
+ "237 6300.0 Male \n",
+ "\n",
+ "[333 rows x 7 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " species | \n",
+ " island | \n",
+ " bill_length_mm | \n",
+ " bill_depth_mm | \n",
+ " flipper_length_mm | \n",
+ " body_mass_g | \n",
+ " sex | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 58 | \n",
+ " Adelie | \n",
+ " Biscoe | \n",
+ " 36.5 | \n",
+ " 16.6 | \n",
+ " 181.0 | \n",
+ " 2850.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 64 | \n",
+ " Adelie | \n",
+ " Biscoe | \n",
+ " 36.4 | \n",
+ " 17.1 | \n",
+ " 184.0 | \n",
+ " 2850.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 54 | \n",
+ " Adelie | \n",
+ " Biscoe | \n",
+ " 34.5 | \n",
+ " 18.1 | \n",
+ " 187.0 | \n",
+ " 2900.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 98 | \n",
+ " Adelie | \n",
+ " Dream | \n",
+ " 33.1 | \n",
+ " 16.1 | \n",
+ " 178.0 | \n",
+ " 2900.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | 116 | \n",
+ " Adelie | \n",
+ " Torgersen | \n",
+ " 38.6 | \n",
+ " 17.0 | \n",
+ " 188.0 | \n",
+ " 2900.0 | \n",
+ " Female | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 331 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 49.8 | \n",
+ " 15.9 | \n",
+ " 229.0 | \n",
+ " 5950.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 297 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 51.1 | \n",
+ " 16.3 | \n",
+ " 220.0 | \n",
+ " 6000.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 337 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 48.8 | \n",
+ " 16.2 | \n",
+ " 222.0 | \n",
+ " 6000.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 253 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 59.6 | \n",
+ " 17.0 | \n",
+ " 230.0 | \n",
+ " 6050.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ " | 237 | \n",
+ " Gentoo | \n",
+ " Biscoe | \n",
+ " 49.2 | \n",
+ " 15.2 | \n",
+ " 221.0 | \n",
+ " 6300.0 | \n",
+ " Male | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
333 rows × 7 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_penguins_full\",\n \"rows\": 333,\n \"fields\": [\n {\n \"column\": \"species\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Adelie\",\n \"Chinstrap\",\n \"Gentoo\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"island\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Biscoe\",\n \"Dream\",\n \"Torgersen\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.468668342647559,\n \"min\": 32.1,\n \"max\": 59.6,\n \"num_unique_values\": 163,\n \"samples\": [\n 49.1,\n 51.4,\n 45.1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"bill_depth_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9692354633199007,\n \"min\": 13.1,\n \"max\": 21.5,\n \"num_unique_values\": 79,\n \"samples\": [\n 18.9,\n 16.6,\n 18.4\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"flipper_length_mm\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 14.01576528828788,\n \"min\": 172.0,\n \"max\": 231.0,\n \"num_unique_values\": 54,\n \"samples\": [\n 197.0,\n 228.0,\n 224.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"body_mass_g\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 805.2158019428966,\n \"min\": 2700.0,\n \"max\": 6300.0,\n \"num_unique_values\": 93,\n \"samples\": [\n 4250.0,\n 3625.0,\n 3525.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"sex\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Male\",\n \"Female\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 26
+ }
+ ],
+ "source": [
+ "# Primary key: species; ties within a species are ordered by body mass.\n",
+ "# Both keys use the default ascending order.\n",
+ "sort_keys = [\"species\", \"body_mass_g\"]\n",
+ "df_penguins_full.sort_values(sort_keys)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "82890e5f-d9f7-4729-a179-b0883f1bc498",
+ "metadata": {
+ "id": "82890e5f-d9f7-4729-a179-b0883f1bc498"
+ },
+ "source": [
+ "## Selecting Rows, Columns, and Cells\n",
+ "\n",
+ "Let's look at some precious stones now, and leave the poor penguins alone for a while."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5d1abb35-5d24-4bd2-89d4-dd9dbbdadccf",
+ "metadata": {
+ "id": "5d1abb35-5d24-4bd2-89d4-dd9dbbdadccf"
+ },
+ "source": [
+ "**27.** Load the Seaborn \"diamonds\" dataset into a Pandas dataframe named diamonds."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "9247186e-0c21-43d3-88db-79c7f4ecbc06",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 423
+ },
+ "id": "9247186e-0c21-43d3-88db-79c7f4ecbc06",
+ "outputId": "c1a82fb4-b27a-4394-9b9f-c91f4b1c524f"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43\n",
+ "1 0.21 Premium E SI1 59.8 61.0 326 3.89 3.84 2.31\n",
+ "2 0.23 Good E VS1 56.9 65.0 327 4.05 4.07 2.31\n",
+ "3 0.29 Premium I VS2 62.4 58.0 334 4.20 4.23 2.63\n",
+ "4 0.31 Good J SI2 63.3 58.0 335 4.34 4.35 2.75\n",
+ "... ... ... ... ... ... ... ... ... ... ...\n",
+ "53935 0.72 Ideal D SI1 60.8 57.0 2757 5.75 5.76 3.50\n",
+ "53936 0.72 Good D SI1 63.1 55.0 2757 5.69 5.75 3.61\n",
+ "53937 0.70 Very Good D SI1 62.8 60.0 2757 5.66 5.68 3.56\n",
+ "53938 0.86 Premium H SI2 61.0 58.0 2757 6.15 6.12 3.74\n",
+ "53939 0.75 Ideal D SI2 62.2 55.0 2757 5.83 5.87 3.64\n",
+ "\n",
+ "[53940 rows x 10 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.23 | \n",
+ " Ideal | \n",
+ " E | \n",
+ " SI2 | \n",
+ " 61.5 | \n",
+ " 55.0 | \n",
+ " 326 | \n",
+ " 3.95 | \n",
+ " 3.98 | \n",
+ " 2.43 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 0.21 | \n",
+ " Premium | \n",
+ " E | \n",
+ " SI1 | \n",
+ " 59.8 | \n",
+ " 61.0 | \n",
+ " 326 | \n",
+ " 3.89 | \n",
+ " 3.84 | \n",
+ " 2.31 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 0.23 | \n",
+ " Good | \n",
+ " E | \n",
+ " VS1 | \n",
+ " 56.9 | \n",
+ " 65.0 | \n",
+ " 327 | \n",
+ " 4.05 | \n",
+ " 4.07 | \n",
+ " 2.31 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 0.29 | \n",
+ " Premium | \n",
+ " I | \n",
+ " VS2 | \n",
+ " 62.4 | \n",
+ " 58.0 | \n",
+ " 334 | \n",
+ " 4.20 | \n",
+ " 4.23 | \n",
+ " 2.63 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 0.31 | \n",
+ " Good | \n",
+ " J | \n",
+ " SI2 | \n",
+ " 63.3 | \n",
+ " 58.0 | \n",
+ " 335 | \n",
+ " 4.34 | \n",
+ " 4.35 | \n",
+ " 2.75 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 53935 | \n",
+ " 0.72 | \n",
+ " Ideal | \n",
+ " D | \n",
+ " SI1 | \n",
+ " 60.8 | \n",
+ " 57.0 | \n",
+ " 2757 | \n",
+ " 5.75 | \n",
+ " 5.76 | \n",
+ " 3.50 | \n",
+ "
\n",
+ " \n",
+ " | 53936 | \n",
+ " 0.72 | \n",
+ " Good | \n",
+ " D | \n",
+ " SI1 | \n",
+ " 63.1 | \n",
+ " 55.0 | \n",
+ " 2757 | \n",
+ " 5.69 | \n",
+ " 5.75 | \n",
+ " 3.61 | \n",
+ "
\n",
+ " \n",
+ " | 53937 | \n",
+ " 0.70 | \n",
+ " Very Good | \n",
+ " D | \n",
+ " SI1 | \n",
+ " 62.8 | \n",
+ " 60.0 | \n",
+ " 2757 | \n",
+ " 5.66 | \n",
+ " 5.68 | \n",
+ " 3.56 | \n",
+ "
\n",
+ " \n",
+ " | 53938 | \n",
+ " 0.86 | \n",
+ " Premium | \n",
+ " H | \n",
+ " SI2 | \n",
+ " 61.0 | \n",
+ " 58.0 | \n",
+ " 2757 | \n",
+ " 6.15 | \n",
+ " 6.12 | \n",
+ " 3.74 | \n",
+ "
\n",
+ " \n",
+ " | 53939 | \n",
+ " 0.75 | \n",
+ " Ideal | \n",
+ " D | \n",
+ " SI2 | \n",
+ " 62.2 | \n",
+ " 55.0 | \n",
+ " 2757 | \n",
+ " 5.83 | \n",
+ " 5.87 | \n",
+ " 3.64 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
53940 rows × 10 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_diamonds",
+ "summary": "{\n \"name\": \"df_diamonds\",\n \"rows\": 53940,\n \"fields\": [\n {\n \"column\": \"carat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.47401124440538067,\n \"min\": 0.2,\n \"max\": 5.01,\n \"num_unique_values\": 273,\n \"samples\": [\n 0.77,\n 1.58,\n 0.65\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cut\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Premium\",\n \"Fair\",\n \"Good\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"color\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"E\",\n \"I\",\n \"G\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"clarity\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"SI1\",\n \"VVS1\",\n \"SI2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"depth\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.4326213188337733,\n \"min\": 43.0,\n \"max\": 79.0,\n \"num_unique_values\": 184,\n \"samples\": [\n 61.0,\n 62.9,\n 70.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"table\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.234490562820938,\n \"min\": 43.0,\n \"max\": 95.0,\n \"num_unique_values\": 127,\n \"samples\": [\n 54.8,\n 61.5,\n 57.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3989,\n \"min\": 326,\n \"max\": 18823,\n \"num_unique_values\": 11602,\n \"samples\": [\n 16368,\n 895,\n 748\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"x\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.1217607467924422,\n \"min\": 0.0,\n \"max\": 10.74,\n \"num_unique_values\": 554,\n \"samples\": 
[\n 5.88,\n 5.69,\n 4.61\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"y\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.1421346741235396,\n \"min\": 0.0,\n \"max\": 58.9,\n \"num_unique_values\": 552,\n \"samples\": [\n 5.33,\n 5.61,\n 5.25\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"z\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7056988469499964,\n \"min\": 0.0,\n \"max\": 31.8,\n \"num_unique_values\": 375,\n \"samples\": [\n 3.36,\n 2.45,\n 2.7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 27
+ }
+ ],
+ "source": [
+ "df_diamonds=sb.load_dataset(\"diamonds\")\n",
+ "df_diamonds"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "cbe20f32-5cb2-4191-9994-dbadaf7488dd",
+ "metadata": {
+ "id": "cbe20f32-5cb2-4191-9994-dbadaf7488dd"
+ },
+ "source": [
+ "**28.** Display the columns that are available."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "c12fd634-2c83-4130-87f3-5580e42ce496",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "c12fd634-2c83-4130-87f3-5580e42ce496",
+ "outputId": "dec24bb9-bfac-4585-de73-1c4de22367f0"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',\n",
+ " 'z'],\n",
+ " dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 28
+ }
+ ],
+ "source": [
+ "df_diamonds.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "505b677e-c1bb-4daa-8615-a78ea4860a71",
+ "metadata": {
+ "id": "505b677e-c1bb-4daa-8615-a78ea4860a71"
+ },
+ "source": [
+ "**29.** If you select a single column from the diamonds DataFrame, what will be the type of the return value?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "10307c17-e2ad-48ce-b0b9-ddde83b5f6c3",
+ "metadata": {
+ "id": "10307c17-e2ad-48ce-b0b9-ddde83b5f6c3"
+ },
+ "source": [
+ "A pandas `Series`.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5f5d7cae-b784-449d-be28-a1473721b4fa",
+ "metadata": {
+ "id": "5f5d7cae-b784-449d-be28-a1473721b4fa"
+ },
+ "source": [
+ "**30.** Select the 'table' column and show its type"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "18963cfe-266d-4ca6-9471-b47000c35ee5",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "18963cfe-266d-4ca6-9471-b47000c35ee5",
+ "outputId": "88a1182f-2df9-4341-ea43-f5f31f773342"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "dtype('float64')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 30
+ }
+ ],
+ "source": [
+ "df_diamonds[\"table\"].dtype"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "82de5bb4-baed-422e-83a6-d7581ea15ab8",
+ "metadata": {
+ "id": "82de5bb4-baed-422e-83a6-d7581ea15ab8"
+ },
+ "source": [
+ "**31.** Select the first ten rows of the price and carat columns of the diamonds DataFrame into a variable called subset, and display them."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "55ccbc17-b267-4ffb-9876-d2192ba2437d",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 363
+ },
+ "id": "55ccbc17-b267-4ffb-9876-d2192ba2437d",
+ "outputId": "44e05385-ea7d-42ac-f119-41950861ca5c"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " price carat\n",
+ "0 326 0.23\n",
+ "1 326 0.21\n",
+ "2 327 0.23\n",
+ "3 334 0.29\n",
+ "4 335 0.31\n",
+ "5 336 0.24\n",
+ "6 336 0.24\n",
+ "7 337 0.26\n",
+ "8 337 0.22\n",
+ "9 338 0.23"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " price | \n",
+ " carat | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 326 | \n",
+ " 0.23 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 326 | \n",
+ " 0.21 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 327 | \n",
+ " 0.23 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 334 | \n",
+ " 0.29 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 335 | \n",
+ " 0.31 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 336 | \n",
+ " 0.24 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 336 | \n",
+ " 0.24 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 337 | \n",
+ " 0.26 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 337 | \n",
+ " 0.22 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 338 | \n",
+ " 0.23 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "subset",
+ "summary": "{\n \"name\": \"subset\",\n \"rows\": 10,\n \"fields\": [\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4,\n \"min\": 326,\n \"max\": 338,\n \"num_unique_values\": 7,\n \"samples\": [\n 326,\n 327,\n 337\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"carat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.03169297153067923,\n \"min\": 0.21,\n \"max\": 0.31,\n \"num_unique_values\": 7,\n \"samples\": [\n 0.23,\n 0.21,\n 0.26\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 31
+ }
+ ],
+ "source": [
+ "subset=df_diamonds[[\"price\",\"carat\"]].head(10)\n",
+ "subset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [],
+ "metadata": {
+ "id": "btDN1wAAYGwK"
+ },
+ "id": "btDN1wAAYGwK",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fdb4c150-ebdc-420d-979b-14231fc4764b",
+ "metadata": {
+ "id": "fdb4c150-ebdc-420d-979b-14231fc4764b"
+ },
+ "source": [
+ "**32.** For a given column, show the code to display the datatype of the _values_ in the column."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "bb6b2d54-e58f-4da4-bcf2-a4d08ee34602",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "bb6b2d54-e58f-4da4-bcf2-a4d08ee34602",
+ "outputId": "94ed71fa-f7bb-45a1-8bd7-e820db5d1845"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "dtype('int64')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 32
+ }
+ ],
+ "source": [
+ "df_diamonds[\"price\"].dtype"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ca510203-8c7e-48a1-81c3-c9e1bc5aa7b1",
+ "metadata": {
+ "id": "ca510203-8c7e-48a1-81c3-c9e1bc5aa7b1"
+ },
+ "source": [
+ "**33.** Select the first row of the diamonds DataFrame into a variable called row."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "8535dd1a-db9d-49c7-a6b2-0257616d7334",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 89
+ },
+ "id": "8535dd1a-db9d-49c7-a6b2-0257616d7334",
+ "outputId": "c5e27975-13e5-4cef-a1fb-33cfee7ccd53"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.23 | \n",
+ " Ideal | \n",
+ " E | \n",
+ " SI2 | \n",
+ " 61.5 | \n",
+ " 55.0 | \n",
+ " 326 | \n",
+ " 3.95 | \n",
+ " 3.98 | \n",
+ " 2.43 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "row",
+ "summary": "{\n \"name\": \"row\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"carat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.23,\n \"max\": 0.23,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.23\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cut\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Ideal\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"color\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"E\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"clarity\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"SI2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"depth\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 61.5,\n \"max\": 61.5,\n \"num_unique_values\": 1,\n \"samples\": [\n 61.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"table\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 55.0,\n \"max\": 55.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 55.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 326,\n \"max\": 326,\n \"num_unique_values\": 1,\n \"samples\": [\n 326\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"x\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 3.95,\n \"max\": 3.95,\n \"num_unique_values\": 1,\n \"samples\": [\n 3.95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"y\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 3.98,\n \"max\": 3.98,\n 
\"num_unique_values\": 1,\n \"samples\": [\n 3.98\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"z\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 2.43,\n \"max\": 2.43,\n \"num_unique_values\": 1,\n \"samples\": [\n 2.43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 40
+ }
+ ],
+ "source": [
+ "row=df_diamonds.head(1)\n",
+ "row"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "edbd9394-ec43-479c-919a-5eab17acb792",
+ "metadata": {
+ "id": "edbd9394-ec43-479c-919a-5eab17acb792"
+ },
+ "source": [
+ "**34.** What would you expect the data type of the row to be? Display it."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "22d7c0bd-cf41-491e-8fc5-41aed540c16e",
+ "metadata": {
+ "id": "22d7c0bd-cf41-491e-8fc5-41aed540c16e"
+ },
+ "source": [
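+ "A one-row pandas `DataFrame`, because `row` was selected with `head(1)`. (Selecting it with `df_diamonds.iloc[0]` instead would return a pandas `Series`.)"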
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "777274bd-3653-4d0c-b73e-75f879cb48ae",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 80
+ },
+ "id": "777274bd-3653-4d0c-b73e-75f879cb48ae",
+ "outputId": "193f5e0a-cc6b-4f91-e199-89989f9883c6"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.23 | \n",
+ " Ideal | \n",
+ " E | \n",
+ " SI2 | \n",
+ " 61.5 | \n",
+ " 55.0 | \n",
+ " 326 | \n",
+ " 3.95 | \n",
+ " 3.98 | \n",
+ " 2.43 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_diamonds",
+ "summary": "{\n \"name\": \"df_diamonds\",\n \"rows\": 53940,\n \"fields\": [\n {\n \"column\": \"carat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.47401124440538067,\n \"min\": 0.2,\n \"max\": 5.01,\n \"num_unique_values\": 273,\n \"samples\": [\n 0.77,\n 1.58,\n 0.65\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cut\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Premium\",\n \"Fair\",\n \"Good\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"color\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 7,\n \"samples\": [\n \"E\",\n \"I\",\n \"G\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"clarity\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 8,\n \"samples\": [\n \"SI1\",\n \"VVS1\",\n \"SI2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"depth\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.4326213188337733,\n \"min\": 43.0,\n \"max\": 79.0,\n \"num_unique_values\": 184,\n \"samples\": [\n 61.0,\n 62.9,\n 70.2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"table\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.234490562820938,\n \"min\": 43.0,\n \"max\": 95.0,\n \"num_unique_values\": 127,\n \"samples\": [\n 54.8,\n 61.5,\n 57.6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3989,\n \"min\": 326,\n \"max\": 18823,\n \"num_unique_values\": 11602,\n \"samples\": [\n 16368,\n 895,\n 748\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"x\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.1217607467924422,\n \"min\": 0.0,\n \"max\": 10.74,\n \"num_unique_values\": 554,\n \"samples\": 
[\n 5.88,\n 5.69,\n 4.61\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"y\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.1421346741235396,\n \"min\": 0.0,\n \"max\": 58.9,\n \"num_unique_values\": 552,\n \"samples\": [\n 5.33,\n 5.61,\n 5.25\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"z\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.7056988469499964,\n \"min\": 0.0,\n \"max\": 31.8,\n \"num_unique_values\": 375,\n \"samples\": [\n 3.36,\n 2.45,\n 2.7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 41
+ }
+ ],
+ "source": [
+ "df_diamonds.head(1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f6027d4a-4df5-45c6-bb82-c4ed046c2681",
+ "metadata": {
+ "id": "f6027d4a-4df5-45c6-bb82-c4ed046c2681"
+ },
+ "source": [
+ "**35.** Can you discover the names of the columns using only the row returned in #33? Why or why not?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "55c063e1-2346-4782-9133-e684c2714fe0",
+ "metadata": {
+ "id": "55c063e1-2346-4782-9133-e684c2714fe0"
+ },
+ "source": [
+ "Yes. The row returned in #33 is a one-row DataFrame, so it carries the same column labels as the full DataFrame in its `.columns` attribute (see below):"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "1a5b9a02-9764-43b7-b49b-4522506e5669",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "1a5b9a02-9764-43b7-b49b-4522506e5669",
+ "outputId": "8871fb6b-9f7c-48b4-808c-d41a7f5ade97"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',\n",
+ " 'z'],\n",
+ " dtype='object')"
+ ]
+ },
+ "metadata": {},
+ "execution_count": 42
+ }
+ ],
+ "source": [
+ "df_diamonds.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "369f761e-0dbe-4589-835f-526ab8f9884f",
+ "metadata": {
+ "id": "369f761e-0dbe-4589-835f-526ab8f9884f"
+ },
+ "source": [
+ "**36.** Select the row with the highest priced diamond."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "41f8510d-cf79-43eb-910c-1c03aebc21ba",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 80
+ },
+ "id": "41f8510d-cf79-43eb-910c-1c03aebc21ba",
+ "outputId": "69e5b568-a522-451e-e29d-32c954ac21db"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "27749 2.29 Premium I VS2 60.8 60.0 18823 8.5 8.47 5.16"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 27749 | \n",
+ " 2.29 | \n",
+ " Premium | \n",
+ " I | \n",
+ " VS2 | \n",
+ " 60.8 | \n",
+ " 60.0 | \n",
+ " 18823 | \n",
+ " 8.5 | \n",
+ " 8.47 | \n",
+ " 5.16 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_diamonds\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"carat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 2.29,\n \"max\": 2.29,\n \"num_unique_values\": 1,\n \"samples\": [\n 2.29\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cut\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Premium\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"color\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"I\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"clarity\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"VS2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"depth\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 60.8,\n \"max\": 60.8,\n \"num_unique_values\": 1,\n \"samples\": [\n 60.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"table\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 60.0,\n \"max\": 60.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 60.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 18823,\n \"max\": 18823,\n \"num_unique_values\": 1,\n \"samples\": [\n 18823\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"x\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 8.5,\n \"max\": 8.5,\n \"num_unique_values\": 1,\n \"samples\": [\n 8.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"y\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 8.47,\n \"max\": 8.47,\n 
\"num_unique_values\": 1,\n \"samples\": [\n 8.47\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"z\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 5.16,\n \"max\": 5.16,\n \"num_unique_values\": 1,\n \"samples\": [\n 5.16\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 44
+ }
+ ],
+ "source": [
+ "df_diamonds.sort_values(by=\"price\",ascending=False).head(1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2694d362-a5d1-4ad9-8b09-f408d7531948",
+ "metadata": {
+ "id": "2694d362-a5d1-4ad9-8b09-f408d7531948"
+ },
+ "source": [
+ "**37.** Select the row with the lowest priced diamond."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "b6559ffa-fbb6-4ce9-b882-5bccbf5403e4",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 80
+ },
+ "id": "b6559ffa-fbb6-4ce9-b882-5bccbf5403e4",
+ "outputId": "749f381b-17c1-4d41-fafc-9ce618f44da0"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " carat cut color clarity depth table price x y z\n",
+ "0 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " carat | \n",
+ " cut | \n",
+ " color | \n",
+ " clarity | \n",
+ " depth | \n",
+ " table | \n",
+ " price | \n",
+ " x | \n",
+ " y | \n",
+ " z | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0.23 | \n",
+ " Ideal | \n",
+ " E | \n",
+ " SI2 | \n",
+ " 61.5 | \n",
+ " 55.0 | \n",
+ " 326 | \n",
+ " 3.95 | \n",
+ " 3.98 | \n",
+ " 2.43 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "summary": "{\n \"name\": \"df_diamonds\",\n \"rows\": 1,\n \"fields\": [\n {\n \"column\": \"carat\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 0.23,\n \"max\": 0.23,\n \"num_unique_values\": 1,\n \"samples\": [\n 0.23\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"cut\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"Ideal\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"color\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"E\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"clarity\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"SI2\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"depth\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 61.5,\n \"max\": 61.5,\n \"num_unique_values\": 1,\n \"samples\": [\n 61.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"table\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 55.0,\n \"max\": 55.0,\n \"num_unique_values\": 1,\n \"samples\": [\n 55.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"price\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 326,\n \"max\": 326,\n \"num_unique_values\": 1,\n \"samples\": [\n 326\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"x\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 3.95,\n \"max\": 3.95,\n \"num_unique_values\": 1,\n \"samples\": [\n 3.95\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"y\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 3.98,\n \"max\": 3.98,\n 
\"num_unique_values\": 1,\n \"samples\": [\n 3.98\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"z\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": null,\n \"min\": 2.43,\n \"max\": 2.43,\n \"num_unique_values\": 1,\n \"samples\": [\n 2.43\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 45
+ }
+ ],
+ "source": [
+ "df_diamonds.sort_values(by=\"price\",ascending=True).head(1)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e366af69-1739-48c5-b964-298fa294bc7a",
+ "metadata": {
+ "id": "e366af69-1739-48c5-b964-298fa294bc7a"
+ },
+ "source": [
+ "## Some Exercises Using Time Series"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ca92f2e5-c4f3-460e-b3b1-0d6bff4694ff",
+ "metadata": {
+ "id": "ca92f2e5-c4f3-460e-b3b1-0d6bff4694ff"
+ },
+ "source": [
+ "**38.** Load the taxis dataset into a DataFrame, ```taxis```."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "5bb6dee5-3828-4a80-8a35-98397639e5db",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 770
+ },
+ "id": "5bb6dee5-3828-4a80-8a35-98397639e5db",
+ "outputId": "bf71f114-0736-4ab8-dcd6-bbf4c879a904"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " pickup dropoff passengers distance fare \\\n",
+ "0 2019-03-23 20:21:09 2019-03-23 20:27:24 1 1.60 7.0 \n",
+ "1 2019-03-04 16:11:55 2019-03-04 16:19:00 1 0.79 5.0 \n",
+ "2 2019-03-27 17:53:01 2019-03-27 18:00:25 1 1.37 7.5 \n",
+ "3 2019-03-10 01:23:59 2019-03-10 01:49:51 1 7.70 27.0 \n",
+ "4 2019-03-30 13:27:42 2019-03-30 13:37:14 3 2.16 9.0 \n",
+ "... ... ... ... ... ... \n",
+ "6428 2019-03-31 09:51:53 2019-03-31 09:55:27 1 0.75 4.5 \n",
+ "6429 2019-03-31 17:38:00 2019-03-31 18:34:23 1 18.74 58.0 \n",
+ "6430 2019-03-23 22:55:18 2019-03-23 23:14:25 1 4.14 16.0 \n",
+ "6431 2019-03-04 10:09:25 2019-03-04 10:14:29 1 1.12 6.0 \n",
+ "6432 2019-03-13 19:31:22 2019-03-13 19:48:02 1 3.85 15.0 \n",
+ "\n",
+ " tip tolls total color payment pickup_zone \\\n",
+ "0 2.15 0.0 12.95 yellow credit card Lenox Hill West \n",
+ "1 0.00 0.0 9.30 yellow cash Upper West Side South \n",
+ "2 2.36 0.0 14.16 yellow credit card Alphabet City \n",
+ "3 6.15 0.0 36.95 yellow credit card Hudson Sq \n",
+ "4 1.10 0.0 13.40 yellow credit card Midtown East \n",
+ "... ... ... ... ... ... ... \n",
+ "6428 1.06 0.0 6.36 green credit card East Harlem North \n",
+ "6429 0.00 0.0 58.80 green credit card Jamaica \n",
+ "6430 0.00 0.0 17.30 green cash Crown Heights North \n",
+ "6431 0.00 0.0 6.80 green credit card East New York \n",
+ "6432 3.36 0.0 20.16 green credit card Boerum Hill \n",
+ "\n",
+ " dropoff_zone pickup_borough dropoff_borough \n",
+ "0 UN/Turtle Bay South Manhattan Manhattan \n",
+ "1 Upper West Side South Manhattan Manhattan \n",
+ "2 West Village Manhattan Manhattan \n",
+ "3 Yorkville West Manhattan Manhattan \n",
+ "4 Yorkville West Manhattan Manhattan \n",
+ "... ... ... ... \n",
+ "6428 Central Harlem North Manhattan Manhattan \n",
+ "6429 East Concourse/Concourse Village Queens Bronx \n",
+ "6430 Bushwick North Brooklyn Brooklyn \n",
+ "6431 East Flatbush/Remsen Village Brooklyn Brooklyn \n",
+ "6432 Windsor Terrace Brooklyn Brooklyn \n",
+ "\n",
+ "[6433 rows x 14 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " pickup | \n",
+ " dropoff | \n",
+ " passengers | \n",
+ " distance | \n",
+ " fare | \n",
+ " tip | \n",
+ " tolls | \n",
+ " total | \n",
+ " color | \n",
+ " payment | \n",
+ " pickup_zone | \n",
+ " dropoff_zone | \n",
+ " pickup_borough | \n",
+ " dropoff_borough | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2019-03-23 20:21:09 | \n",
+ " 2019-03-23 20:27:24 | \n",
+ " 1 | \n",
+ " 1.60 | \n",
+ " 7.0 | \n",
+ " 2.15 | \n",
+ " 0.0 | \n",
+ " 12.95 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " Lenox Hill West | \n",
+ " UN/Turtle Bay South | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 2019-03-04 16:11:55 | \n",
+ " 2019-03-04 16:19:00 | \n",
+ " 1 | \n",
+ " 0.79 | \n",
+ " 5.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 9.30 | \n",
+ " yellow | \n",
+ " cash | \n",
+ " Upper West Side South | \n",
+ " Upper West Side South | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 2019-03-27 17:53:01 | \n",
+ " 2019-03-27 18:00:25 | \n",
+ " 1 | \n",
+ " 1.37 | \n",
+ " 7.5 | \n",
+ " 2.36 | \n",
+ " 0.0 | \n",
+ " 14.16 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " Alphabet City | \n",
+ " West Village | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 2019-03-10 01:23:59 | \n",
+ " 2019-03-10 01:49:51 | \n",
+ " 1 | \n",
+ " 7.70 | \n",
+ " 27.0 | \n",
+ " 6.15 | \n",
+ " 0.0 | \n",
+ " 36.95 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " Hudson Sq | \n",
+ " Yorkville West | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 2019-03-30 13:27:42 | \n",
+ " 2019-03-30 13:37:14 | \n",
+ " 3 | \n",
+ " 2.16 | \n",
+ " 9.0 | \n",
+ " 1.10 | \n",
+ " 0.0 | \n",
+ " 13.40 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " Midtown East | \n",
+ " Yorkville West | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 6428 | \n",
+ " 2019-03-31 09:51:53 | \n",
+ " 2019-03-31 09:55:27 | \n",
+ " 1 | \n",
+ " 0.75 | \n",
+ " 4.5 | \n",
+ " 1.06 | \n",
+ " 0.0 | \n",
+ " 6.36 | \n",
+ " green | \n",
+ " credit card | \n",
+ " East Harlem North | \n",
+ " Central Harlem North | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ "
\n",
+ " \n",
+ " | 6429 | \n",
+ " 2019-03-31 17:38:00 | \n",
+ " 2019-03-31 18:34:23 | \n",
+ " 1 | \n",
+ " 18.74 | \n",
+ " 58.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 58.80 | \n",
+ " green | \n",
+ " credit card | \n",
+ " Jamaica | \n",
+ " East Concourse/Concourse Village | \n",
+ " Queens | \n",
+ " Bronx | \n",
+ "
\n",
+ " \n",
+ " | 6430 | \n",
+ " 2019-03-23 22:55:18 | \n",
+ " 2019-03-23 23:14:25 | \n",
+ " 1 | \n",
+ " 4.14 | \n",
+ " 16.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 17.30 | \n",
+ " green | \n",
+ " cash | \n",
+ " Crown Heights North | \n",
+ " Bushwick North | \n",
+ " Brooklyn | \n",
+ " Brooklyn | \n",
+ "
\n",
+ " \n",
+ " | 6431 | \n",
+ " 2019-03-04 10:09:25 | \n",
+ " 2019-03-04 10:14:29 | \n",
+ " 1 | \n",
+ " 1.12 | \n",
+ " 6.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 6.80 | \n",
+ " green | \n",
+ " credit card | \n",
+ " East New York | \n",
+ " East Flatbush/Remsen Village | \n",
+ " Brooklyn | \n",
+ " Brooklyn | \n",
+ "
\n",
+ " \n",
+ " | 6432 | \n",
+ " 2019-03-13 19:31:22 | \n",
+ " 2019-03-13 19:48:02 | \n",
+ " 1 | \n",
+ " 3.85 | \n",
+ " 15.0 | \n",
+ " 3.36 | \n",
+ " 0.0 | \n",
+ " 20.16 | \n",
+ " green | \n",
+ " credit card | \n",
+ " Boerum Hill | \n",
+ " Windsor Terrace | \n",
+ " Brooklyn | \n",
+ " Brooklyn | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
6433 rows × 14 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "taxis",
+ "summary": "{\n \"name\": \"taxis\",\n \"rows\": 6433,\n \"fields\": [\n {\n \"column\": \"pickup\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 6414,\n \"samples\": [\n \"2019-03-11 21:32:20\",\n \"2019-03-01 11:36:49\",\n \"2019-03-24 19:36:52\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dropoff\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 6425,\n \"samples\": [\n \"2019-03-13 22:53:10\",\n \"2019-03-12 23:25:20\",\n \"2019-03-04 04:08:04\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"passengers\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 6,\n \"num_unique_values\": 7,\n \"samples\": [\n 1,\n 3,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 3.8278670010117537,\n \"min\": 0.0,\n \"max\": 36.7,\n \"num_unique_values\": 1079,\n \"samples\": [\n 2.08,\n 19.43,\n 16.65\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 11.55180426641491,\n \"min\": 1.0,\n \"max\": 150.0,\n \"num_unique_values\": 220,\n \"samples\": [\n 143.5,\n 16.39,\n 44.5\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tip\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.4485595916360454,\n \"min\": 0.0,\n \"max\": 33.2,\n \"num_unique_values\": 489,\n \"samples\": [\n 3.21,\n 7.31,\n 6.82\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tolls\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.4152673297904774,\n \"min\": 0.0,\n \"max\": 24.02,\n \"num_unique_values\": 16,\n \"samples\": [\n 0.0,\n 5.76,\n 17.28\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total\",\n \"properties\": 
{\n \"dtype\": \"number\",\n \"std\": 13.81557004106683,\n \"min\": 1.3,\n \"max\": 174.82,\n \"num_unique_values\": 898,\n \"samples\": [\n 4.8,\n 42.0,\n 50.76\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"color\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"green\",\n \"yellow\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"payment\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"cash\",\n \"credit card\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pickup_zone\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 194,\n \"samples\": [\n \"University Heights/Morris Heights\",\n \"Gramercy\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dropoff_zone\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 203,\n \"samples\": [\n \"East Chelsea\",\n \"Astoria\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pickup_borough\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Queens\",\n \"Brooklyn\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dropoff_borough\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Queens\",\n \"Staten Island\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 46
+ }
+ ],
+ "source": [
+ "taxis = pd.read_csv(\"https://raw.githubusercontent.com/mwaskom/seaborn-data/master/taxis.csv\")  # seaborn 'taxis' sample data, read straight from the seaborn-data repository\n",
+ "taxis"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06ef6738-b5fb-4341-8ced-15f02ee5aad6",
+ "metadata": {
+ "id": "06ef6738-b5fb-4341-8ced-15f02ee5aad6"
+ },
+ "source": [
+ "**39.** The 'pickup' column contains the date and time the customer was picked up, but it's stored as a string. Add a column to the DataFrame, 'pickup_time', containing the value in 'pickup' as a datetime."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "a857614a-ada0-43c5-9ba0-6e00422bf518",
+ "metadata": {
+ "id": "a857614a-ada0-43c5-9ba0-6e00422bf518"
+ },
+ "outputs": [],
+ "source": [
+ "taxis[\"pickup_time\"] = pd.to_datetime(taxis[\"pickup\"])  # parse the 'pickup' strings into datetimes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d2aca5a0-b67d-4c2b-928f-49a0df783a4a",
+ "metadata": {
+ "id": "d2aca5a0-b67d-4c2b-928f-49a0df783a4a"
+ },
+ "source": [
+ "**40.** We have a hypothesis that as the day goes on, the tips get higher. We'll need to wrangle the data a bit before testing this, however. First, now that we have a datetime column, pickup_time, use it to select a subset of taxis as a new DataFrame, taxis_one_day. This new DataFrame should have values between '2019-03-23 00:06:00' (inclusive) and '2019-03-24 00:00:00' (exclusive)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "4c92be8d-ee1a-452d-84e9-64617cbcd850",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 790
+ },
+ "id": "4c92be8d-ee1a-452d-84e9-64617cbcd850",
+ "outputId": "d9812a56-4023-4b8f-a8f8-c034fb000a8e"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " pickup dropoff passengers distance fare \\\n",
+ "0 2019-03-23 20:21:09 2019-03-23 20:27:24 1 1.60 7.0 \n",
+ "8 2019-03-23 11:48:50 2019-03-23 12:06:14 1 3.63 15.0 \n",
+ "17 2019-03-23 20:50:49 2019-03-23 21:02:07 1 2.60 10.5 \n",
+ "117 2019-03-23 09:39:25 2019-03-23 09:56:45 0 3.60 15.5 \n",
+ "144 2019-03-23 18:35:01 2019-03-23 18:47:39 1 3.20 12.5 \n",
+ "... ... ... ... ... ... \n",
+ "6325 2019-03-23 20:52:40 2019-03-23 21:10:12 1 3.62 14.5 \n",
+ "6331 2019-03-23 11:27:00 2019-03-23 12:20:11 1 7.67 28.0 \n",
+ "6338 2019-03-23 18:05:38 2019-03-23 18:25:36 1 2.82 14.0 \n",
+ "6427 2019-03-23 18:26:09 2019-03-23 18:49:12 1 7.07 20.0 \n",
+ "6430 2019-03-23 22:55:18 2019-03-23 23:14:25 1 4.14 16.0 \n",
+ "\n",
+ " tip tolls total color payment pickup_zone \\\n",
+ "0 2.15 0.0 12.95 yellow credit card Lenox Hill West \n",
+ "8 1.00 0.0 19.30 yellow credit card East Harlem South \n",
+ "17 2.00 0.0 16.30 yellow credit card Midtown Center \n",
+ "117 3.75 0.0 22.55 yellow credit card Yorkville East \n",
+ "144 2.00 0.0 17.80 yellow credit card UN/Turtle Bay South \n",
+ "... ... ... ... ... ... ... \n",
+ "6325 0.00 0.0 15.80 green cash Long Island City/Hunters Point \n",
+ "6331 0.00 0.0 28.00 green cash Jackson Heights \n",
+ "6338 0.00 0.0 14.80 green credit card Claremont/Bathgate \n",
+ "6427 0.00 0.0 20.00 green cash Parkchester \n",
+ "6430 0.00 0.0 17.30 green cash Crown Heights North \n",
+ "\n",
+ " dropoff_zone pickup_borough dropoff_borough \\\n",
+ "0 UN/Turtle Bay South Manhattan Manhattan \n",
+ "8 Midtown Center Manhattan Manhattan \n",
+ "17 East Harlem South Manhattan Manhattan \n",
+ "117 Penn Station/Madison Sq West Manhattan Manhattan \n",
+ "144 East Village Manhattan Manhattan \n",
+ "... ... ... ... \n",
+ "6325 Steinway Queens Queens \n",
+ "6331 Maspeth Queens Queens \n",
+ "6338 Spuyten Duyvil/Kingsbridge Bronx Bronx \n",
+ "6427 East Harlem South Bronx Manhattan \n",
+ "6430 Bushwick North Brooklyn Brooklyn \n",
+ "\n",
+ " pickup_time \n",
+ "0 2019-03-23 20:21:09 \n",
+ "8 2019-03-23 11:48:50 \n",
+ "17 2019-03-23 20:50:49 \n",
+ "117 2019-03-23 09:39:25 \n",
+ "144 2019-03-23 18:35:01 \n",
+ "... ... \n",
+ "6325 2019-03-23 20:52:40 \n",
+ "6331 2019-03-23 11:27:00 \n",
+ "6338 2019-03-23 18:05:38 \n",
+ "6427 2019-03-23 18:26:09 \n",
+ "6430 2019-03-23 22:55:18 \n",
+ "\n",
+ "[209 rows x 15 columns]"
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " pickup | \n",
+ " dropoff | \n",
+ " passengers | \n",
+ " distance | \n",
+ " fare | \n",
+ " tip | \n",
+ " tolls | \n",
+ " total | \n",
+ " color | \n",
+ " payment | \n",
+ " pickup_zone | \n",
+ " dropoff_zone | \n",
+ " pickup_borough | \n",
+ " dropoff_borough | \n",
+ " pickup_time | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 2019-03-23 20:21:09 | \n",
+ " 2019-03-23 20:27:24 | \n",
+ " 1 | \n",
+ " 1.60 | \n",
+ " 7.0 | \n",
+ " 2.15 | \n",
+ " 0.0 | \n",
+ " 12.95 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " Lenox Hill West | \n",
+ " UN/Turtle Bay South | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ " 2019-03-23 20:21:09 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 2019-03-23 11:48:50 | \n",
+ " 2019-03-23 12:06:14 | \n",
+ " 1 | \n",
+ " 3.63 | \n",
+ " 15.0 | \n",
+ " 1.00 | \n",
+ " 0.0 | \n",
+ " 19.30 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " East Harlem South | \n",
+ " Midtown Center | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ " 2019-03-23 11:48:50 | \n",
+ "
\n",
+ " \n",
+ " | 17 | \n",
+ " 2019-03-23 20:50:49 | \n",
+ " 2019-03-23 21:02:07 | \n",
+ " 1 | \n",
+ " 2.60 | \n",
+ " 10.5 | \n",
+ " 2.00 | \n",
+ " 0.0 | \n",
+ " 16.30 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " Midtown Center | \n",
+ " East Harlem South | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ " 2019-03-23 20:50:49 | \n",
+ "
\n",
+ " \n",
+ " | 117 | \n",
+ " 2019-03-23 09:39:25 | \n",
+ " 2019-03-23 09:56:45 | \n",
+ " 0 | \n",
+ " 3.60 | \n",
+ " 15.5 | \n",
+ " 3.75 | \n",
+ " 0.0 | \n",
+ " 22.55 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " Yorkville East | \n",
+ " Penn Station/Madison Sq West | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ " 2019-03-23 09:39:25 | \n",
+ "
\n",
+ " \n",
+ " | 144 | \n",
+ " 2019-03-23 18:35:01 | \n",
+ " 2019-03-23 18:47:39 | \n",
+ " 1 | \n",
+ " 3.20 | \n",
+ " 12.5 | \n",
+ " 2.00 | \n",
+ " 0.0 | \n",
+ " 17.80 | \n",
+ " yellow | \n",
+ " credit card | \n",
+ " UN/Turtle Bay South | \n",
+ " East Village | \n",
+ " Manhattan | \n",
+ " Manhattan | \n",
+ " 2019-03-23 18:35:01 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 6325 | \n",
+ " 2019-03-23 20:52:40 | \n",
+ " 2019-03-23 21:10:12 | \n",
+ " 1 | \n",
+ " 3.62 | \n",
+ " 14.5 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 15.80 | \n",
+ " green | \n",
+ " cash | \n",
+ " Long Island City/Hunters Point | \n",
+ " Steinway | \n",
+ " Queens | \n",
+ " Queens | \n",
+ " 2019-03-23 20:52:40 | \n",
+ "
\n",
+ " \n",
+ " | 6331 | \n",
+ " 2019-03-23 11:27:00 | \n",
+ " 2019-03-23 12:20:11 | \n",
+ " 1 | \n",
+ " 7.67 | \n",
+ " 28.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 28.00 | \n",
+ " green | \n",
+ " cash | \n",
+ " Jackson Heights | \n",
+ " Maspeth | \n",
+ " Queens | \n",
+ " Queens | \n",
+ " 2019-03-23 11:27:00 | \n",
+ "
\n",
+ " \n",
+ " | 6338 | \n",
+ " 2019-03-23 18:05:38 | \n",
+ " 2019-03-23 18:25:36 | \n",
+ " 1 | \n",
+ " 2.82 | \n",
+ " 14.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 14.80 | \n",
+ " green | \n",
+ " credit card | \n",
+ " Claremont/Bathgate | \n",
+ " Spuyten Duyvil/Kingsbridge | \n",
+ " Bronx | \n",
+ " Bronx | \n",
+ " 2019-03-23 18:05:38 | \n",
+ "
\n",
+ " \n",
+ " | 6427 | \n",
+ " 2019-03-23 18:26:09 | \n",
+ " 2019-03-23 18:49:12 | \n",
+ " 1 | \n",
+ " 7.07 | \n",
+ " 20.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 20.00 | \n",
+ " green | \n",
+ " cash | \n",
+ " Parkchester | \n",
+ " East Harlem South | \n",
+ " Bronx | \n",
+ " Manhattan | \n",
+ " 2019-03-23 18:26:09 | \n",
+ "
\n",
+ " \n",
+ " | 6430 | \n",
+ " 2019-03-23 22:55:18 | \n",
+ " 2019-03-23 23:14:25 | \n",
+ " 1 | \n",
+ " 4.14 | \n",
+ " 16.0 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 17.30 | \n",
+ " green | \n",
+ " cash | \n",
+ " Crown Heights North | \n",
+ " Bushwick North | \n",
+ " Brooklyn | \n",
+ " Brooklyn | \n",
+ " 2019-03-23 22:55:18 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
209 rows × 15 columns
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "taxis_one_day",
+ "summary": "{\n \"name\": \"taxis_one_day\",\n \"rows\": 209,\n \"fields\": [\n {\n \"column\": \"pickup\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 209,\n \"samples\": [\n \"2019-03-23 11:26:58\",\n \"2019-03-23 12:26:03\",\n \"2019-03-23 15:38:12\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dropoff\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 209,\n \"samples\": [\n \"2019-03-23 11:35:17\",\n \"2019-03-23 12:35:01\",\n \"2019-03-23 15:48:10\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"passengers\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 6,\n \"num_unique_values\": 7,\n \"samples\": [\n 1,\n 0,\n 6\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 2.912685287071664,\n \"min\": 0.0,\n \"max\": 18.7,\n \"num_unique_values\": 140,\n \"samples\": [\n 9.31,\n 0.7,\n 18.7\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 9.260668970291944,\n \"min\": 2.5,\n \"max\": 55.0,\n \"num_unique_values\": 48,\n \"samples\": [\n 14.0,\n 19.0,\n 13.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tip\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.9184299450794153,\n \"min\": 0.0,\n \"max\": 12.21,\n \"num_unique_values\": 70,\n \"samples\": [\n 12.2,\n 2.15,\n 0.01\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tolls\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.0332532925574367,\n \"min\": 0.0,\n \"max\": 5.76,\n \"num_unique_values\": 3,\n \"samples\": [\n 0.0,\n 5.76,\n 5.54\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total\",\n \"properties\": {\n 
\"dtype\": \"number\",\n \"std\": 10.823047811961626,\n \"min\": 3.3,\n \"max\": 73.27,\n \"num_unique_values\": 119,\n \"samples\": [\n 29.3,\n 18.3,\n 17.8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"color\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"green\",\n \"yellow\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"payment\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"cash\",\n \"credit card\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pickup_zone\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 71,\n \"samples\": [\n \"JFK Airport\",\n \"Lenox Hill West\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dropoff_zone\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 75,\n \"samples\": [\n \"East Village\",\n \"Williamsburg (South Side)\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pickup_borough\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Queens\",\n \"Bronx\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"dropoff_borough\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Queens\",\n \"Bronx\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"pickup_time\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"2019-03-23 00:07:14\",\n \"max\": \"2019-03-23 23:50:41\",\n \"num_unique_values\": 209,\n \"samples\": [\n \"2019-03-23 11:26:58\",\n \"2019-03-23 12:26:03\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 52
+ }
+ ],
+ "source": [
+ "taxis_one_day = taxis[(taxis[\"pickup_time\"] >= '2019-03-23 00:06:00') & (taxis[\"pickup_time\"] < '2019-03-24 00:00:00')]  # lower bound inclusive (>=), upper bound exclusive (<)\n",
+ "taxis_one_day"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "14b66e0e-762c-433c-9041-e40cf624eaa5",
+ "metadata": {
+ "id": "14b66e0e-762c-433c-9041-e40cf624eaa5"
+ },
+ "source": [
+ "**41.** We now have a range from morning until midnight, but we want to take the mean of the numeric columns, grouped at one-hour intervals. Save the result as df_means, and display it."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "baca4e69-dc2d-492e-918b-c15d7d560b4b",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 869
+ },
+ "id": "baca4e69-dc2d-492e-918b-c15d7d560b4b",
+ "outputId": "dba4bea5-268b-4b5e-8326-bda2e4eea648"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/tmp/ipython-input-3815462732.py:1: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.\n",
+ " df_means=taxis_one_day.groupby(pd.Grouper(key=\"pickup_time\",freq=\"1H\")).mean(numeric_only=True)\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " passengers distance fare tip tolls \\\n",
+ "pickup_time \n",
+ "2019-03-23 00:00:00 1.000000 1.911667 8.583333 1.415000 0.000000 \n",
+ "2019-03-23 01:00:00 1.250000 1.325000 7.875000 1.525000 0.000000 \n",
+ "2019-03-23 02:00:00 1.727273 1.739091 8.181818 1.641818 0.000000 \n",
+ "2019-03-23 03:00:00 1.500000 3.377500 11.750000 2.410000 0.000000 \n",
+ "2019-03-23 04:00:00 2.000000 0.950000 5.500000 0.915000 0.000000 \n",
+ "2019-03-23 05:00:00 2.000000 1.270000 6.000000 0.980000 0.000000 \n",
+ "2019-03-23 06:00:00 1.000000 0.400000 21.500000 0.000000 0.000000 \n",
+ "2019-03-23 07:00:00 2.333333 0.980000 5.250000 1.165000 0.000000 \n",
+ "2019-03-23 08:00:00 1.000000 0.020000 2.500000 0.000000 0.000000 \n",
+ "2019-03-23 09:00:00 1.500000 1.352000 7.400000 1.674000 0.000000 \n",
+ "2019-03-23 10:00:00 1.000000 1.760000 8.750000 0.727500 0.000000 \n",
+ "2019-03-23 11:00:00 1.909091 2.070000 11.090909 0.803636 0.000000 \n",
+ "2019-03-23 12:00:00 2.000000 2.267143 10.260000 0.645714 0.000000 \n",
+ "2019-03-23 13:00:00 2.500000 1.167000 7.550000 2.074000 0.000000 \n",
+ "2019-03-23 14:00:00 2.470588 4.752941 18.330000 1.945294 1.003529 \n",
+ "2019-03-23 15:00:00 1.000000 6.557143 22.214286 3.210000 1.645714 \n",
+ "2019-03-23 16:00:00 2.000000 2.194545 10.454545 1.109091 0.000000 \n",
+ "2019-03-23 17:00:00 1.090909 1.913636 14.818182 2.688182 0.523636 \n",
+ "2019-03-23 18:00:00 1.571429 3.206429 12.821429 0.844286 0.411429 \n",
+ "2019-03-23 19:00:00 1.526316 2.097895 10.263158 1.176316 0.000000 \n",
+ "2019-03-23 20:00:00 1.400000 2.448000 11.100000 1.544000 0.000000 \n",
+ "2019-03-23 21:00:00 1.000000 2.017143 10.571429 1.420000 0.000000 \n",
+ "2019-03-23 22:00:00 1.307692 1.881538 8.923077 1.094615 0.000000 \n",
+ "2019-03-23 23:00:00 1.615385 3.725385 15.115385 1.696154 0.000000 \n",
+ "\n",
+ " total \n",
+ "pickup_time \n",
+ "2019-03-23 00:00:00 12.965000 \n",
+ "2019-03-23 01:00:00 12.575000 \n",
+ "2019-03-23 02:00:00 13.169091 \n",
+ "2019-03-23 03:00:00 17.335000 \n",
+ "2019-03-23 04:00:00 10.215000 \n",
+ "2019-03-23 05:00:00 10.530000 \n",
+ "2019-03-23 06:00:00 23.133333 \n",
+ "2019-03-23 07:00:00 9.298333 \n",
+ "2019-03-23 08:00:00 3.300000 \n",
+ "2019-03-23 09:00:00 12.124000 \n",
+ "2019-03-23 10:00:00 12.152500 \n",
+ "2019-03-23 11:00:00 14.667273 \n",
+ "2019-03-23 12:00:00 13.420000 \n",
+ "2019-03-23 13:00:00 12.344000 \n",
+ "2019-03-23 14:00:00 24.267059 \n",
+ "2019-03-23 15:00:00 30.370000 \n",
+ "2019-03-23 16:00:00 14.431818 \n",
+ "2019-03-23 17:00:00 20.739091 \n",
+ "2019-03-23 18:00:00 16.427143 \n",
+ "2019-03-23 19:00:00 14.226316 \n",
+ "2019-03-23 20:00:00 15.944000 \n",
+ "2019-03-23 21:00:00 15.791429 \n",
+ "2019-03-23 22:00:00 13.433077 \n",
+ "2019-03-23 23:00:00 20.034615 "
+ ],
+ "text/html": [
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " passengers | \n",
+ " distance | \n",
+ " fare | \n",
+ " tip | \n",
+ " tolls | \n",
+ " total | \n",
+ "
\n",
+ " \n",
+ " | pickup_time | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 2019-03-23 00:00:00 | \n",
+ " 1.000000 | \n",
+ " 1.911667 | \n",
+ " 8.583333 | \n",
+ " 1.415000 | \n",
+ " 0.000000 | \n",
+ " 12.965000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 01:00:00 | \n",
+ " 1.250000 | \n",
+ " 1.325000 | \n",
+ " 7.875000 | \n",
+ " 1.525000 | \n",
+ " 0.000000 | \n",
+ " 12.575000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 02:00:00 | \n",
+ " 1.727273 | \n",
+ " 1.739091 | \n",
+ " 8.181818 | \n",
+ " 1.641818 | \n",
+ " 0.000000 | \n",
+ " 13.169091 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 03:00:00 | \n",
+ " 1.500000 | \n",
+ " 3.377500 | \n",
+ " 11.750000 | \n",
+ " 2.410000 | \n",
+ " 0.000000 | \n",
+ " 17.335000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 04:00:00 | \n",
+ " 2.000000 | \n",
+ " 0.950000 | \n",
+ " 5.500000 | \n",
+ " 0.915000 | \n",
+ " 0.000000 | \n",
+ " 10.215000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 05:00:00 | \n",
+ " 2.000000 | \n",
+ " 1.270000 | \n",
+ " 6.000000 | \n",
+ " 0.980000 | \n",
+ " 0.000000 | \n",
+ " 10.530000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 06:00:00 | \n",
+ " 1.000000 | \n",
+ " 0.400000 | \n",
+ " 21.500000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 23.133333 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 07:00:00 | \n",
+ " 2.333333 | \n",
+ " 0.980000 | \n",
+ " 5.250000 | \n",
+ " 1.165000 | \n",
+ " 0.000000 | \n",
+ " 9.298333 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 08:00:00 | \n",
+ " 1.000000 | \n",
+ " 0.020000 | \n",
+ " 2.500000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 3.300000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 09:00:00 | \n",
+ " 1.500000 | \n",
+ " 1.352000 | \n",
+ " 7.400000 | \n",
+ " 1.674000 | \n",
+ " 0.000000 | \n",
+ " 12.124000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 10:00:00 | \n",
+ " 1.000000 | \n",
+ " 1.760000 | \n",
+ " 8.750000 | \n",
+ " 0.727500 | \n",
+ " 0.000000 | \n",
+ " 12.152500 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 11:00:00 | \n",
+ " 1.909091 | \n",
+ " 2.070000 | \n",
+ " 11.090909 | \n",
+ " 0.803636 | \n",
+ " 0.000000 | \n",
+ " 14.667273 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 12:00:00 | \n",
+ " 2.000000 | \n",
+ " 2.267143 | \n",
+ " 10.260000 | \n",
+ " 0.645714 | \n",
+ " 0.000000 | \n",
+ " 13.420000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 13:00:00 | \n",
+ " 2.500000 | \n",
+ " 1.167000 | \n",
+ " 7.550000 | \n",
+ " 2.074000 | \n",
+ " 0.000000 | \n",
+ " 12.344000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 14:00:00 | \n",
+ " 2.470588 | \n",
+ " 4.752941 | \n",
+ " 18.330000 | \n",
+ " 1.945294 | \n",
+ " 1.003529 | \n",
+ " 24.267059 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 15:00:00 | \n",
+ " 1.000000 | \n",
+ " 6.557143 | \n",
+ " 22.214286 | \n",
+ " 3.210000 | \n",
+ " 1.645714 | \n",
+ " 30.370000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 16:00:00 | \n",
+ " 2.000000 | \n",
+ " 2.194545 | \n",
+ " 10.454545 | \n",
+ " 1.109091 | \n",
+ " 0.000000 | \n",
+ " 14.431818 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 17:00:00 | \n",
+ " 1.090909 | \n",
+ " 1.913636 | \n",
+ " 14.818182 | \n",
+ " 2.688182 | \n",
+ " 0.523636 | \n",
+ " 20.739091 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 18:00:00 | \n",
+ " 1.571429 | \n",
+ " 3.206429 | \n",
+ " 12.821429 | \n",
+ " 0.844286 | \n",
+ " 0.411429 | \n",
+ " 16.427143 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 19:00:00 | \n",
+ " 1.526316 | \n",
+ " 2.097895 | \n",
+ " 10.263158 | \n",
+ " 1.176316 | \n",
+ " 0.000000 | \n",
+ " 14.226316 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 20:00:00 | \n",
+ " 1.400000 | \n",
+ " 2.448000 | \n",
+ " 11.100000 | \n",
+ " 1.544000 | \n",
+ " 0.000000 | \n",
+ " 15.944000 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 21:00:00 | \n",
+ " 1.000000 | \n",
+ " 2.017143 | \n",
+ " 10.571429 | \n",
+ " 1.420000 | \n",
+ " 0.000000 | \n",
+ " 15.791429 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 22:00:00 | \n",
+ " 1.307692 | \n",
+ " 1.881538 | \n",
+ " 8.923077 | \n",
+ " 1.094615 | \n",
+ " 0.000000 | \n",
+ " 13.433077 | \n",
+ "
\n",
+ " \n",
+ " | 2019-03-23 23:00:00 | \n",
+ " 1.615385 | \n",
+ " 3.725385 | \n",
+ " 15.115385 | \n",
+ " 1.696154 | \n",
+ " 0.000000 | \n",
+ " 20.034615 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df_means",
+ "summary": "{\n \"name\": \"df_means\",\n \"rows\": 24,\n \"fields\": [\n {\n \"column\": \"pickup_time\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"2019-03-23 00:00:00\",\n \"max\": \"2019-03-23 23:00:00\",\n \"num_unique_values\": 24,\n \"samples\": [\n \"2019-03-23 08:00:00\",\n \"2019-03-23 16:00:00\",\n \"2019-03-23 00:00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"passengers\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.49037419813216393,\n \"min\": 1.0,\n \"max\": 2.5,\n \"num_unique_values\": 15,\n \"samples\": [\n 1.0909090909090908,\n 1.5263157894736843,\n 1.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"distance\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1.405026463375497,\n \"min\": 0.02,\n \"max\": 6.557142857142858,\n \"num_unique_values\": 24,\n \"samples\": [\n 0.02,\n 2.194545454545455,\n 1.9116666666666668\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"fare\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 4.858496791901966,\n \"min\": 2.5,\n \"max\": 22.214285714285715,\n \"num_unique_values\": 24,\n \"samples\": [\n 2.5,\n 10.454545454545455,\n 8.583333333333334\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tip\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.758503028946405,\n \"min\": 0.0,\n \"max\": 3.21,\n \"num_unique_values\": 23,\n \"samples\": [\n 1.1090909090909091,\n 0.7275,\n 1.415\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tolls\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3969236292933474,\n \"min\": 0.0,\n \"max\": 1.6457142857142857,\n \"num_unique_values\": 5,\n \"samples\": [\n 1.0035294117647058,\n 0.4114285714285714,\n 1.6457142857142857\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"total\",\n 
\"properties\": {\n \"dtype\": \"number\",\n \"std\": 5.557120639119065,\n \"min\": 3.3,\n \"max\": 30.37,\n \"num_unique_values\": 24,\n \"samples\": [\n 3.3,\n 14.431818181818182,\n 12.965000000000002\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 54
+ }
+ ],
+ "source": [
+ "df_means = taxis_one_day.groupby(pd.Grouper(key=\"pickup_time\", freq=\"1h\")).mean(numeric_only=True)  # hourly bins; lowercase 'h' replaces the deprecated 'H' alias\n",
+ "df_means"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "601743a0-8e97-46f3-8c46-6681353d374c",
+ "metadata": {
+ "id": "601743a0-8e97-46f3-8c46-6681353d374c"
+ },
+ "source": [
+ "**42.** Create a simple line plot of the value \"distance\". "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "d270aaf8-b52e-4d1f-8373-3ce72f9d4a81",
+ "metadata": {
+ "id": "d270aaf8-b52e-4d1f-8373-3ce72f9d4a81"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "deba998f-24b1-4fc3-93e6-807955ba111b",
+ "metadata": {
+ "id": "deba998f-24b1-4fc3-93e6-807955ba111b"
+ },
+ "source": [
+ "**43.** Overall, do riders travel further or less far as the day progresses?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4b69ad07-bd6c-4ef8-8caa-ec0b5b4a2b21",
+ "metadata": {
+ "id": "4b69ad07-bd6c-4ef8-8caa-ec0b5b4a2b21"
+ },
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3c1784d1-e66d-40c7-87d1-a5a4bc7dd43b",
+ "metadata": {
+ "id": "3c1784d1-e66d-40c7-87d1-a5a4bc7dd43b"
+ },
+ "source": [
+ "**44.** Create a new column in df_means, ```tip_in_percent```. The source columns for this should be \"fare\" and \"tip\"."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "014637ae-70f7-4289-8077-5264bf46daab",
+ "metadata": {
+ "id": "014637ae-70f7-4289-8077-5264bf46daab"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "15be5b49-d465-48a7-9b3b-6d58291752d9",
+ "metadata": {
+ "id": "15be5b49-d465-48a7-9b3b-6d58291752d9"
+ },
+ "source": [
+ "**45.** Create a new column, time_interval, as a range of integer values beginning with zero."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "bcc2dca5-7682-4e55-b218-e94006071ebd",
+ "metadata": {
+ "id": "bcc2dca5-7682-4e55-b218-e94006071ebd"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "269b329e-cade-4e54-9ed8-ce5d23006750",
+ "metadata": {
+ "id": "269b329e-cade-4e54-9ed8-ce5d23006750"
+ },
+ "source": [
+ "**46.** Display the correlations between the following pairs of values:\n",
+ "1. tip_in_percent and distance.\n",
+ "1. tip_in_percent and passengers.\n",
+ "1. tip_in_percent and time_interval."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "7003165a-2e86-46fa-9b0c-bb1e9dab4afd",
+ "metadata": {
+ "id": "7003165a-2e86-46fa-9b0c-bb1e9dab4afd"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "934d0379-8d2c-4579-af88-fafffdb58767",
+ "metadata": {
+ "id": "934d0379-8d2c-4579-af88-fafffdb58767"
+ },
+ "source": [
+ "**47.** Admittedly, the size of the data set is fairly small given how we've subsetted it. But based on the values in #46, which of the three pairs shows the strongest correlation?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04dfb838-f8de-4e79-8076-12ef0c187fc4",
+ "metadata": {
+ "id": "04dfb838-f8de-4e79-8076-12ef0c187fc4"
+ },
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "279a9e91-2488-4e53-8234-ddde076fbe30",
+ "metadata": {
+ "id": "279a9e91-2488-4e53-8234-ddde076fbe30"
+ },
+ "source": [
+ "**48.** Did our hypothesis that people tip more as the day goes on turn out to be warranted?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6aaf603f-5099-457c-99eb-bf8370583a0c",
+ "metadata": {
+ "id": "6aaf603f-5099-457c-99eb-bf8370583a0c"
+ },
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "c885f77c-db5c-462b-8231-663e56f7f497",
+ "metadata": {
+ "id": "c885f77c-db5c-462b-8231-663e56f7f497"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.2"
+ },
+ "colab": {
+ "provenance": []
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file