diff --git a/churn/1_churn_feature_pipeline.ipynb b/churn/1_churn_feature_pipeline.ipynb new file mode 100644 index 0000000..dd3006b --- /dev/null +++ b/churn/1_churn_feature_pipeline.ipynb @@ -0,0 +1,1800 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2de72615", + "metadata": { + "id": "2de72615" + }, + "source": [ + "# **Hopsworks Feature Store** - Part 01: Feature Pipeline\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/1_churn_feature_pipeline.ipynb)\n", + "\n", + "\n", + "## 🗒️ This notebook is divided into the following sections:\n", + "1. Load the data and perform feature engineering.\n", + "2. Connect to the Hopsworks feature store.\n", + "3. Create feature groups and upload them to the feature store.\n", + "\n", + "\n", + "![tutorial-flow](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/images/01_featuregroups.png?raw=1)\n", + "\n", + "First of all, you will load the data and do some feature engineering on it.\n" + ] + }, + { + "cell_type": "markdown", + "id": "e08a069b", + "metadata": { + "id": "e08a069b" + }, + "source": [ + "The data you will use comes from three different CSV files:\n", + "\n", + "- `demography.csv`: demographic information.\n", + "- `customer_info.csv`: customer information such as contract type, billing method and monthly charges, as well as whether the customer has churned within the last month.\n", + "- `subscriptions.csv`: customer subscription to services such as internet, mobile or movie streaming.\n", + "\n", + "You can conceptualize these CSV files as originating from separate data sources.\n", + "**All three files have a customer id column `customerID` in common, which you can use for joins.**\n", + "\n", + "Let's go ahead and load the data."
+ ] + }, + { + "cell_type": "markdown", + "id": "90f5f948", + "metadata": { + "id": "90f5f948" + }, + "source": [ + "### 📝 Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "859fbe6b", + "metadata": { + "id": "859fbe6b", + "outputId": "509c2374-cfa9-477a-98c6-0aa52d3444ff", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[33mWARNING: Skipping pyspark as it is not installed.\u001b[0m\u001b[33m\n", + "\u001b[0mRequirement already satisfied: hopsworks[python] in /usr/local/lib/python3.10/dist-packages (4.1.3)\n", + "Requirement already satisfied: pyhumps==1.6.1 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (1.6.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (2.32.3)\n", + "Requirement already satisfied: furl in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (2.1.3)\n", + "Requirement already satisfied: boto3 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (1.35.76)\n", + "Requirement already satisfied: pandas<2.2.0 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (2.1.4)\n", + "Requirement already satisfied: pyjks in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (20.0.0)\n", + "Requirement already satisfied: mock in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (5.1.0)\n", + "Requirement already satisfied: avro==1.11.3 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (1.11.3)\n", + "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (2.0.29)\n", + "Requirement already satisfied: PyMySQL[rsa] in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (1.1.1)\n", + "Requirement already satisfied: tzlocal in /usr/local/lib/python3.10/dist-packages (from 
hopsworks[python]) (5.2)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (2024.10.0)\n", + "Requirement already satisfied: retrying in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (1.3.4)\n", + "Requirement already satisfied: hopsworks_aiomysql==0.2.1 in /usr/local/lib/python3.10/dist-packages (from hopsworks_aiomysql[sa]==0.2.1->hopsworks[python]) (0.2.1)\n", + "Requirement already satisfied: opensearch-py<=2.4.2,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (2.4.2)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (4.66.6)\n", + "Requirement already satisfied: grpcio<2.0.0,>=1.49.1 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (1.68.1)\n", + "Requirement already satisfied: protobuf<5.0.0,>=4.25.4 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (4.25.5)\n", + "Requirement already satisfied: numpy<2 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (1.26.4)\n", + "Requirement already satisfied: pyarrow>=10.0 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (17.0.0)\n", + "Requirement already satisfied: confluent-kafka<=2.3.0 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (2.3.0)\n", + "Requirement already satisfied: fastavro<=1.8.4,>=1.4.11 in /usr/local/lib/python3.10/dist-packages (from hopsworks[python]) (1.8.4)\n", + "Requirement already satisfied: urllib3>=1.26.18 in /usr/local/lib/python3.10/dist-packages (from opensearch-py<=2.4.2,>=1.1.0->hopsworks[python]) (2.2.3)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from opensearch-py<=2.4.2,>=1.1.0->hopsworks[python]) (1.16.0)\n", + "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from opensearch-py<=2.4.2,>=1.1.0->hopsworks[python]) (2.8.2)\n", + "Requirement 
already satisfied: certifi>=2022.12.07 in /usr/local/lib/python3.10/dist-packages (from opensearch-py<=2.4.2,>=1.1.0->hopsworks[python]) (2024.8.30)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2.0->hopsworks[python]) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2.0->hopsworks[python]) (2024.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->hopsworks[python]) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->hopsworks[python]) (3.10)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy->hopsworks[python]) (4.12.2)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy->hopsworks[python]) (3.1.1)\n", + "Requirement already satisfied: botocore<1.36.0,>=1.35.76 in /usr/local/lib/python3.10/dist-packages (from boto3->hopsworks[python]) (1.35.76)\n", + "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from boto3->hopsworks[python]) (1.0.1)\n", + "Requirement already satisfied: s3transfer<0.11.0,>=0.10.0 in /usr/local/lib/python3.10/dist-packages (from boto3->hopsworks[python]) (0.10.4)\n", + "Requirement already satisfied: orderedmultidict>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from furl->hopsworks[python]) (1.0.1)\n", + "Requirement already satisfied: javaobj-py3 in /usr/local/lib/python3.10/dist-packages (from pyjks->hopsworks[python]) (0.4.4)\n", + "Requirement already satisfied: pyasn1>=0.3.5 in /usr/local/lib/python3.10/dist-packages (from pyjks->hopsworks[python]) (0.6.1)\n", + "Requirement already satisfied: pyasn1-modules in /usr/local/lib/python3.10/dist-packages (from pyjks->hopsworks[python]) 
(0.4.1)\n", + "Requirement already satisfied: pycryptodomex in /usr/local/lib/python3.10/dist-packages (from pyjks->hopsworks[python]) (3.21.0)\n", + "Requirement already satisfied: twofish in /usr/local/lib/python3.10/dist-packages (from pyjks->hopsworks[python]) (0.3.0)\n", + "Requirement already satisfied: cryptography in /usr/local/lib/python3.10/dist-packages (from PyMySQL[rsa]->hopsworks[python]) (43.0.3)\n", + "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography->PyMySQL[rsa]->hopsworks[python]) (1.17.1)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography->PyMySQL[rsa]->hopsworks[python]) (2.22)\n" + ] + } + ], + "source": [ + "#pip install hopsworks==4.1.3\n", + "\n", + "!pip uninstall pyspark\n", + "#!pip uninstall hopsworks\n", + "!pip install hopsworks[python]" + ] + }, + { + "cell_type": "code", + "source": [ + "pip show hopsworks\n" + ], + "metadata": { + "id": "U0DXdl9X7dM-", + "outputId": "167ea22b-916b-4833-98f4-03b300a49f5a", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "U0DXdl9X7dM-", + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Name: hopsworks\n", + "Version: 4.1.3\n", + "Summary: Hopsworks Python SDK to interact with Hopsworks Platform, Feature Store, Model Registry and Model Serving\n", + "Home-page: https://www.hopsworks.ai\n", + "Author: \n", + "Author-email: Hopsworks AB \n", + "License: Apache-2.0\n", + "Location: /usr/local/lib/python3.10/dist-packages\n", + "Requires: avro, boto3, fsspec, furl, grpcio, hopsworks_aiomysql, mock, opensearch-py, pandas, protobuf, pyhumps, pyjks, PyMySQL, requests, retrying, sqlalchemy, tqdm, tzlocal\n", + "Required-by: \n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f73d7642", + "metadata": { + "id": "f73d7642" + }, + "outputs": [], + "source": [ + "import pandas 
as pd\n", + "\n", + "# Ignore warnings\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "id": "d7d39005", + "metadata": { + "id": "d7d39005" + }, + "source": [ + "## 💽 Loading the Data \n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "abff9db7", + "metadata": { + "id": "abff9db7" + }, + "outputs": [], + "source": [ + "# Read demography data\n", + "demography_df = pd.read_csv(\"https://repo.hops.works/dev/davit/churn/demography.csv/")\n", + "\n", + "# Read customer info data with datetime parsing\n", + "customer_info_df = pd.read_csv(\n", + " \"https://repo.hops.works/dev/davit/churn/customer_info.csv/",\n", + " parse_dates=['datetime'],\n", + ")\n", + "\n", + "# Read subscriptions data with datetime parsing\n", + "subscriptions_df = pd.read_csv(\n", + " \"https://repo.hops.works/dev/davit/churn/subscriptions.csv/",\n", + " parse_dates=['datetime'],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "47371dd8", + "metadata": { + "id": "47371dd8", + "outputId": "e956fc5f-3185-4785-ee8d-903c876d6069", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " customerID gender SeniorCitizen Dependents Partner\n", + "0 7590-VHVEG Female 0 No Yes\n", + "1 5575-GNVDE Male 0 No No\n", + "2 3668-QPYBK Male 0 No No" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerIDgenderSeniorCitizenDependentsPartner
07590-VHVEGFemale0NoYes
15575-GNVDEMale0NoNo
23668-QPYBKMale0NoNo
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "demography_df", + "summary": "{\n \"name\": \"demography_df\",\n \"rows\": 7043,\n \"fields\": [\n {\n \"column\": \"customerID\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7043,\n \"samples\": [\n \"1024-GUALD\",\n \"0484-JPBRU\",\n \"3620-EHIMZ\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"gender\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Male\",\n \"Female\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"SeniorCitizen\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Dependents\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Yes\",\n \"No\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Partner\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"No\",\n \"Yes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "demography_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "171c0a5d", + "metadata": { + "id": "171c0a5d", + "outputId": "ecdcdc09-c850-4b09-f9d1-625316cbf865", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 195 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " customerID Contract tenure PaymentMethod PaperlessBilling \\\n", + "0 7590-VHVEG Month-to-month 1 Electronic check Yes \n", + "1 5575-GNVDE One year 34 Mailed check No \n", + "2 3668-QPYBK Month-to-month 2 Mailed check 
Yes \n", + "\n", + " MonthlyCharges TotalCharges Churn datetime \n", + "0 29.85 29.85 No 2021-10-25 15:07:18.625390512 \n", + "1 56.95 1889.5 No 2020-06-28 06:32:24.674808292 \n", + "2 53.85 108.15 Yes 2021-12-05 20:10:58.449304176 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerIDContracttenurePaymentMethodPaperlessBillingMonthlyChargesTotalChargesChurndatetime
07590-VHVEGMonth-to-month1Electronic checkYes29.8529.85No2021-10-25 15:07:18.625390512
15575-GNVDEOne year34Mailed checkNo56.951889.5No2020-06-28 06:32:24.674808292
23668-QPYBKMonth-to-month2Mailed checkYes53.85108.15Yes2021-12-05 20:10:58.449304176
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "customer_info_df", + "summary": "{\n \"name\": \"customer_info_df\",\n \"rows\": 7043,\n \"fields\": [\n {\n \"column\": \"customerID\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7043,\n \"samples\": [\n \"1024-GUALD\",\n \"0484-JPBRU\",\n \"3620-EHIMZ\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Contract\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Month-to-month\",\n \"One year\",\n \"Two year\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"tenure\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24,\n \"min\": 0,\n \"max\": 72,\n \"num_unique_values\": 73,\n \"samples\": [\n 8,\n 40,\n 12\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"PaymentMethod\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Mailed check\",\n \"Credit card (automatic)\",\n \"Electronic check\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"PaperlessBilling\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"No\",\n \"Yes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"MonthlyCharges\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 30.09004709767854,\n \"min\": 18.25,\n \"max\": 118.75,\n \"num_unique_values\": 1585,\n \"samples\": [\n 48.85,\n 20.05\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TotalCharges\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 6531,\n \"samples\": [\n \"4600.7\",\n \"20.35\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Churn\",\n \"properties\": {\n \"dtype\": 
\"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Yes\",\n \"No\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"datetime\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"2020-01-01 00:00:00\",\n \"max\": \"2022-01-01 00:00:00\",\n \"num_unique_values\": 7043,\n \"samples\": [\n \"2020-06-25 13:45:55.467196818\",\n \"2021-08-22 16:27:28.168134048\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 6 + } + ], + "source": [ + "customer_info_df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "58f7c4ef", + "metadata": { + "id": "58f7c4ef", + "outputId": "4716da4a-d46a-4466-a10e-5e558d7b50c3", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 215 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " customerID DeviceProtection OnlineBackup OnlineSecurity InternetService \\\n", + "0 7590-VHVEG No Yes No DSL \n", + "1 5575-GNVDE Yes No Yes DSL \n", + "2 3668-QPYBK No Yes Yes DSL \n", + "\n", + " MultipleLines PhoneService TechSupport StreamingMovies StreamingTV \\\n", + "0 No phone service No No No No \n", + "1 No Yes No No No \n", + "2 No Yes No No No \n", + "\n", + " datetime \n", + "0 2021-10-25 15:07:18.625390512 \n", + "1 2020-06-28 06:32:24.674808292 \n", + "2 2021-12-05 20:10:58.449304176 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customerIDDeviceProtectionOnlineBackupOnlineSecurityInternetServiceMultipleLinesPhoneServiceTechSupportStreamingMoviesStreamingTVdatetime
07590-VHVEGNoYesNoDSLNo phone serviceNoNoNoNo2021-10-25 15:07:18.625390512
15575-GNVDEYesNoYesDSLNoYesNoNoNo2020-06-28 06:32:24.674808292
23668-QPYBKNoYesYesDSLNoYesNoNoNo2021-12-05 20:10:58.449304176
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "subscriptions_df", + "summary": "{\n \"name\": \"subscriptions_df\",\n \"rows\": 7043,\n \"fields\": [\n {\n \"column\": \"customerID\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 7043,\n \"samples\": [\n \"1024-GUALD\",\n \"0484-JPBRU\",\n \"3620-EHIMZ\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"DeviceProtection\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"No\",\n \"Yes\",\n \"No internet service\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"OnlineBackup\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"Yes\",\n \"No\",\n \"No internet service\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"OnlineSecurity\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"No\",\n \"Yes\",\n \"No internet service\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"InternetService\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"DSL\",\n \"Fiber optic\",\n \"No\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"MultipleLines\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"No phone service\",\n \"No\",\n \"Yes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"PhoneService\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Yes\",\n \"No\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"TechSupport\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n 
\"No\",\n \"Yes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"StreamingMovies\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"No\",\n \"Yes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"StreamingTV\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"No\",\n \"Yes\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"datetime\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": \"2020-01-01 00:00:00\",\n \"max\": \"2022-01-01 00:00:00\",\n \"num_unique_values\": 7043,\n \"samples\": [\n \"2020-06-25 13:45:55.467196818\",\n \"2021-08-22 16:27:28.168134048\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 7 + } + ], + "source": [ + "subscriptions_df.head(3)" + ] + }, + { + "cell_type": "code", + "source": [ + "#type(customer_info_df)\n", + "customer_info_df.dtypes" + ], + "metadata": { + "id": "r3o2qKCx4pgn", + "outputId": "16144522-5bbb-49e2-85c2-1579b220a2dd", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 366 + } + }, + "id": "r3o2qKCx4pgn", + "execution_count": 18, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "customerID object\n", + "Contract object\n", + "tenure int64\n", + "PaymentMethod object\n", + "PaperlessBilling object\n", + "MonthlyCharges float64\n", + "TotalCharges float64\n", + "Churn int64\n", + "datetime datetime64[ns]\n", + "dtype: object" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
customerIDobject
Contractobject
tenureint64
PaymentMethodobject
PaperlessBillingobject
MonthlyChargesfloat64
TotalChargesfloat64
Churnint64
datetimedatetime64[ns]
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 18 + } + ] + }, + { + "cell_type": "markdown", + "id": "5a7a2f89", + "metadata": { + "id": "5a7a2f89" + }, + "source": [ + "---\n", + "## 🛠️ Feature Engineering \n", + "\n", + "In this section, you will perform feature engineering, such as converting textual features to numerical features and replacing missing values with 0s. Let's start with the Customer information feature group." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "4d0ddecb", + "metadata": { + "id": "4d0ddecb" + }, + "outputs": [], + "source": [ + "# Convert the \"TotalCharges\" column to numeric: entries that cannot be parsed\n", + "# become NaN (errors='coerce') and are then replaced with 0.\n", + "# The result is assigned back to the column rather than calling\n", + "# fillna(..., inplace=True) on a column selection, which is chained assignment\n", + "# and does not update the DataFrame under pandas Copy-on-Write.\n", + "customer_info_df[\"TotalCharges\"] = pd.to_numeric(\n", + "    customer_info_df[\"TotalCharges\"],\n", + "    errors='coerce',\n", + ").fillna(0)\n", + "\n", + "# Encode the \"Churn\" column as 0 for \"No\" and 1 for \"Yes\".\n", + "# Values that are already 0/1 are left unchanged, so re-running this cell is safe.\n", + "# The explicit cast keeps the column int64 regardless of how replace() infers the dtype.\n", + "customer_info_df[\"Churn\"] = (\n", + "    customer_info_df[\"Churn\"]\n", + "    .replace({\"No\": 0, \"Yes\": 1})\n", + "    .astype(\"int64\")\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "2ca91d6f", + "metadata": { + "id": "2ca91d6f" + }, + "source": [ + "---\n", + "## 🪄 Creating Feature Groups \n", + "\n", + "A [feature group](https://docs.hopsworks.ai/feature-store-api/latest/generated/feature_group/) can be seen as a collection of conceptually related features. In this case, you will create 3 feature groups:\n", + "1. Customer information\n", + "2. Customer demography\n", + "3. Customer subscription\n", + "\n", + "As you can see, the feature groups are related to their source data. These feature groups have the same column as a primary key, which will allow you to join them when creating a dataset in the next tutorial.\n", + "\n", + "Before you can create a feature group you need to connect to the Hopsworks feature store."
+ ] + }, + { + "cell_type": "code", + "source": [ + "customer_info_df.dtypes" + ], + "metadata": { + "id": "1OLIodrN5i6J", + "outputId": "bd507ad4-cc6c-4e4f-e21b-c2792aaa65b4", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 366 + } + }, + "id": "1OLIodrN5i6J", + "execution_count": 10, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "customerID object\n", + "Contract object\n", + "tenure int64\n", + "PaymentMethod object\n", + "PaperlessBilling object\n", + "MonthlyCharges float64\n", + "TotalCharges float64\n", + "Churn int64\n", + "datetime datetime64[ns]\n", + "dtype: object" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0
customerIDobject
Contractobject
tenureint64
PaymentMethodobject
PaperlessBillingobject
MonthlyChargesfloat64
TotalChargesfloat64
Churnint64
datetimedatetime64[ns]
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 10 + } + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "2f835078", + "metadata": { + "id": "2f835078", + "outputId": "3159b3ea-8aef-4b43-81e8-1bb2342ddeb6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1193142\n" + ] + } + ], + "source": [ + "import hopsworks\n", + "\n", + "project = hopsworks.login()\n", + "\n", + "fs = project.get_feature_store()" + ] + }, + { + "cell_type": "markdown", + "id": "2195a094", + "metadata": { + "id": "2195a094" + }, + "source": [ + "To create a feature group you need to give it a name and specify a primary key. It is also good to provide a description of the contents of the feature group." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "5be5d163", + "metadata": { + "id": "5be5d163" + }, + "outputs": [], + "source": [ + "# Get or create the 'customer_info' feature group\n", + "customer_info_fg = fs.get_or_create_feature_group(\n", + " name=\"customer_info\",\n", + " version=2,\n", + " description=\"Customer info for churn prediction.\",\n", + " primary_key=['customerID'],\n", + " event_time=\"datetime\",\n", + ")" + ] + }, + { + "cell_type": "code", + "source": [ + "customer_info_fg.to_dict()" + ], + "metadata": { + "id": "1ecxRSbR54eI", + "outputId": "d2063252-47c8-47b8-be6a-947ee0ad63a4", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "1ecxRSbR54eI", + "execution_count": 14, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'id': 1379683,\n", + " 'name': 'customer_info',\n", + " 'version': 2,\n", + " 'description': 'Customer info for churn prediction.',\n", + " 'onlineEnabled': False,\n", + " 'timeTravelFormat': 'HUDI',\n", + " 'features': [Feature('customerid', 'string', None, True, False, 
None, None, 1379683),\n", + " Feature('contract', 'string', None, False, False, None, None, 1379683),\n", + " Feature('tenure', 'bigint', None, False, False, None, None, 1379683),\n", + " Feature('paymentmethod', 'string', None, False, False, None, None, 1379683),\n", + " Feature('paperlessbilling', 'string', None, False, False, None, None, 1379683),\n", + " Feature('monthlycharges', 'double', None, False, False, None, None, 1379683),\n", + " Feature('totalcharges', 'double', None, False, False, None, None, 1379683),\n", + " Feature('churn', 'bigint', None, False, False, None, None, 1379683),\n", + " Feature('datetime', 'timestamp', None, False, False, None, None, 1379683)],\n", + " 'featurestoreId': 1182819,\n", + " 'type': 'streamFeatureGroupDTO',\n", + " 'statisticsConfig': StatisticsConfig(True, False, False, False, []),\n", + " 'eventTime': 'datetime',\n", + " 'expectationSuite': None,\n", + " 'parents': None,\n", + " 'topicName': None,\n", + " 'notificationTopicName': None,\n", + " 'deprecated': False,\n", + " 'transformationFunctions': [],\n", + " 'path': None,\n", + " 'onlineConfig': {'onlineComments': [], 'tableSpace': None},\n", + " 'deltaStreamerJobConf': None}" + ] + }, + "metadata": {}, + "execution_count": 14 + } + ] + }, + { + "cell_type": "markdown", + "id": "9454e269", + "metadata": { + "id": "9454e269" + }, + "source": [ + "A full list of arguments can be found in the [documentation](https://docs.hopsworks.ai/feature-store-api/latest/generated/api/feature_store_api/#create_feature_group).\n", + "\n", + "At this point, you have only specified some metadata for the feature group. It does not store any data or even have a schema defined for the data. To make the feature group persistent you need to populate it with its associated data using the `insert` function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "c820fdb0", + "metadata": { + "id": "c820fdb0", + "outputId": "cab00529-b922-44f3-d664-b6f64b2b3b4c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Uploading Dataframe: 100.00% |██████████| Rows 7043/7043 | Elapsed Time: 00:01 | Remaining Time: 00:00\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Launching job: customer_info_2_offline_fg_materialization\n", + "Job started successfully, you can follow the progress at \n", + "https://c.app.hopsworks.ai/p/1193142/jobs/named/customer_info_2_offline_fg_materialization/executions/n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Job('customer_info_2_offline_fg_materialization', 'SPARK'), None)" + ] + }, + "metadata": {}, + "execution_count": 15 + } + ], + "source": [ + "# Insert data into feature group\n", + "customer_info_fg.insert(customer_info_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "9a751ee7", + "metadata": { + "id": "9a751ee7" + }, + "outputs": [], + "source": [ + "# Update feature descriptions\n", + "feature_descriptions = [\n", + " {\"name\": \"customerid\", \"description\": \"Customer id\"},\n", + " {\"name\": \"contract\", \"description\": \"Type of contact\"},\n", + " {\"name\": \"tenure\", \"description\": \"How long they’ve been a customer\"},\n", + " {\"name\": \"paymentmethod\", \"description\": \"Payment method\"},\n", + " {\"name\": \"paperlessbilling\", \"description\": \"Whether customer has paperless billing or not\"},\n", + " {\"name\": \"monthlycharges\", \"description\": \"Monthly charges\"},\n", + " {\"name\": \"totalcharges\", \"description\": \"Total charges\"},\n", + " {\"name\": \"churn\", \"description\": \"Whether customer has left within the last month or not\"},\n", + " {\"name\": \"datetime\", \"description\": 
\"Date when the customer information was recorded\"},\n", + "]\n", + "\n", + "for desc in feature_descriptions:\n", + " customer_info_fg.update_feature_description(desc[\"name\"], desc[\"description\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "e97881ff", + "metadata": { + "id": "e97881ff", + "outputId": "aa116e8b-92be-4b59-82dd-4be28c950887", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Uploading Dataframe: 100.00% |██████████| Rows 7043/7043 | Elapsed Time: 00:01 | Remaining Time: 00:00\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Launching job: customer_demography_info_2_offline_fg_materialization\n", + "Job started successfully, you can follow the progress at \n", + "https://c.app.hopsworks.ai/p/1193142/jobs/named/customer_demography_info_2_offline_fg_materialization/executions/n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "(Job('customer_demography_info_2_offline_fg_materialization', 'SPARK'), None)" + ] + }, + "metadata": {}, + "execution_count": 17 + } + ], + "source": [ + "# Get or create the 'customer_demography_info' feature group\n", + "demography_fg = fs.get_or_create_feature_group(\n", + " name=\"customer_demography_info\",\n", + " version=2,\n", + " description=\"Customer demography info for churn prediction.\",\n", + " primary_key=['customerID'],\n", + ")\n", + "# Insert data into feature group\n", + "demography_fg.insert(demography_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cbabcd85", + "metadata": { + "id": "cbabcd85" + }, + "outputs": [], + "source": [ + "# Update feature descriptions\n", + "feature_descriptions = [\n", + " {\"name\": \"customerid\", \"description\": \"Customer id\"},\n", + " {\"name\": \"gender\", \"description\": \"Customer gender\"},\n", + " {\"name\": \"seniorcitizen\", 
\"description\": \"Whether customer is a senior citizen or not\"},\n", + " {\"name\": \"dependents\", \"description\": \"Whether customer has dependents or not\"},\n", + " {\"name\": \"partner\", \"description\": \"Whether customer has partners or not\"},\n", + "]\n", + "\n", + "for desc in feature_descriptions:\n", + " demography_fg.update_feature_description(desc[\"name\"], desc[\"description\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d43fb2e", + "metadata": { + "id": "0d43fb2e" + }, + "outputs": [], + "source": [ + "# Get or create the 'customer_subscription_info' feature group\n", + "subscriptions_fg = fs.get_or_create_feature_group(\n", + " name=\"customer_subscription_info\",\n", + " version=1,\n", + " description=\"Customer subscription info for churn prediction.\",\n", + " primary_key=['customerID'],\n", + " event_time=\"datetime\",\n", + ")\n", + "# Insert data into feature group\n", + "subscriptions_fg.insert(subscriptions_df)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "472be52b", + "metadata": { + "id": "472be52b" + }, + "outputs": [], + "source": [ + "# Update feature descriptions\n", + "feature_descriptions = [\n", + " {\"name\": \"customerid\", \"description\": \"Customer id\"},\n", + " {\"name\": \"deviceprotection\", \"description\": \"Whether customer has signed up for device protection service\"},\n", + " {\"name\": \"onlinebackup\", \"description\": \"Whether customer has signed up for online backup service\"},\n", + " {\"name\": \"onlinesecurity\", \"description\": \"Whether customer has signed up for online security service\"},\n", + " {\"name\": \"internetservice\", \"description\": \"Whether customer has signed up for internet service\"},\n", + " {\"name\": \"multiplelines\", \"description\": \"Whether customer has signed up for multiple lines service\"},\n", + " {\"name\": \"phoneservice\", \"description\": \"Whether customer has signed up for phone service\"},\n", + " 
{\"name\": \"techsupport\", \"description\": \"Whether customer has signed up for tech support service\"},\n", + " {\"name\": \"streamingmovies\", \"description\": \"Whether customer has signed up for streaming movies service\"},\n", + " {\"name\": \"streamingtv\", \"description\": \"Whether customer has signed up for streaming TV service\"},\n", + " {\"name\": \"datetime\", \"description\": \"Date when the customer information was recorded\"},\n", + "]\n", + "\n", + "for desc in feature_descriptions:\n", + " subscriptions_fg.update_feature_description(desc[\"name\"], desc[\"description\"])" + ] + }, + { + "cell_type": "markdown", + "id": "0f38854f", + "metadata": { + "id": "0f38854f" + }, + "source": [ + "All three feature groups are now accessible and searchable in the UI.\n", + "\n", + "![fg-overview](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/churn/images/churn_fg.gif?raw=1)" + ] + }, + { + "cell_type": "markdown", + "id": "817cab5b", + "metadata": { + "id": "817cab5b" + }, + "source": [ + "---\n", + "## ⏭️ **Next:** Part 02 \n", + "\n", + "In the following notebook you will use your feature groups to create a training dataset, train a model, and add the trained model to the model registry.\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/2_churn_training_pipeline.ipynb)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "colab": { + "provenance": [] + } + }, + "nbformat": 4, +
"nbformat_minor": 5 +} \ No newline at end of file diff --git a/churn/2_churn_training_pipeline.ipynb b/churn/2_churn_training_pipeline.ipynb new file mode 100644 index 0000000..712b4cc --- /dev/null +++ b/churn/2_churn_training_pipeline.ipynb @@ -0,0 +1,3892 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "irdt2-haMbZI" + }, + "source": [ + "# **Hopsworks Feature Store** - Part 02: Training Pipeline\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/2_churn_training_pipeline.ipynb)\n", + "\n", + "This is the second part of the quick start series of tutorials about predicting customers that are at risk of churning with the Hopsworks Feature Store.\n", + "\n", + "This notebook explains how to read from a feature group and create a training dataset within the feature store.\n", + "\n", + "You will train an XGBoost model, although the model could just as well be trained with other machine learning frameworks such as Scikit-learn, PySpark, TensorFlow, and PyTorch. You will also perform some of the exploration that can be done in Hopsworks, notably the search functions and the lineage.\n", + "\n", + "## 🗒️ This notebook is divided into the following sections:\n", + "1. Select the features you want to train the model on.\n", + "2. Preprocess the features.\n", + "3. Create a dataset split for training and validation data.\n", + "4. Load the training data.\n", + "5. Train the model.\n", + "6.
Explore feature groups and views via the UI.\n", + "\n", + "![tutorial-flow](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/images/03_model.png?raw=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "na3WoaPOMbZK", + "outputId": "2c8f570c-76d7-46a4-f968-240fabc73b83", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting hopsworks==4.1.3\n", + " Downloading hopsworks-4.1.3-py3-none-any.whl.metadata (11 kB)\n", + "Collecting pyhumps==1.6.1 (from hopsworks==4.1.3)\n", + " Downloading pyhumps-1.6.1-py3-none-any.whl.metadata (3.7 kB)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from hopsworks==4.1.3) (2.32.3)\n", + "Collecting furl (from hopsworks==4.1.3)\n", + " Downloading furl-2.1.3-py2.py3-none-any.whl.metadata (1.2 kB)\n", + "Collecting boto3 (from hopsworks==4.1.3)\n", + " Downloading boto3-1.35.76-py3-none-any.whl.metadata (6.7 kB)\n", + "Collecting pandas<2.2.0 (from hopsworks==4.1.3)\n", + " Downloading pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n", + "Collecting pyjks (from hopsworks==4.1.3)\n", + " Downloading pyjks-20.0.0-py2.py3-none-any.whl.metadata (1.7 kB)\n", + "Collecting mock (from hopsworks==4.1.3)\n", + " Downloading mock-5.1.0-py3-none-any.whl.metadata (3.0 kB)\n", + "Collecting avro==1.11.3 (from hopsworks==4.1.3)\n", + " Downloading avro-1.11.3.tar.gz (90 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m90.6/90.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", + " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", + " Preparing metadata (pyproject.toml) ... 
\u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: sqlalchemy in /usr/local/lib/python3.10/dist-packages (from hopsworks==4.1.3) (2.0.36)\n", + "Collecting PyMySQL[rsa] (from hopsworks==4.1.3)\n", + " Downloading PyMySQL-1.1.1-py3-none-any.whl.metadata (4.4 kB)\n", + "Requirement already satisfied: tzlocal in /usr/local/lib/python3.10/dist-packages (from hopsworks==4.1.3) (5.2)\n", + "Requirement already satisfied: fsspec in /usr/local/lib/python3.10/dist-packages (from hopsworks==4.1.3) (2024.10.0)\n", + "Collecting retrying (from hopsworks==4.1.3)\n", + " Downloading retrying-1.3.4-py3-none-any.whl.metadata (6.9 kB)\n", + "Collecting hopsworks_aiomysql==0.2.1 (from hopsworks_aiomysql[sa]==0.2.1->hopsworks==4.1.3)\n", + " Downloading hopsworks_aiomysql-0.2.1-py3-none-any.whl.metadata (11 kB)\n", + "Collecting opensearch-py<=2.4.2,>=1.1.0 (from hopsworks==4.1.3)\n", + " Downloading opensearch_py-2.4.2-py2.py3-none-any.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from hopsworks==4.1.3) (4.66.6)\n", + "Requirement already satisfied: grpcio<2.0.0,>=1.49.1 in /usr/local/lib/python3.10/dist-packages (from hopsworks==4.1.3) (1.68.1)\n", + "Requirement already satisfied: protobuf<5.0.0,>=4.25.4 in /usr/local/lib/python3.10/dist-packages (from hopsworks==4.1.3) (4.25.5)\n", + "Collecting sqlalchemy (from hopsworks==4.1.3)\n", + " Downloading SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)\n", + "Requirement already satisfied: urllib3>=1.26.18 in /usr/local/lib/python3.10/dist-packages (from opensearch-py<=2.4.2,>=1.1.0->hopsworks==4.1.3) (2.2.3)\n", + "Requirement already satisfied: six in /usr/local/lib/python3.10/dist-packages (from opensearch-py<=2.4.2,>=1.1.0->hopsworks==4.1.3) (1.16.0)\n", + "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.10/dist-packages (from opensearch-py<=2.4.2,>=1.1.0->hopsworks==4.1.3) (2.8.2)\n", 
+ "Requirement already satisfied: certifi>=2022.12.07 in /usr/local/lib/python3.10/dist-packages (from opensearch-py<=2.4.2,>=1.1.0->hopsworks==4.1.3) (2024.8.30)\n", + "Requirement already satisfied: numpy<2,>=1.22.4 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2.0->hopsworks==4.1.3) (1.26.4)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2.0->hopsworks==4.1.3) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas<2.2.0->hopsworks==4.1.3) (2024.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->hopsworks==4.1.3) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->hopsworks==4.1.3) (3.10)\n", + "Requirement already satisfied: typing-extensions>=4.6.0 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy->hopsworks==4.1.3) (4.12.2)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.10/dist-packages (from sqlalchemy->hopsworks==4.1.3) (3.1.1)\n", + "Collecting botocore<1.36.0,>=1.35.76 (from boto3->hopsworks==4.1.3)\n", + " Downloading botocore-1.35.76-py3-none-any.whl.metadata (5.7 kB)\n", + "Collecting jmespath<2.0.0,>=0.7.1 (from boto3->hopsworks==4.1.3)\n", + " Downloading jmespath-1.0.1-py3-none-any.whl.metadata (7.6 kB)\n", + "Collecting s3transfer<0.11.0,>=0.10.0 (from boto3->hopsworks==4.1.3)\n", + " Downloading s3transfer-0.10.4-py3-none-any.whl.metadata (1.7 kB)\n", + "Collecting orderedmultidict>=1.0.1 (from furl->hopsworks==4.1.3)\n", + " Downloading orderedmultidict-1.0.1-py2.py3-none-any.whl.metadata (1.3 kB)\n", + "Collecting javaobj-py3 (from pyjks->hopsworks==4.1.3)\n", + " Downloading javaobj_py3-0.4.4-py2.py3-none-any.whl.metadata (17 kB)\n", + "Requirement already satisfied: pyasn1>=0.3.5 in /usr/local/lib/python3.10/dist-packages (from 
pyjks->hopsworks==4.1.3) (0.6.1)\n", + "Requirement already satisfied: pyasn1-modules in /usr/local/lib/python3.10/dist-packages (from pyjks->hopsworks==4.1.3) (0.4.1)\n", + "Collecting pycryptodomex (from pyjks->hopsworks==4.1.3)\n", + " Downloading pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.4 kB)\n", + "Collecting twofish (from pyjks->hopsworks==4.1.3)\n", + " Downloading twofish-0.3.0.tar.gz (26 kB)\n", + " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + "Requirement already satisfied: cryptography in /usr/local/lib/python3.10/dist-packages (from PyMySQL[rsa]->hopsworks==4.1.3) (43.0.3)\n", + "Requirement already satisfied: cffi>=1.12 in /usr/local/lib/python3.10/dist-packages (from cryptography->PyMySQL[rsa]->hopsworks==4.1.3) (1.17.1)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.12->cryptography->PyMySQL[rsa]->hopsworks==4.1.3) (2.22)\n", + "Downloading hopsworks-4.1.3-py3-none-any.whl (640 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m640.1/640.1 kB\u001b[0m \u001b[31m23.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading hopsworks_aiomysql-0.2.1-py3-none-any.whl (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.2/44.2 kB\u001b[0m \u001b[31m2.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyhumps-1.6.1-py3-none-any.whl (5.0 kB)\n", + "Downloading opensearch_py-2.4.2-py2.py3-none-any.whl (258 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m258.6/258.6 kB\u001b[0m \u001b[31m16.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pandas-2.1.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m 
\u001b[31m78.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading SQLAlchemy-2.0.29-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.1/3.1 MB\u001b[0m \u001b[31m64.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading boto3-1.35.76-py3-none-any.whl (139 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m139.2/139.2 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading furl-2.1.3-py2.py3-none-any.whl (20 kB)\n", + "Downloading mock-5.1.0-py3-none-any.whl (30 kB)\n", + "Downloading pyjks-20.0.0-py2.py3-none-any.whl (45 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.3/45.3 kB\u001b[0m \u001b[31m3.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading retrying-1.3.4-py3-none-any.whl (11 kB)\n", + "Downloading botocore-1.35.76-py3-none-any.whl (13.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.2/13.2 MB\u001b[0m \u001b[31m72.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading jmespath-1.0.1-py3-none-any.whl (20 kB)\n", + "Downloading orderedmultidict-1.0.1-py2.py3-none-any.whl (11 kB)\n", + "Downloading PyMySQL-1.1.1-py3-none-any.whl (44 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.0/45.0 kB\u001b[0m \u001b[31m3.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading s3transfer-0.10.4-py3-none-any.whl (83 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m83.2/83.2 kB\u001b[0m \u001b[31m6.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading javaobj_py3-0.4.4-py2.py3-none-any.whl (57 kB)\n", + "\u001b[2K 
\u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.1/57.1 kB\u001b[0m \u001b[31m4.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pycryptodomex-3.21.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.3/2.3 MB\u001b[0m \u001b[31m49.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hBuilding wheels for collected packages: avro, twofish\n", + " Building wheel for avro (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for avro: filename=avro-1.11.3-py2.py3-none-any.whl size=123910 sha256=4814a3a09b5ef5141cbe8cde34fabd8a7e0e7923e81b0cb13e7573ee1f3a0961\n", + " Stored in directory: /root/.cache/pip/wheels/1d/f6/41/0e0399396af07060e64d4e32c8bd259b48b98a4a114df31294\n", + " Building wheel for twofish (setup.py) ... \u001b[?25l\u001b[?25hdone\n", + " Created wheel for twofish: filename=twofish-0.3.0-cp310-cp310-linux_x86_64.whl size=24196 sha256=35949a18d4e7c4fbbb75d7ae982c76ca0a1bd30590839fe469db1db1666bd8e5\n", + " Stored in directory: /root/.cache/pip/wheels/a7/3c/27/c390be4f3e8a299d4b2836f8daa19697eb991eacbfabe25031\n", + "Successfully built avro twofish\n", + "Installing collected packages: twofish, pyhumps, javaobj-py3, sqlalchemy, retrying, PyMySQL, pycryptodomex, orderedmultidict, mock, jmespath, avro, pyjks, pandas, opensearch-py, hopsworks_aiomysql, furl, botocore, s3transfer, boto3, hopsworks\n", + " Attempting uninstall: sqlalchemy\n", + " Found existing installation: SQLAlchemy 2.0.36\n", + " Uninstalling SQLAlchemy-2.0.36:\n", + " Successfully uninstalled SQLAlchemy-2.0.36\n", + " Attempting uninstall: pandas\n", + " Found existing installation: pandas 2.2.2\n", + " Uninstalling pandas-2.2.2:\n", + " Successfully uninstalled pandas-2.2.2\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.1.4 which is incompatible.\n", + "mizani 0.13.0 requires pandas>=2.2.0, but you have pandas 2.1.4 which is incompatible.\n", + "plotnine 0.14.3 requires pandas>=2.2.0, but you have pandas 2.1.4 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed PyMySQL-1.1.1 avro-1.11.3 boto3-1.35.76 botocore-1.35.76 furl-2.1.3 hopsworks-4.1.3 hopsworks_aiomysql-0.2.1 javaobj-py3-0.4.4 jmespath-1.0.1 mock-5.1.0 opensearch-py-2.4.2 orderedmultidict-1.0.1 pandas-2.1.4 pycryptodomex-3.21.0 pyhumps-1.6.1 pyjks-20.0.0 retrying-1.3.4 s3transfer-0.10.4 sqlalchemy-2.0.29 twofish-0.3.0\n" + ] + } + ], + "source": [ + "pip install hopsworks==4.1.3" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DK634D4aMbZK" + }, + "source": [ + "### 📝 Imports" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "2PlhD1v5MbZK" + }, + "outputs": [], + "source": [ + "import os\n", + "from PIL import Image\n", + "\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from sklearn.metrics import confusion_matrix\n", + "import xgboost as xgb\n", + "\n", + "#ignore warnings\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qWlYTQnIMbZK" + }, + "source": [ + "## 📡 Connecting to Hopsworks Feature Store " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "IbUOC1ptMbZK", + "outputId": "9674c001-da8e-4163-f1cf-da885aeec184", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated\n", + "\n", + "Paste it here: ··········\n", + "\n", + "Logged in to 
project, explore it here https://c.app.hopsworks.ai:443/p/1193142\n" + ] + } + ], + "source": [ + "import hopsworks\n", + "\n", + "project = hopsworks.login()\n", + "\n", + "fs = project.get_feature_store()" + ] + }, + { + "cell_type": "code", + "source": [ + "# existing_feature_view = fs.get_feature_view(\n", + "# name = 'churn_feature_view',\n", + "# version = 1)\n", + "\n", + "#existing_feature_view.delete()" + ], + "metadata": { + "id": "WK_Oh7V9P-Wh" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "16z4YI1FMbZL" + }, + "source": [ + "---\n", + "## 🔪 Feature Selection \n", + "\n", + "You will start by selecting all the features you want to include for model training/inference." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "id": "6DwyP04lMbZL" + }, + "outputs": [], + "source": [ + "# Retrieve feature groups\n", + "customer_info_fg = fs.get_feature_group(\n", + " name=\"customer_info\",\n", + " version=2,\n", + ")\n", + "\n", + "demography_fg = fs.get_feature_group(\n", + " name=\"customer_demography_info\",\n", + " version=2,\n", + ")\n", + "\n", + "subscriptions_fg = fs.get_feature_group(\n", + " name=\"customer_subscription_info\",\n", + " version=1,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "id": "6sWQDlAnMbZL" + }, + "outputs": [], + "source": [ + "# Select features for training data\n", + "selected_features = customer_info_fg.select_except([\"customerid\", \"datetime\"]) \\\n", + " .join(demography_fg.select_except([\"customerid\"])) \\\n", + " .join(subscriptions_fg.select_except([\"datetime\"]))\n", + "\n", + "# uncomment this if you would like to view your selected features\n", + "# selected_features.show(5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rZnRHFxNMbZL" + }, + "source": [ + "Recall that you created three feature groups in the previous notebook. 
If you had created multiple feature groups with identical schema and wanted to include them in the join you would need to include a prefix argument in the join to avoid feature name clash. See the [documentation](https://docs.hopsworks.ai/feature-store-api/latest/generated/api/query_api/#join) for more details." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SLGGfg7ZMbZL" + }, + "source": [ + "---\n", + "## 🤖 Transformation Functions \n", + "\n", + "You will preprocess the data using *min-max scaling* on numerical features and *label encoding* on categorical features. To do this you will simply define a mapping between features and transformation functions. This ensures that transformation functions such as *min-max scaling* are fitted only on the training data (and not the validation/test data), which ensures that there is no data leakage." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "id": "ImQg2aykMbZL" + }, + "outputs": [], + "source": [ + "# Load transformation functions from the feature store\n", + "min_max_scaler = fs.get_transformation_function(name=\"min_max_scaler\")\n", + "label_encoder = fs.get_transformation_function(name=\"label_encoder\")\n", + "\n", + "# Define lists of numerical and categorical features\n", + "numerical_features = [\"tenure\", \"monthlycharges\", \"totalcharges\"]\n", + "categorical_features = [\n", + " \"multiplelines\", \"internetservice\", \"onlinesecurity\", \"onlinebackup\",\n", + " \"deviceprotection\", \"techsupport\", \"streamingmovies\", \"streamingtv\",\n", + " \"phoneservice\", \"paperlessbilling\", \"contract\", \"paymentmethod\", \"gender\",\n", + " \"dependents\", \"partner\"\n", + "]\n", + "\n", + "# Map features to their corresponding transformation functions\n", + "transformation_functions = []\n", + "\n", + "# For numerical features, use the min_max_scaler transformation\n", + "for feature in numerical_features:\n", + " 
transformation_functions.append(min_max_scaler(feature))\n", + "\n", + "# For categorical features, use the label_encoder transformation\n", + "for feature in categorical_features:\n", + " transformation_functions.append(label_encoder(feature))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wdqtn3yQMbZL" + }, + "source": [ + "---\n", + "## ⚙️ Feature View Creation \n", + "\n", + "A Feature View defines a schema in the form of a query with filters, a model target feature/label, and additional transformation functions.\n", + "In order to create a Feature View you may use `fs.get_or_create_feature_view()`." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "id": "NX8eQPt9MbZL", + "outputId": "90cba4be-5409-4105-c535-adb54c483500", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Feature view created successfully, explore it at \n", + "https://c.app.hopsworks.ai/p/1193142/fs/1182819/fv/churn_feature_view/version/4/n" + ] + } + ], + "source": [ + "# Get or create the 'churn_feature_view'\n", + "feature_view = fs.get_or_create_feature_view(\n", + " name = 'churn_feature_view',\n", + " version = 4,\n", + " labels=[\"churn\"],\n", + " transformation_functions=transformation_functions,\n", + " query=selected_features,\n", + ")" + ] + }, + { + "cell_type": "code", + "source": [ + "feature_view.features" + ], + "metadata": { + "id": "BNgo6wSpxJuV", + "outputId": "d9c651cf-4139-46f2-b2b9-696b6c6dc9df", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "execution_count": 25, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[Training Dataset Feature('contract', 'string', 0, False, contract, 1379683, None),\n", + " Training Dataset Feature('tenure', 'bigint', 1, False, tenure, 1379683, None),\n", + " Training Dataset Feature('paymentmethod', 'string', 2, False,
paymentmethod, 1379683, None),\n", + " Training Dataset Feature('paperlessbilling', 'string', 3, False, paperlessbilling, 1379683, None),\n", + " Training Dataset Feature('monthlycharges', 'double', 4, False, monthlycharges, 1379683, None),\n", + " Training Dataset Feature('totalcharges', 'double', 5, False, totalcharges, 1379683, None),\n", + " Training Dataset Feature('churn', 'bigint', 6, True, churn, 1379683, None),\n", + " Training Dataset Feature('gender', 'string', 7, False, gender, 1379684, None),\n", + " Training Dataset Feature('seniorcitizen', 'bigint', 8, False, seniorcitizen, 1379684, None),\n", + " Training Dataset Feature('dependents', 'string', 9, False, dependents, 1379684, None),\n", + " Training Dataset Feature('partner', 'string', 10, False, partner, 1379684, None),\n", + " Training Dataset Feature('customerid', 'string', 11, False, customerid, 1377754, None),\n", + " Training Dataset Feature('deviceprotection', 'string', 12, False, deviceprotection, 1377754, None),\n", + " Training Dataset Feature('onlinebackup', 'string', 13, False, onlinebackup, 1377754, None),\n", + " Training Dataset Feature('onlinesecurity', 'string', 14, False, onlinesecurity, 1377754, None),\n", + " Training Dataset Feature('internetservice', 'string', 15, False, internetservice, 1377754, None),\n", + " Training Dataset Feature('multiplelines', 'string', 16, False, multiplelines, 1377754, None),\n", + " Training Dataset Feature('phoneservice', 'string', 17, False, phoneservice, 1377754, None),\n", + " Training Dataset Feature('techsupport', 'string', 18, False, techsupport, 1377754, None),\n", + " Training Dataset Feature('streamingmovies', 'string', 19, False, streamingmovies, 1377754, None),\n", + " Training Dataset Feature('streamingtv', 'string', 20, False, streamingtv, 1377754, None)]" + ] + }, + "metadata": {}, + "execution_count": 25 + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6jejXJEnMbZM" + }, + "source": [ + "The feature view is now 
visible in the UI.\n", + "\n", + "![fv-overview](https://github.com/logicalclocks/hopsworks-tutorials/blob/master/churn/images/churn_tutofv.gif?raw=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NmTY1NIeMbZM" + }, + "source": [ + "---\n", + "## 🏋️ Training Dataset \n" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "id": "4b5aXxmnMbZM", + "outputId": "37daa0a7-d9cc-48ae-a5ec-137a3da2c1d3", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (2.27s) \n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "VersionWarning: Incremented version to `1`.\n" + ] + } + ], + "source": [ + "# Split data into training, validation, and test sets\n", + "X_train, X_val, X_test, y_train, y_val, y_test = feature_view.train_validation_test_split(\n", + " validation_size=0.2,\n", + " test_size=0.1,\n", + ")" + ] + }, + { + "cell_type": "code", + "source": [ + "# 'customerid' is an identifier, not a model input. Rebinding with errors='ignore' (instead of inplace=True) keeps this cell safe to re-run.\n", + "# Drop the 'customerid' column from the training set\n", + "X_train = X_train.drop(columns='customerid', errors='ignore')\n", + "\n", + "# Drop the 'customerid' column from the validation set\n", + "X_val = X_val.drop(columns='customerid', errors='ignore')\n", + "\n", + "# Drop the 'customerid' column from the test set\n", + "X_test = X_test.drop(columns='customerid', errors='ignore')" + ], + "metadata": { + "id": "fg4_yYUNy3do" + }, + "execution_count": 27, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "id": "6-GflMzhMbZM", + "outputId": "ad6e0c05-abf8-46fc-82da-60ba5ca0a3a0", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 163 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " seniorcitizen label_encoder_contract_ label_encoder_dependents_ \\\n", + "0 1 0 0 \n", + "4 0 0 0 \n", + "5 0 1 0
\n", + "\n", + " label_encoder_deviceprotection_ label_encoder_gender_ \\\n", + "0 2 0 \n", + "4 0 1 \n", + "5 0 1 \n", + "\n", + " label_encoder_internetservice_ label_encoder_multiplelines_ \\\n", + "0 1 2 \n", + "4 0 0 \n", + "5 1 2 \n", + "\n", + " label_encoder_onlinebackup_ label_encoder_onlinesecurity_ \\\n", + "0 2 2 \n", + "4 0 2 \n", + "5 2 0 \n", + "\n", + " label_encoder_paperlessbilling_ label_encoder_partner_ \\\n", + "0 1 0 \n", + "4 1 0 \n", + "5 1 0 \n", + "\n", + " label_encoder_paymentmethod_ label_encoder_phoneservice_ \\\n", + "0 2 1 \n", + "4 0 1 \n", + "5 0 1 \n", + "\n", + " label_encoder_streamingmovies_ label_encoder_streamingtv_ \\\n", + "0 0 0 \n", + "4 0 0 \n", + "5 2 2 \n", + "\n", + " label_encoder_techsupport_ min_max_scaler_monthlycharges_ \\\n", + "0 0 0.711443 \n", + "4 0 0.311443 \n", + "5 2 0.865174 \n", + "\n", + " min_max_scaler_tenure_ min_max_scaler_totalcharges_ \n", + "0 0.819444 0.632933 \n", + "4 0.333333 0.139370 \n", + "5 0.958333 0.850457 " + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
seniorcitizenlabel_encoder_contract_label_encoder_dependents_label_encoder_deviceprotection_label_encoder_gender_label_encoder_internetservice_label_encoder_multiplelines_label_encoder_onlinebackup_label_encoder_onlinesecurity_label_encoder_paperlessbilling_label_encoder_partner_label_encoder_paymentmethod_label_encoder_phoneservice_label_encoder_streamingmovies_label_encoder_streamingtv_label_encoder_techsupport_min_max_scaler_monthlycharges_min_max_scaler_tenure_min_max_scaler_totalcharges_
010020122210210000.7114430.8194440.632933
400001000210010000.3114430.3333330.139370
501001122010012220.8651740.9583330.850457
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "X_train", + "summary": "{\n \"name\": \"X_train\",\n \"rows\": 4930,\n \"fields\": [\n {\n \"column\": \"seniorcitizen\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_contract_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_dependents_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_deviceprotection_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_gender_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_internetservice_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_multiplelines_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": 
\"label_encoder_onlinebackup_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_onlinesecurity_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 2,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_paperlessbilling_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_partner_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_paymentmethod_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 0,\n \"max\": 3,\n \"num_unique_values\": 4,\n \"samples\": [\n 0,\n 3\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_phoneservice_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_streamingmovies_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_streamingtv_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 2\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label_encoder_techsupport_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 2,\n \"num_unique_values\": 3,\n \"samples\": [\n 0,\n 2\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"min_max_scaler_monthlycharges_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2997495162884068,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 1454,\n \"samples\": [\n 0.05721393034825871,\n 0.9562189054726368\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"min_max_scaler_tenure_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.3412166676186731,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 73,\n \"samples\": [\n 0.2777777777777778,\n 0.4305555555555556\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"min_max_scaler_totalcharges_\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.2610089653322158,\n \"min\": 0.0,\n \"max\": 1.0,\n \"num_unique_values\": 4658,\n \"samples\": [\n 0.008077330508474577,\n 0.1317819638909359\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 28 + } + ], + "source": [ + "X_train.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "id": "FG1SERDaMbZM", + "outputId": "a981c7ec-a742-418f-a07a-4bedc9b25849", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " churn\n", + "0 0\n", + "4 1\n", + "5 0" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
churn
00
41
50
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "\n", + "
\n", + "
\n" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "dataframe", + "variable_name": "y_train", + "summary": "{\n \"name\": \"y_train\",\n \"rows\": 4930,\n \"fields\": [\n {\n \"column\": \"churn\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 1,\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}" + } + }, + "metadata": {}, + "execution_count": 29 + } + ], + "source": [ + "y_train.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "id": "EJSuqEMXMbZM", + "outputId": "5ba9c915-9246-43a6-ff99-fa29b652db0a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 178 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "churn\n", + "0 0.741988\n", + "1 0.258012\n", + "Name: proportion, dtype: float64" + ], + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
proportion
churn
00.741988
10.258012
\n", + "

" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "source": [ + "# Display the normalized value counts of the target variable\n", + "y_train.value_counts(normalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZOjhhi6uMbZM" + }, + "source": [ + "Notice that the distribution is skewed, which is good news for the company considering that customers at risk of churning make up a smaller part of the customer base. However, as a data scientist, you should still address the class imbalance. There are many approaches for this, such as weighting the loss function, over- or undersampling, creating synthetic data, or modifying the decision threshold. In this example, you will use the simplest method, which is to just supply a class weight parameter to our learning algorithm. The class weight will affect how much importance is attached to each class, which in our case means that higher importance will be placed on positive (churn) samples." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rfmzg2inMbZM" + }, + "source": [ + "---\n", + "## 🏃 Train Model\n", + "\n", + "Next you will train a model and set a larger class weight for the positive class." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "id": "DiFxLoTnMbZM", + "outputId": "87606265-fe9c-412f-9a91-edb804b45562", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 254 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "XGBClassifier(base_score=None, booster=None, callbacks=None,\n", + " colsample_bylevel=None, colsample_bynode=None,\n", + " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", + " enable_categorical=False, eval_metric=None, feature_types=None,\n", + " gamma=None, grow_policy=None, importance_type=None,\n", + " interaction_constraints=None, learning_rate=None, max_bin=None,\n", + " max_cat_threshold=None, max_cat_to_onehot=None,\n", + " max_delta_step=None, max_depth=None, max_leaves=None,\n", + " min_child_weight=None, missing=nan, monotone_constraints=None,\n", + " multi_strategy=None, n_estimators=None, n_jobs=None,\n", + " num_parallel_tree=None, random_state=None, ...)" + ], + "text/html": [ + "
XGBClassifier(base_score=None, booster=None, callbacks=None,\n",
+              "              colsample_bylevel=None, colsample_bynode=None,\n",
+              "              colsample_bytree=None, device=None, early_stopping_rounds=None,\n",
+              "              enable_categorical=False, eval_metric=None, feature_types=None,\n",
+              "              gamma=None, grow_policy=None, importance_type=None,\n",
+              "              interaction_constraints=None, learning_rate=None, max_bin=None,\n",
+              "              max_cat_threshold=None, max_cat_to_onehot=None,\n",
+              "              max_delta_step=None, max_depth=None, max_leaves=None,\n",
+              "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
+              "              multi_strategy=None, n_estimators=None, n_jobs=None,\n",
+              "              num_parallel_tree=None, random_state=None, ...)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ], + "source": [ + "# Create an instance of the XGBClassifier with a specified scale_pos_weight\n", + "model = xgb.XGBClassifier(scale_pos_weight=3)\n", + "\n", + "# Fit the classifier on the training data\n", + "model.fit(X_train, y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Prm6Q20hMbZM" + }, + "source": [ + "---\n", + "## 👨🏻‍⚖️ Model Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "id": "QoCZH7W8MbZM", + "outputId": "8f64193f-504c-4e89-8bca-5b344b980193", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 627 + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": {} + } + ], + "source": [ + "# Generate the confusion matrix using the true labels (y_test) and predicted labels from the classifier\n", + "conf_matrix = confusion_matrix(\n", + " y_test,\n", + " model.predict(X_test),\n", + ").astype(int)\n", + "\n", + "# Create a DataFrame from the confusion matrix results with appropriate labels\n", + "df_cm = pd.DataFrame(\n", + " conf_matrix,\n", + " ['Non Churn', 'Churn'],\n", + " ['Non Churn', 'Churn'],\n", + ")\n", + "\n", + "# Create a heatmap using seaborn with annotations\n", + "figure_cm = plt.figure(figsize=(10, 7))\n", + "figure_cm = sns.heatmap(\n", + " df_cm,\n", + " annot=True,\n", + " annot_kws={\"size\": 14},\n", + " fmt='.10g',\n", + ")\n", + "\n", + "# Set the title for the confusion matrix plot\n", + "plt.title('Confusion Matrix', fontsize=17)\n", + "\n", + "# Display the plot\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wm46sFQhMbZM" + }, + "source": [ + "---\n", + "## 🗄 Model Registry\n", + "\n", + "One of the features in Hopsworks is the model registry. This is where you can store different versions of models and compare their performance." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "id": "I4j3_TisMbZM" + }, + "outputs": [], + "source": [ + "# Get the model registry\n", + "mr = project.get_model_registry()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_If1voO-MbZM" + }, + "source": [ + "### ⚙️ Model Schema\n", + "\n", + "The model needs to be set up with a [Model Schema](https://docs.hopsworks.ai/machine-learning-api/latest/generated/model_schema/), which describes the inputs and outputs for a model.\n", + "\n", + "A Model Schema can be automatically generated from training examples, as shown below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "id": "6Er4QCP9MbZM", + "outputId": "86c82dd1-a094-4107-92fe-6a8bcbbf20cc", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'input_schema': {'columnar_schema': [{'name': 'seniorcitizen',\n", + " 'type': 'int64'},\n", + " {'name': 'label_encoder_contract_', 'type': 'int64'},\n", + " {'name': 'label_encoder_dependents_', 'type': 'int64'},\n", + " {'name': 'label_encoder_deviceprotection_', 'type': 'int64'},\n", + " {'name': 'label_encoder_gender_', 'type': 'int64'},\n", + " {'name': 'label_encoder_internetservice_', 'type': 'int64'},\n", + " {'name': 'label_encoder_multiplelines_', 'type': 'int64'},\n", + " {'name': 'label_encoder_onlinebackup_', 'type': 'int64'},\n", + " {'name': 'label_encoder_onlinesecurity_', 'type': 'int64'},\n", + " {'name': 'label_encoder_paperlessbilling_', 'type': 'int64'},\n", + " {'name': 'label_encoder_partner_', 'type': 'int64'},\n", + " {'name': 'label_encoder_paymentmethod_', 'type': 'int64'},\n", + " {'name': 'label_encoder_phoneservice_', 'type': 'int64'},\n", + " {'name': 'label_encoder_streamingmovies_', 'type': 'int64'},\n", + " {'name': 'label_encoder_streamingtv_', 'type': 'int64'},\n", + " {'name': 'label_encoder_techsupport_', 'type': 'int64'},\n", + " {'name': 'min_max_scaler_monthlycharges_', 'type': 'float64'},\n", + " {'name': 'min_max_scaler_tenure_', 'type': 'float64'},\n", + " {'name': 'min_max_scaler_totalcharges_', 'type': 'float64'}]},\n", + " 'output_schema': {'columnar_schema': [{'name': 'churn', 'type': 'int64'}]}}" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ], + "source": [ + "from hsml.schema import Schema\n", + "from hsml.model_schema import ModelSchema\n", + "\n", + "# Create input schema using X_train\n", + "input_schema = Schema(X_train)\n", + "\n", + "# Create output schema using y_train\n", + 
"output_schema = Schema(y_train)\n", + "\n", + "# Create a ModelSchema object specifying the input and output schemas\n", + "model_schema = ModelSchema(\n", + " input_schema=input_schema,\n", + " output_schema=output_schema,\n", + ")\n", + "\n", + "# Convert the model schema to a dictionary\n", + "model_schema.to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "id": "r0S7jr8GMbZM" + }, + "outputs": [], + "source": [ + "# Specify the directory where the model files will be saved\n", + "model_dir = \"churn_model\"\n", + "\n", + "# Check if the directory exists, and create it if it doesn't\n", + "if not os.path.isdir(model_dir):\n", + " os.mkdir(model_dir)\n", + "\n", + "# Save the trained classifier as json file\n", + "model.save_model(model_dir + \"/model.json\")\n", + "\n", + "# Save the confusion matrix heatmap as an image in the model directory\n", + "figure_cm.figure.savefig(model_dir + '/confusion_matrix.png')" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "id": "c4aPY8bXMbZM", + "outputId": "61cf1592-a49d-48ad-b5da-87c8a35db29c", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 212, + "referenced_widgets": [ + "041aa107d4a8426fab732f15b6195893", + "8de026318e6a4bd6b0cba00928814853", + "ea7ec3a3c07b4e68af364d3c79c6a07c", + "ed1b301d290f42df986cfb86785aad9c", + "cce27c500e35468e969ab47f067293b4", + "9708e3e7701c4a49bd829ffc59e68005", + "9edb31a3dd7e4babb2c696dd0de32c91", + "36a664baec6048f8b727b46ea38d5a78", + "71bc10de73f34203b79ecf751a8e4112", + "f163e8e597e940fdb45ccf9a5e645bf5", + "edcf01a9d21946a58544ad7565f2d361", + "3a512a7a07304c0a9fe3b7e58e586749", + "23a260026de64a2f9800aec023c730b1", + "50d7d874b8174aa4804b37a0ea70679b", + "0149c99a60694ce7aa6c05ac0a2dd98a", + "72beb1c78c194dc6ba48c9eb49cac32f", + "0698f81033864549a78000138eb728dd", + "1447d97d22b043ffb8d290e2193d0e2a", + "524f4567c132404aa0e7526de6f2362a", + "f13c9fd7c29c4cfc977226241f9bc39c", + 
"0bbc1651f1014a19af414d3c513d6eca", + "06238a8b7c1b4cd5a65bf79da0a510ff", + "6d6a034754b042099a516d512348ca37", + "23ea3859196e4646980e4e98778ac2fd", + "051b52b329bf42168366cadd055d5666", + "31260fb380ab4021bc7fdfe3480ee713", + "93823d188df64f9397d1e0415bec3af4", + "df90e53f76e2451d89be106722a89a29", + "95fc89f2cfee443682427857c0383c0d", + "8ab4707942074fc59152aaa0fba0da5a", + "090a138523da474db0339029e818cdc5", + "4024d8599f5c45188f4503577d6a5253", + "4e076475630e4204ba938cb63097957f", + "f04f3c1e5dd74347a626e11f7c326996", + "70b25fd6b2b345c28721a609fc9b1d10", + "b3cb15fb8f5544f0befe025056d1104c", + "a0467fe9b797408d8e9f252c32a977bc", + "f6343adfee5a4a868969173db37628ab", + "ad48bf45cfde4343b11d0d68378db86a", + "653f765223194d52894c05957b9b75ab", + "006224eaa5ae48bd9a602ee2309294be", + "24db55739ec348c6bee3bacc587e8a25", + "279927d437934036989d20cca2ae9604", + "86cd8b03926c4482bc932bac08872b0b", + "b857e2ff0c464bcabe29d52d6a1fb3bf", + "59b110f140494eddbf5892d594b1a992", + "0c62ed5525b64afca78732d8b8d71606", + "5220ddbcd47a46f090d95bea703c98f1", + "392b2ffab58545a4944b4895115d8a1a", + "3c7ce09d28344a4c9a49ab9da0e661cf", + "35fad4f5dd974319bae85ca5fa62d17d", + "5e4485a055ea4ec4a6d95389cd36d5c4", + "c1c5d68c0f784717b0e71fe98a570016", + "ab3e640cd6954a888b1ec35a9a628093", + "804b7a67b06b4579989735dd6a9dda4e" + ] + } + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + " 0%| | 0/6 [00:00⏭️ **Next:** Part 03 \n", + "\n", + "In the following notebook you will use your model for batch inference.\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/logicalclocks/hopsworks-tutorials/blob/master/churn/3_churn_batch_inference.ipynb)\n", + "\n", + "---" + ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "U_O8Z_uDJaDq" + }, + "execution_count": 36, + "outputs": [] + } + ], + "metadata": { + "interpreter": { + "hash": 
"31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.11" + }, + "colab": { + "provenance": [] + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "041aa107d4a8426fab732f15b6195893": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_8de026318e6a4bd6b0cba00928814853", + "IPY_MODEL_ea7ec3a3c07b4e68af364d3c79c6a07c", + "IPY_MODEL_ed1b301d290f42df986cfb86785aad9c" + ], + "layout": "IPY_MODEL_cce27c500e35468e969ab47f067293b4" + } + }, + "8de026318e6a4bd6b0cba00928814853": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9708e3e7701c4a49bd829ffc59e68005", + "placeholder": "​", + "style": "IPY_MODEL_9edb31a3dd7e4babb2c696dd0de32c91", + "value": "Model export complete: 100%" + } + }, + "ea7ec3a3c07b4e68af364d3c79c6a07c": { + "model_module": "@jupyter-widgets/controls", + "model_name": 
"FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_36a664baec6048f8b727b46ea38d5a78", + "max": 6, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_71bc10de73f34203b79ecf751a8e4112", + "value": 6 + } + }, + "ed1b301d290f42df986cfb86785aad9c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_f163e8e597e940fdb45ccf9a5e645bf5", + "placeholder": "​", + "style": "IPY_MODEL_edcf01a9d21946a58544ad7565f2d361", + "value": " 6/6 [00:57<00:00, 10.25s/it]" + } + }, + "cce27c500e35468e969ab47f067293b4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + 
"grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9708e3e7701c4a49bd829ffc59e68005": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "9edb31a3dd7e4babb2c696dd0de32c91": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "36a664baec6048f8b727b46ea38d5a78": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "71bc10de73f34203b79ecf751a8e4112": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + 
"f163e8e597e940fdb45ccf9a5e645bf5": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "edcf01a9d21946a58544ad7565f2d361": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "3a512a7a07304c0a9fe3b7e58e586749": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": 
"HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_23a260026de64a2f9800aec023c730b1", + "IPY_MODEL_50d7d874b8174aa4804b37a0ea70679b", + "IPY_MODEL_0149c99a60694ce7aa6c05ac0a2dd98a" + ], + "layout": "IPY_MODEL_72beb1c78c194dc6ba48c9eb49cac32f" + } + }, + "23a260026de64a2f9800aec023c730b1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0698f81033864549a78000138eb728dd", + "placeholder": "​", + "style": "IPY_MODEL_1447d97d22b043ffb8d290e2193d0e2a", + "value": "Uploading: 100.000%" + } + }, + "50d7d874b8174aa4804b37a0ea70679b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_524f4567c132404aa0e7526de6f2362a", + "max": 22711, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_f13c9fd7c29c4cfc977226241f9bc39c", + "value": 22711 + } + }, + "0149c99a60694ce7aa6c05ac0a2dd98a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": 
"@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0bbc1651f1014a19af414d3c513d6eca", + "placeholder": "​", + "style": "IPY_MODEL_06238a8b7c1b4cd5a65bf79da0a510ff", + "value": " 22711/22711 elapsed<00:12 remaining<00:00" + } + }, + "72beb1c78c194dc6ba48c9eb49cac32f": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "0698f81033864549a78000138eb728dd": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", 
+ "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "1447d97d22b043ffb8d290e2193d0e2a": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "524f4567c132404aa0e7526de6f2362a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": 
null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f13c9fd7c29c4cfc977226241f9bc39c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "0bbc1651f1014a19af414d3c513d6eca": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + 
"height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "06238a8b7c1b4cd5a65bf79da0a510ff": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6d6a034754b042099a516d512348ca37": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_23ea3859196e4646980e4e98778ac2fd", + "IPY_MODEL_051b52b329bf42168366cadd055d5666", + "IPY_MODEL_31260fb380ab4021bc7fdfe3480ee713" + ], + "layout": "IPY_MODEL_93823d188df64f9397d1e0415bec3af4" + } + }, + "23ea3859196e4646980e4e98778ac2fd": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + 
"description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_df90e53f76e2451d89be106722a89a29", + "placeholder": "​", + "style": "IPY_MODEL_95fc89f2cfee443682427857c0383c0d", + "value": "Uploading: 100.000%" + } + }, + "051b52b329bf42168366cadd055d5666": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8ab4707942074fc59152aaa0fba0da5a", + "max": 449813, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_090a138523da474db0339029e818cdc5", + "value": 449813 + } + }, + "31260fb380ab4021bc7fdfe3480ee713": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_4024d8599f5c45188f4503577d6a5253", + "placeholder": "​", + "style": "IPY_MODEL_4e076475630e4204ba938cb63097957f", + "value": " 449813/449813 elapsed<00:03 remaining<00:00" + } + }, + "93823d188df64f9397d1e0415bec3af4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + 
"_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "df90e53f76e2451d89be106722a89a29": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": 
null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "95fc89f2cfee443682427857c0383c0d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "8ab4707942074fc59152aaa0fba0da5a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "090a138523da474db0339029e818cdc5": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + 
"model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "4024d8599f5c45188f4503577d6a5253": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4e076475630e4204ba938cb63097957f": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": 
"1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "f04f3c1e5dd74347a626e11f7c326996": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_70b25fd6b2b345c28721a609fc9b1d10", + "IPY_MODEL_b3cb15fb8f5544f0befe025056d1104c", + "IPY_MODEL_a0467fe9b797408d8e9f252c32a977bc" + ], + "layout": "IPY_MODEL_f6343adfee5a4a868969173db37628ab" + } + }, + "70b25fd6b2b345c28721a609fc9b1d10": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ad48bf45cfde4343b11d0d68378db86a", + "placeholder": "​", + "style": "IPY_MODEL_653f765223194d52894c05957b9b75ab", + "value": "Uploading: 100.000%" + } + }, + "b3cb15fb8f5544f0befe025056d1104c": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": 
"IPY_MODEL_006224eaa5ae48bd9a602ee2309294be", + "max": 143, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_24db55739ec348c6bee3bacc587e8a25", + "value": 143 + } + }, + "a0467fe9b797408d8e9f252c32a977bc": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_279927d437934036989d20cca2ae9604", + "placeholder": "​", + "style": "IPY_MODEL_86cd8b03926c4482bc932bac08872b0b", + "value": " 143/143 elapsed<00:32 remaining<00:00" + } + }, + "f6343adfee5a4a868969173db37628ab": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + 
"overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "ad48bf45cfde4343b11d0d68378db86a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "653f765223194d52894c05957b9b75ab": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "006224eaa5ae48bd9a602ee2309294be": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + 
"_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "24db55739ec348c6bee3bacc587e8a25": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "279927d437934036989d20cca2ae9604": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + 
"align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "86cd8b03926c4482bc932bac08872b0b": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "b857e2ff0c464bcabe29d52d6a1fb3bf": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_59b110f140494eddbf5892d594b1a992", + "IPY_MODEL_0c62ed5525b64afca78732d8b8d71606", + "IPY_MODEL_5220ddbcd47a46f090d95bea703c98f1" + ], + "layout": "IPY_MODEL_392b2ffab58545a4944b4895115d8a1a" + } + }, + "59b110f140494eddbf5892d594b1a992": 
{ + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_3c7ce09d28344a4c9a49ab9da0e661cf", + "placeholder": "​", + "style": "IPY_MODEL_35fad4f5dd974319bae85ca5fa62d17d", + "value": "Uploading: 100.000%" + } + }, + "0c62ed5525b64afca78732d8b8d71606": { + "model_module": "@jupyter-widgets/controls", + "model_name": "FloatProgressModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_5e4485a055ea4ec4a6d95389cd36d5c4", + "max": 1841, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_c1c5d68c0f784717b0e71fe98a570016", + "value": 1841 + } + }, + "5220ddbcd47a46f090d95bea703c98f1": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_ab3e640cd6954a888b1ec35a9a628093", + "placeholder": "​", + "style": "IPY_MODEL_804b7a67b06b4579989735dd6a9dda4e", + "value": " 
1841/1841 elapsed<00:01 remaining<00:00" + } + }, + "392b2ffab58545a4944b4895115d8a1a": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3c7ce09d28344a4c9a49ab9da0e661cf": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": 
null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "35fad4f5dd974319bae85ca5fa62d17d": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "5e4485a055ea4ec4a6d95389cd36d5c4": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, 
+ "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c1c5d68c0f784717b0e71fe98a570016": { + "model_module": "@jupyter-widgets/controls", + "model_name": "ProgressStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "ab3e640cd6954a888b1ec35a9a628093": { + "model_module": "@jupyter-widgets/base", + "model_name": "LayoutModel", + "model_module_version": "1.2.0", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + 
"804b7a67b06b4579989735dd6a9dda4e": { + "model_module": "@jupyter-widgets/controls", + "model_name": "DescriptionStyleModel", + "model_module_version": "1.5.0", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file