# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/python-3-miniconda/.devcontainer/base.Dockerfile

FROM mcr.microsoft.com/vscode/devcontainers/miniconda:0-3

# [Choice] Node.js version: none, lts/*, 16, 14, 12, 10
# The default "none" skips the Node.js install entirely; devcontainer.json
# overrides it through its build args.
ARG NODE_VERSION="none"
# nvm is sourced and run as the "vscode" user (via su) rather than as root;
# umask 0002 keeps the files it creates group-writable.
RUN if [ "${NODE_VERSION}" != "none" ]; then su vscode -c "umask 0002 && . /usr/local/share/nvm/nvm.sh && nvm install ${NODE_VERSION} 2>&1"; fi

# Copy environment.yml (if found) to a temp location so we update the environment. Also
# copy "noop.txt" so the COPY instruction does not fail if no environment.yml exists.
COPY environment.yml* .devcontainer/noop.txt /tmp/conda-tmp/
# Update the base conda environment only when an environment.yml was copied.
# The package cache is cleaned in the SAME layer as the update: files removed
# in a later layer would still be stored in the image.
RUN if [ -f "/tmp/conda-tmp/environment.yml" ]; then \
        umask 0002 \
        && /opt/conda/bin/conda env update -n base -f /tmp/conda-tmp/environment.yml \
        && /opt/conda/bin/conda clean --all --yes; \
    fi \
    && rm -rf /tmp/conda-tmp

# [Optional] Uncomment to install a different version of Python than the default
# RUN conda install -y python=3.6 \
#     && pip install --no-cache-dir pipx \
#     && pipx reinstall-all

# [Optional] Uncomment this section to install additional OS packages.
# RUN apt-get update && export DEBIAN_FRONTEND=noninteractive \
#     && apt-get -y install --no-install-recommends <your-package-list-here>
See +https://aka.ms/vscode-remote/conda/miniconda + +EOF + +notice_script="$(cat << 'EOF' +if [ -t 1 ] && [ "${IGNORE_NOTICE}" != "true" ] && [ "${TERM_PROGRAM}" = "vscode" ] && [ "${CODESPACES}" != "true" ] && [ ! -f "$HOME/.config/vscode-dev-containers/conda-notice-already-displayed" ]; then + cat "/usr/local/etc/vscode-dev-containers/conda-notice.txt" + mkdir -p "$HOME/.config/vscode-dev-containers" + ((sleep 10s; touch "$HOME/.config/vscode-dev-containers/conda-notice-already-displayed") &) +fi +EOF +)" + +echo "${notice_script}" | tee -a /etc/bash.bashrc >> /etc/zsh/zshrc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..715c4c7 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,54 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at: +// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.2/containers/python-3-miniconda +{ + "name": "Miniconda (Python 3)", + "build": { + "context": "..", + "dockerfile": "Dockerfile", + "args": { + "NODE_VERSION": "16" + } + }, + + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + // Set *default* container specific settings.json values on container create. 
+ "settings": { + "python.defaultInterpreterPath": "/opt/conda/bin/python", + "python.linting.enabled": true, + "python.linting.pylintEnabled": true, + "python.formatting.autopep8Path": "/usr/local/py-utils/bin/autopep8", + "python.formatting.blackPath": "/usr/local/py-utils/bin/black", + "python.formatting.yapfPath": "/usr/local/py-utils/bin/yapf", + "python.linting.banditPath": "/usr/local/py-utils/bin/bandit", + "python.linting.flake8Path": "/usr/local/py-utils/bin/flake8", + "python.linting.mypyPath": "/usr/local/py-utils/bin/mypy", + "python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle", + "python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle", + "python.linting.pylintPath": "/usr/local/py-utils/bin/pylint" + }, + + // Add the IDs of extensions you want installed when the container is created. + "extensions": [ + "ms-python.python", + "ms-python.vscode-pylance" + ] + } + }, + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'postCreateCommand' to run commands after the container is created. + // "postCreateCommand": "python --version", + + // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + "remoteUser": "vscode", + "features": { + "docker-in-docker": "latest", + "github-cli": "latest", + "homebrew": "latest" + } +} diff --git a/.devcontainer/noop.txt b/.devcontainer/noop.txt new file mode 100644 index 0000000..abee195 --- /dev/null +++ b/.devcontainer/noop.txt @@ -0,0 +1,3 @@ +This file is copied into the container along with environment.yml* from the +parent folder. This is done to prevent the Dockerfile COPY instruction from +failing if no environment.yml is found. 
\ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..22c2f34 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +notebooks/data/ +notebooks/hymenoptera_data/ +notebooks/tmp/ +notebooks/test.ipynb \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 8d086e0..55d9158 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,16 @@ -# 🐙 ML Notebooks +# 🐙 Machine Learning Notebooks -A series of code examples for all sorts of machine learning tasks and applications. +This repo contains machine learning notebooks for different tasks and applications. The notebooks are meant to be minimal, easily reusable, and extendable. You are free to use them for educational and research purposes. -The notebooks are meant to be minimal and easily reusable and extendable. +This repo supports Codespaces! +- Spin up a new instance by clicking on the green `"<> Code"` button followed by the `"Configure and create codespace"` option. Make sure to select the dev container config provided with this repo. This sets up an environment with all the dependencies installed and ready to go. +- Once the codespace is fully running, you can install all the libraries you will need to run the notebooks under the `/notebooks` folder. Open up a terminal and simply run `conda create --name myenv --file spec-file.txt` to install all the Python libraries including PyTorch. +- Activate your environment `conda activate myenv`. *You might need to run `conda init zsh` or whatever shell you are using... and then close + reopen terminal.* +- Finally, you can try out if everything is working by opening a notebook such as `/notebooks/bow.ipynb`. 
-Feel free to use them for educational and research purposes. +--- + +## Getting Started @@ -37,6 +43,22 @@ Feel free to use them for educational and research purposes. + + + + + + + + + + + + @@ -69,14 +91,57 @@ Feel free to use them for educational and research purposes. - - + + +
Counterfactual ExplanationsA basic tutorial to learn about counterfactual explanations for explainable AI + +
Linear Regression from ScratchAn implementation of linear regression from scratch using stochastic gradient descent + +
Logistic Regression from Scratch An implementation of logistic regression from scratch
Introduction to GNNs Introduction to Graph Neural Networks. Applies basic GCN to Cora dataset for node classification.
+ + +## NLP + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -86,20 +151,78 @@ Feel free to use them for educational and research purposes. +
NameDescriptionNotebook
Bag of Words Text ClassifierBuild a simple bag of words text classifier. + +
Continuous Bag of Words (CBOW) Text ClassifierBuild a continuous bag of words text classifier. + +
Deep Continuous Bag of Words (Deep CBOW) Text ClassifierBuild a deep continuous bag of words text classifier. + +
Text Data AugmentationAn introduction to the most commonly used data augmentation techniques for text and their implementation + +
Emotion Classification with Fine-tuned BERT
+ +## Transformers + + + + + + + + - + +
+ Kaggle + + + + + + + + + + + + + + + + + + + + + + +
NameDescriptionNotebook
Text Classification using Attention Mechanism and Positional EmbeddingsText Classification using Transformer An implementation of Attention Mechanism and Positional Embeddings on a text classification task -
Neural Machine Translation using TransformerAn implementation of Transformer to translate human readable dates in any format to YYYY-MM-DD format. + +
+ Kaggle
Feature Tokenizer TransformerAn implementation of Feature Tokenizer Transformer on a classification task + +
+ Kaggle
Named Entity Recognition using TransformerAn implementation of Transformer to perform token classification and identify species in PubMed abstracts + +
+ Kaggle
Extractive Question Answering using TransformerAn implementation of Transformer to perform extractive question answering + +
+ Kaggle
+ +## Computer Vision + + + + + + + + +
+ Kaggle @@ -107,11 +230,114 @@ Feel free to use them for educational and research purposes. +
+ Kaggle + + + + + -
NameDescriptionNotebook
Siamese Network An implementation of Siamese Network for finding Image Similarity -
An implementation of Variational Auto Encoder to generate Augmentations for MNIST Handwritten Digits -
Object Detection using Sliding Window and Image PyramidA basic object detection implementation using sliding window and image pyramid on top of an image classifier + +
+ Kaggle
+ + Object Detection using Selective Search + A basic object detection implementation using selective search on top of an image classifier + + +
+ Kaggle + + + + +## Generative Adversarial Network + + + + + + + + + + + + + + + + + + + + + + +
NameDescriptionNotebook
Deep Convolutional GANAn Implementation of Deep Convolutional GAN to generate MNIST digits + +
+ Kaggle
Wasserstein GAN with Gradient PenaltyAn Implementation of Wasserstein GAN with Gradient Penalty to generate MNIST digits + +
+ Kaggle
Conditional GANAn Implementation of Conditional GAN to generate MNIST digits + +
+ Kaggle
+ + ## Parameter Efficient Fine-tuning + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameDescriptionNotebook
LoRA BERTAn Implementation of BERT Finetuning using LoRA + Kaggle
LoRA BERT NERAn Implementation of BERT Finetuning using LoRA for token classification task + Kaggle
LoRA T5An Implementation of T5 Finetuning using LoRA + Kaggle
LoRA TinyLlama 1.1BAn Implementation of TinyLlama 1.1B Finetuning using LoRA + Kaggle
QLoRA TinyLlama 1.1BAn Implementation of TinyLlama 1.1B Finetuning using QLoRA + Kaggle
QLoRA Mistral 7BAn Implementation of Mistral 7B Finetuning using QLoRA + Kaggle
+ +--- + +If you find any bugs or have any questions regarding these notebooks, please open an issue. We will address it as soon as we can. Reach out on [Twitter](https://twitter.com/omarsar0) if you have any questions. diff --git a/img/bow.png b/img/bow.png new file mode 100644 index 0000000..7b17e99 Binary files /dev/null and b/img/bow.png differ diff --git a/img/cbow.png b/img/cbow.png new file mode 100644 index 0000000..021c4cd Binary files /dev/null and b/img/cbow.png differ diff --git a/img/deep_cbow.png b/img/deep_cbow.png new file mode 100644 index 0000000..acd04dd Binary files /dev/null and b/img/deep_cbow.png differ diff --git a/notebooks/bow-dataloader.ipynb b/notebooks/bow-dataloader.ipynb new file mode 100644 index 0000000..3965fa7 --- /dev/null +++ b/notebooks/bow-dataloader.ipynb @@ -0,0 +1,326 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bag of Words Text Classifier\n", + "\n", + "The code below implements a simple bag of words text classifier.\n", + "- We tokenize the text, create a vocabulary and encode each piece of text in the dataset\n", + "- The lookup allows for extracting embeddings for each tokenized inputs\n", + "- The embedding vectors are added together with a bias vector\n", + "- The resulting vector is referred to as the scores\n", + "- The score are applied a softmax to generate probabilities which are used for the classification task\n", + "\n", + "The code used in this notebook was inspired by code from the [official repo](https://github.com/neubig/nn4nlp-code) used in the [CMU Neural Networks for NLP class](http://www.phontron.com/class/nn4nlp2021/schedule.html) by [Graham Neubig](http://www.phontron.com/index.php). 
def read_data(filename):
    """Read a ``label ||| text`` file into a list of column lists.

    Every line is lower-cased, stripped of surrounding whitespace and split
    on the literal separator ``' ||| '``. Note that lower-casing applies to
    the whole line, label column included.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one entry per non-blank line. Each entry is the list of
        columns produced by the split (``[label, text]`` for a well-formed
        line).
    """
    data = []
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.lower().strip()
            if not line:
                # A blank line would become [''], which has no text column
                # and breaks later code that indexes line[1].
                continue
            data.append(line.split(' ||| '))
    return data
from torch.utils.data import DataLoader
from torch.utils.data import Dataset


def create_dict(data, check_unk=False):
    """Populate the module-level ``word_to_index`` / ``tag_to_index`` maps.

    Args:
        data: Iterable of records where ``record[0]`` is the tag and
            ``record[1]`` is the space-separated text.
        check_unk: When false (the default), every unseen word receives the
            next free index. When true, every unseen word is mapped to the
            index already stored under the unknown-word key ``""``, so no
            new index is created.

    Unseen tags always receive the next free tag index, whatever
    ``check_unk`` is. The function mutates the two dictionaries in place and
    returns ``None``.
    """
    # NOTE(review): the unknown-word key is the empty string as this file has
    # it; it looks like a placeholder token may have been lost -- confirm.
    for line in data:
        for word in line[1].split(" "):
            if word in word_to_index:
                continue
            if check_unk:
                # The unknown index is looked up only when needed, so the
                # key is not required while building the training vocabulary.
                word_to_index[word] = word_to_index[""]
            else:
                word_to_index[word] = len(word_to_index)

        if line[0] not in tag_to_index:
            tag_to_index[line[0]] = len(tag_to_index)


def create_tensor(data):
    """Lazily encode records as ``[word_indices, tag_index]`` pairs.

    Each yielded item is a two-element list: the list of ``word_to_index``
    values for the words of ``record[1]`` and the ``tag_to_index`` value of
    ``record[0]``. Raises ``KeyError`` for a word or tag missing from the
    module-level dictionaries.
    """
    for line in data:
        word_ids = [word_to_index[word] for word in line[1].split(" ")]
        yield [word_ids, tag_to_index[line[0]]]


# load data into a dataset and dataloader; ensure that the data is split X, y
class TextDataset(Dataset):
    """Dataset over already-encoded ``[word_indices, tag_index]`` records."""

    def __init__(self, data):
        # Indexable collection of [word_indices, tag_index] records.
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(word_indices, tag_index)`` for record ``idx`` as tensors."""
        record = self.data[idx]
        return torch.as_tensor(record[0]), torch.as_tensor(record[1])
# cpu or gpu
device = "cuda" if torch.cuda.is_available() else "cpu"


# create a simple neural network with embedding layer, bias, and xavier initialization
class BoW(torch.nn.Module):
    """Bag-of-words classifier: summed per-word score vectors plus a bias.

    The embedding table holds one ``ntags``-sized score vector per word
    index; the scores of all words in a sequence are summed and a learned
    bias is added.

    Args:
        nwords: Number of rows in the embedding table (vocabulary size).
        ntags: Number of output classes (size of each score vector).
    """

    def __init__(self, nwords, ntags):
        super(BoW, self).__init__()
        self.embedding = nn.Embedding(nwords, ntags)
        nn.init.xavier_uniform_(self.embedding.weight)

        # Register the bias as a Parameter. A plain tensor attribute is not
        # returned by model.parameters(), so the optimizer would never update
        # it, and model.to(device) / state_dict() would ignore it as well.
        self.bias = nn.Parameter(torch.zeros(ntags))

    def forward(self, x):
        """Score one sequence.

        Args:
            x: Integer tensor of word indices; the sum over ``dim=0`` assumes
                a single 1-D sequence of shape ``(seq_len,)``.

        Returns:
            Tensor of shape ``(1, ntags)`` with the unnormalised class scores.
        """
        emb = self.embedding(x)  # seq_len x ntags (for each seq)
        out = torch.sum(emb, dim=0) + self.bias  # ntags
        out = out.view(1, -1)  # reshape to (1, ntags)
        return out
# perform training of the Bow model
def train_bow(model, optimizer, criterion, train_data, epochs=10):
    """Train ``model`` and report train/test metrics after every epoch.

    Batches are read from the module-level ``train_loader`` and
    ``test_loader`` and moved to the module-level ``device``; the test
    accuracy is divided by the length of the module-level ``test_data``.
    ``train_data`` is used only for its length, to average the loss and
    accuracy.

    Args:
        model: Module called with one sequence of word indices and returning
            scores accepted by ``criterion``.
        optimizer: Optimizer over the parameters of ``model``.
        criterion: Loss called as ``criterion(output, tag)``.
        train_data: Sized collection of training examples.
        epochs: Number of passes over ``train_loader`` (default 10, the
            previously hard-coded value).

    Returns:
        None. One summary line per epoch is printed.
    """
    for ITER in range(epochs):
        # perform training
        model.train()
        total_loss = 0.0
        train_correct = 0
        for sentence, tag in train_loader:
            # The loader yields batches; sentence[0] takes the first (with
            # batch_size=1, the only) sequence of the batch.
            sentence = sentence[0].to(device)
            tag = tag.to(device)

            output = model(sentence)
            predicted = torch.argmax(output.detach()).item()

            loss = criterion(output, tag)
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if predicted == tag:
                train_correct += 1

        # perform testing of the model
        model.eval()
        test_correct = 0
        # No gradients are needed for evaluation; skipping graph construction
        # saves memory and time without changing the predictions.
        with torch.no_grad():
            for sentence, tag in test_loader:
                sentence = sentence[0].to(device)
                output = model(sentence)
                predicted = torch.argmax(output).item()
                if predicted == tag:
                    test_correct += 1

        # print model performance results
        log = f'ITER: {ITER+1} | ' \
            f'train loss/sent: {total_loss/len(train_data):.4f} | ' \
            f'train accuracy: {train_correct/len(train_data):.4f} | ' \
            f'test accuracy: {test_correct/len(test_data):.4f}'
        print(log)
optimizer, criterion, train_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exercises\n", + "\n", + "To keep on practising, you can try the following exercises:\n", + "\n", + "- Try to use different batch sizes and see how it affects the training.\n", + "- Try to use [`torchtext`](https://pytorch.org/text/stable/index.html#) to load other datasets and create tokenizer and vocabularies for them. This [example](https://pytorch.org/tutorials/beginner/transformer_tutorial.html) on Transformer could be useful to help guide you.\n", + "- Write a mini Python library easily help you train and evaluate models. You can use the code from this notebook as a starting point." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nlp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "154abf72fb8cc0db1aa0e7366557ff891bff86d6d75b7e5f2e68a066d591bfd7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/bow.ipynb b/notebooks/bow.ipynb new file mode 100644 index 0000000..5a85f63 --- /dev/null +++ b/notebooks/bow.ipynb @@ -0,0 +1,313 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Bag of Words Text Classifier\n", + "\n", + "The code below implements a simple bag of words text classifier.\n", + "- We tokenize the text, create a vocabulary and encode each piece of text in the dataset\n", + "- The lookup allows for extracting embeddings for each tokenized inputs\n", + "- The embedding vectors are added together with a bias vector\n", + "- The resulting vector is referred to as the scores\n", + "- The score are applied a softmax to generate probabilities which 
are used for the classification task\n", + "\n", + "The code used in this notebook was inspired by code from the [official repo](https://github.com/neubig/nn4nlp-code) used in the [CMU Neural Networks for NLP class](http://www.phontron.com/class/nn4nlp2021/schedule.html) by [Graham Neubig](http://www.phontron.com/index.php). \n", + "\n", + "![img txt](../img/bow.png?raw=true)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import random\n", + "import torch.nn as nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "# download the files\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/dev.txt\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/test.txt\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/train.txt\n", + "\n", + "# create the data folders\n", + "!mkdir data data/classes\n", + "!cp dev.txt data/classes\n", + "!cp test.txt data/classes\n", + "!cp train.txt data/classes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Read the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# function to read in data, process each line and split columns by \" ||| \"\n", + "def read_data(filename):\n", + " data = []\n", + " with open(filename, 'r') as f:\n", + " for line in f:\n", + " line = line.lower().strip()\n", + " line = line.split(' ||| ')\n", + " data.append(line)\n", + " return data\n", + "\n", + "train_data = read_data('data/classes/train.txt')\n", + "test_data = read_data('data/classes/test.txt')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Construct the Vocab 
and Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# creating the word and tag indices\n", + "word_to_index = {}\n", + "word_to_index[\"\"] = len(word_to_index) # adds to dictionary\n", + "tag_to_index = {}\n", + "\n", + "# create word to index dictionary and tag to index dictionary from data\n", + "def create_dict(data, check_unk=False):\n", + " for line in data:\n", + " for word in line[1].split(\" \"):\n", + " if check_unk == False:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = len(word_to_index)\n", + " else:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = word_to_index[\"\"]\n", + "\n", + " if line[0] not in tag_to_index:\n", + " tag_to_index[line[0]] = len(tag_to_index)\n", + "\n", + "create_dict(train_data)\n", + "create_dict(test_data, check_unk=True)\n", + "\n", + "# create word and tag tensors from data\n", + "def create_tensor(data):\n", + " for line in data:\n", + " yield([word_to_index[word] for word in line[1].split(\" \")], tag_to_index[line[0]])\n", + "\n", + "train_data = list(create_tensor(train_data))\n", + "test_data = list(create_tensor(test_data))\n", + "\n", + "number_of_words = len(word_to_index)\n", + "number_of_tags = len(tag_to_index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "# cpu or gpu\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "\n", + "# create a simple neural network with embedding layer, bias, and xavier initialization\n", + "class BoW(torch.nn.Module):\n", + " def __init__(self, nwords, ntags):\n", + " super(BoW, self).__init__()\n", + " self.embedding = nn.Embedding(nwords, ntags)\n", + " nn.init.xavier_uniform_(self.embedding.weight)\n", + "\n", + " type = torch.cuda.FloatTensor if torch.cuda.is_available() else 
torch.FloatTensor\n", + " self.bias = torch.zeros(ntags, requires_grad=True).type(type)\n", + "\n", + " def forward(self, x):\n", + " emb = self.embedding(x) # seq_len x ntags (for each seq) \n", + " out = torch.sum(emb, dim=0) + self.bias # ntags\n", + " out = out.view(1, -1) # reshape to (1, ntags)\n", + " return out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pretest the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "tensor([[ 0.0124, 0.0164, -0.0182, -0.0014, -0.0120]], device='cuda:0',\n", + " grad_fn=)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# function to convert sentence into tensor using word_to_index dictionary\n", + "def sentence_to_tensor(sentence):\n", + " return torch.LongTensor([word_to_index[word] for word in sentence.split(\" \")])\n", + "\n", + "# test the sentence_to_tensor function\n", + "type = torch.cuda.LongTensor if torch.cuda.is_available() else torch.LongTensor\n", + "out = sentence_to_tensor(\"i love dogs\").type(type)\n", + "test_model = BoW(number_of_words, number_of_tags).to(device)\n", + "test_model(out)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ITER: 1 | train loss/sent: 1.4733 | train accuracy: 0.3631 | test accuracy: 0.4009\n", + "ITER: 2 | train loss/sent: 1.1216 | train accuracy: 0.6040 | test accuracy: 0.4118\n", + "ITER: 3 | train loss/sent: 0.9123 | train accuracy: 0.7117 | test accuracy: 0.4154\n", + "ITER: 4 | train loss/sent: 0.7688 | train accuracy: 0.7664 | test accuracy: 0.4140\n", + "ITER: 5 | train loss/sent: 0.6631 | train accuracy: 0.8065 | test accuracy: 0.4068\n", + "ITER: 6 | train loss/sent: 0.5814 | train 
accuracy: 0.8324 | test accuracy: 0.4059\n", + "ITER: 7 | train loss/sent: 0.5171 | train accuracy: 0.8507 | test accuracy: 0.4077\n", + "ITER: 8 | train loss/sent: 0.4640 | train accuracy: 0.8695 | test accuracy: 0.4036\n", + "ITER: 9 | train loss/sent: 0.4191 | train accuracy: 0.8830 | test accuracy: 0.3991\n", + "ITER: 10 | train loss/sent: 0.3818 | train accuracy: 0.8929 | test accuracy: 0.3964\n" + ] + } + ], + "source": [ + "# train and test the BoW model\n", + "model = BoW(number_of_words, number_of_tags).to(device)\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(model.parameters())\n", + "type = torch.LongTensor\n", + "\n", + "if torch.cuda.is_available():\n", + " model.to(device)\n", + " type = torch.cuda.LongTensor\n", + "\n", + "# perform training of the Bow model\n", + "def train_bow(model, optimizer, criterion, train_data):\n", + " for ITER in range(10):\n", + " # perform training\n", + " model.train()\n", + " random.shuffle(train_data)\n", + " total_loss = 0.0\n", + " train_correct = 0\n", + " for sentence, tag in train_data:\n", + " sentence = torch.tensor(sentence).type(type)\n", + " tag = torch.tensor([tag]).type(type)\n", + " output = model(sentence)\n", + " predicted = torch.argmax(output.data.detach()).item()\n", + " \n", + " loss = criterion(output, tag)\n", + " total_loss += loss.item()\n", + "\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if predicted == tag: train_correct+=1\n", + "\n", + " # perform testing of the model\n", + " model.eval()\n", + " test_correct = 0\n", + " for sentence, tag in test_data:\n", + " sentence = torch.tensor(sentence).type(type)\n", + " output = model(sentence)\n", + " predicted = torch.argmax(output.data.detach()).item()\n", + " if predicted == tag: test_correct += 1\n", + " \n", + " # print model performance results\n", + " log = f'ITER: {ITER+1} | ' \\\n", + " f'train loss/sent: {total_loss/len(train_data):.4f} | ' \\\n", + " f'train 
accuracy: {train_correct/len(train_data):.4f} | ' \\\n", + " f'test accuracy: {test_correct/len(test_data):.4f}'\n", + " print(log)\n", + "\n", + "# call the train_bow function\n", + "train_bow(model, optimizer, criterion, train_data)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nlp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "154abf72fb8cc0db1aa0e7366557ff891bff86d6d75b7e5f2e68a066d591bfd7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/cbow.ipynb b/notebooks/cbow.ipynb new file mode 100644 index 0000000..85d382f --- /dev/null +++ b/notebooks/cbow.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Continuous Bag of Words (CBOW) Text Classifier\n", + "\n", + "The code below implements a continuous bag of words text classifier.\n", + "- We tokenize the text, create a vocabulary and encode each piece of text in the dataset\n", + "- The lookup allows for extracting embeddings for each tokenized input\n", + "- The embedding vectors are added together\n", + "- The resulting vector is multiplied with a weight matrix, which is then added a bias vector; this results in scores\n", + "- The scores are applied a softmax to generate probabilities which are used for the final classification\n", + "\n", + "The code used in this notebook was inspired by code from the [official repo](https://github.com/neubig/nn4nlp-code) used in the [CMU Neural Networks for NLP class](http://www.phontron.com/class/nn4nlp2021/schedule.html) by [Graham Neubig](http://www.phontron.com/index.php). 
\n", + "\n", + "![img txt](../img/cbow.png?raw=true)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import random\n", + "import torch.nn as nn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "% % capture\n", + "''' uncomment to download the data\n", + "# download the files\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/dev.txt\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/test.txt\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/train.txt\n", + "\n", + "# create the data folders\n", + "!mkdir data data/classes\n", + "!cp dev.txt data/classes\n", + "!cp test.txt data/classes\n", + "!cp train.txt data/classes\n", + "'''" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read and Process Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# function to read in data, process each line and split columns by \" ||| \"\n", + "def read_data(filename):\n", + " data = []\n", + " with open(filename, 'r') as f:\n", + " for line in f:\n", + " line = line.lower().strip()\n", + " line = line.split(' ||| ')\n", + " data.append(line)\n", + " return data\n", + "\n", + "\n", + "train_data = read_data('data/classes/train.txt')\n", + "test_data = read_data('data/classes/test.txt')\n", + "\n", + "# creating the word and tag indices\n", + "word_to_index = {}\n", + "word_to_index[\"\"] = len(word_to_index) # add to dictionary\n", + "tag_to_index = {}\n", + "\n", + "# create word to index dictionary and tag to index dictionary from data\n", + "def create_dict(data, check_unk=False):\n", + " for line in data:\n", + " for word in line[1].split(\" \"):\n", + " if check_unk == False:\n", + " if word not in word_to_index:\n", + " 
word_to_index[word] = len(word_to_index)\n", + " else:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = word_to_index[\"\"]\n", + "\n", + " if line[0] not in tag_to_index:\n", + " tag_to_index[line[0]] = len(tag_to_index)\n", + "\n", + "\n", + "create_dict(train_data)\n", + "create_dict(test_data, check_unk=True)\n", + "\n", + "# create word and tag tensors from data\n", + "def create_tensor(data):\n", + " for line in data:\n", + " yield ([word_to_index[word] for word in line[1].split(\" \")], tag_to_index[line[0]])\n", + "\n", + "\n", + "train_data = list(create_tensor(train_data))\n", + "test_data = list(create_tensor(test_data))\n", + "\n", + "number_of_words = len(word_to_index)\n", + "number_of_tags = len(tag_to_index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'torch' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_3236/4002993260.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# cpu or gpu\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdevice\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"cuda\"\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcuda\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_available\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m\"cpu\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# create a simple neural network with embedding 
layer, bias, and xavier initialization\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mclass\u001b[0m \u001b[0mCBoW\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mModule\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'torch' is not defined" + ] + } + ], + "source": [ + "# cpu or gpu\n", + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "\n", + "# create a simple neural network with embedding layer, bias, and xavier initialization\n", + "class CBoW(torch.nn.Module):\n", + " def __init__(self, nwords, ntags, emb_size):\n", + " super(CBoW, self).__init__()\n", + "\n", + " # layers\n", + " self.embedding = torch.nn.Embedding(nwords, emb_size)\n", + " self.linear = torch.nn.Linear(emb_size, ntags)\n", + "\n", + " # use xavier initialization for weights\n", + " nn.init.xavier_uniform_(self.embedding.weight)\n", + " nn.init.xavier_uniform_(self.linear.weight)\n", + "\n", + " def forward(self, x):\n", + " emb = self.embedding(x) # seq x emb_size\n", + " out = torch.sum(emb, dim=0) # emb_size\n", + " out = out.view(1, -1) # reshape to (1, emb_size)\n", + " out = self.linear(out) # 1 x ntags\n", + " return out\n", + "\n", + "\n", + "EMB_SIZE = 64\n", + "model = CBoW(number_of_words, number_of_tags, EMB_SIZE)\n", + "criterion = torch.nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(model.parameters())\n", + "type = torch.LongTensor\n", + "\n", + "if torch.cuda.is_available():\n", + " model.to(device)\n", + " type = torch.cuda.LongTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch: 1 | train 
loss/sent: 1.4111 | train accuracy: 0.3841 | test accuracy: 0.3982\n", + "epoch: 2 | train loss/sent: 0.8886 | train accuracy: 0.6522 | test accuracy: 0.3991\n", + "epoch: 3 | train loss/sent: 0.5147 | train accuracy: 0.8148 | test accuracy: 0.3950\n", + "epoch: 4 | train loss/sent: 0.3200 | train accuracy: 0.8878 | test accuracy: 0.3796\n", + "epoch: 5 | train loss/sent: 0.2148 | train accuracy: 0.9247 | test accuracy: 0.3738\n", + "epoch: 6 | train loss/sent: 0.1536 | train accuracy: 0.9466 | test accuracy: 0.3783\n", + "epoch: 7 | train loss/sent: 0.1097 | train accuracy: 0.9618 | test accuracy: 0.3638\n", + "epoch: 8 | train loss/sent: 0.0797 | train accuracy: 0.9716 | test accuracy: 0.3692\n", + "epoch: 9 | train loss/sent: 0.0568 | train accuracy: 0.9805 | test accuracy: 0.3661\n", + "epoch: 10 | train loss/sent: 0.0435 | train accuracy: 0.9853 | test accuracy: 0.3529\n" + ] + } + ], + "source": [ + "# perform training of the Bow model\n", + "\n", + "for epoch in range(10):\n", + " # perform training\n", + " model.train()\n", + " random.shuffle(train_data)\n", + " total_loss = 0.0\n", + " train_correct = 0\n", + " for sentence, tag in train_data:\n", + " sentence = torch.tensor(sentence).type(type)\n", + " tag = torch.tensor([tag]).type(type)\n", + " output = model(sentence)\n", + " predicted = torch.argmax(output.data.detach()).item()\n", + "\n", + " loss = criterion(output, tag)\n", + " total_loss += loss.item()\n", + "\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if predicted == tag:\n", + " train_correct += 1\n", + "\n", + " # perform testing of the model\n", + " model.eval()\n", + " test_correct = 0\n", + " for sentence, tag in test_data:\n", + " sentence = torch.tensor(sentence).type(type)\n", + " output = model(sentence)\n", + " predicted = torch.argmax(output.data.detach()).item()\n", + " if predicted == tag:\n", + " test_correct += 1\n", + "\n", + " # print model performance results\n", + " log = 
f'epoch: {epoch+1} | ' \\\n", + " f'train loss/sent: {total_loss/len(train_data):.4f} | ' \\\n", + " f'train accuracy: {train_correct/len(train_data):.4f} | ' \\\n", + " f'test accuracy: {test_correct/len(test_data):.4f}'\n", + " print(log)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/comp_graphs.ipynb b/notebooks/comp_graphs.ipynb new file mode 100644 index 0000000..f82c1df --- /dev/null +++ b/notebooks/comp_graphs.ipynb @@ -0,0 +1,326 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction to Computational Graphs with PyTorch\n", + "\n", + "by [Elvis Saravia](https://twitter.com/omarsar0)\n", + "\n", + "\n", + "In this notebook we provide a short introduction and overview of computational graphs using PyTorch.\n", + "\n", + "There are several materials online that cover theory on the topic of computational graphs. However, I think it's much easier to learn the concept using code. I attempt to bridge the gap here which should be useful for beginner students. \n", + "\n", + "Inspired by Olah's article [\"Calculus on Computational Graphs: Backpropagation\"](https://colah.github.io/posts/2015-08-Backprop/), I've put together a few code snippets to get you started with computationsl graphs with PyTorch. This notebook should complement that article, so refer to it for more comprehensive explanations. In fact, I've tried to simplify the explanations and refer to them here." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Why Computational Graphs?\n", + "\n", + "When talking about neural networks in any context, [backpropagation](https://en.wikipedia.org/wiki/Backpropagation) is an important topic to understand because it is the algorithm used for training deep neural networks. \n", + "\n", + "Backpropagation is used to calculate derivatives which is what you need to keep optimizing parameters of the model and allowing the model to learn on the task at hand. \n", + "\n", + "Many of the deep learning frameworks today, like PyTorch, do the backpropagation out-of-the-box using [**automatic differentiation**](https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html). \n", + "\n", + "To better understand how this is done it's important to talk about **computational graphs** which define the flow of computations that are carried out throughout the network. Along the way we will use `torch.autograd` to demonstrate in code how this works. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Getting Started\n", + "\n", + "Inspired by Olah's article on computational graphs, let's look at the following expression $e = (a + b) * (b+1)$. It helps to break it down to the following operations:\n", + "\n", + "$$\n", + "\\begin{aligned}&c=a+b \\\\&d=b+1 \\\\&e=c * d\\end{aligned}\n", + "$$\n", + "\n", + "This is not a neural network of any sort. We are just going through a very simple example of a chain of operations which can be represented with computational graphs. \n", + "\n", + "Let's visualize these operations using a computational graph. Computational graphs contain **nodes** which can represent an input (tensor, matrix, vector, scalar) or an **operation** that can be the input to another node. The nodes are connected by **edges**, which represent a function argument, they are pointers to nodes. Note that the computational graphs are directed and acyclic.
The computational graph for our example looks as follows:\n", + "\n", + "![](https://colah.github.io/posts/2015-08-Backprop/img/tree-def.png)\n", + "\n", + "*Source: Christopher Olah (2015)*\n", + "\n", + "We can evaluate the expression by setting our input variables as follows: $a=2$ and $b=1$. This will allow us to compute nodes up through the graph as shown in the computational graph above. \n", + "\n", + "Rather than doing this by hand, we can use the automatic differentiation engine provided by PyTorch. \n", + "\n", + "Let's import PyTorch first:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define the inputs like this:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "a = torch.tensor([2.], requires_grad=True)\n", + "b = torch.tensor([1.], requires_grad=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we used `requires_grad=True` to tell the autograd engine that every operation on them should be tracked.
\n", + "\n", + "These are the operations in code:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "c = a + b\n", + "d = b + 1\n", + "e = c * d\n", + "\n", + "# grads populated for non-leaf nodes\n", + "c.retain_grad()\n", + "d.retain_grad()\n", + "e.retain_grad()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that we used `.retain_grad()` to allow gradients to be stored for non-leaf nodes as we are interested in inspecting those as well.\n", + "\n", + "Now that we have our computational graph, we can check the result when evaluating the expression:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([6.], grad_fn=<MulBackward0>)\n" + ] + } + ], + "source": [ + "print(e)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The output is a tensor with the value of `6.`, which verifies the results here: \n", + "\n", + "![](https://colah.github.io/posts/2015-08-Backprop/img/tree-eval.png)\n", + "*Source: Christopher Olah (2015)*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Derivatives on Computational Graphs\n", + "\n", + "Using the concept of computational graphs we are now interested in evaluating the **partial derivatives** of the edges of the graph. This will help in gathering the gradients of the graph. Remember that gradients are what we use to train the neural network and those calculations can be taken care of by the automatic differentiation engine. \n", + "\n", + "The intuition is: we want to know, for example, if $a$ directly affects $c$, how does it affect it. In other words, if we change $a$ a little, how does $c$ change.
This is referred to as the partial derivative of $c$ with respect to $a$.\n", + "\n", + "You can work this by hand, but the easy way to do this with PyTorch is by calling `.backward()` on $e$ and let the engine figure out the values. The `.backward()` signals the autograd engine to calculate the gradients and store them in the respective tensors’ `.grad` attribute.\n", + "\n", + "Let's do that now:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "e.backward()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now, let’s say we are interested in the derivative of $e$ with respect to $a$, how do we obtain this? In other words, we are looking for $\\frac{\\partial e}{\\partial a}$.\n", + "\n", + "Using PyTorch, we can do this by calling `a.grad`:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([2.])\n" + ] + } + ], + "source": [ + "print(a.grad)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is important to understand the intuition behind this. Olah puts it best:\n", + "\n", + ">Let’s consider how $e$ is affected by $a$. If we change $a$ at a speed of 1, $c$ also changes at a speed of $1$. In turn, $c$ changing at a speed of $1$ causes $e$ to change at a speed of $2$. So $e$ changes at a rate of $1*2$ with respect to $a$.\n", + "\n", + "In other words, by hand this would be:\n", + "\n", + "$$\n", + "\\frac{\\partial e}{\\partial \\boldsymbol{a}}=\\frac{\\partial e}{\\partial \\boldsymbol{c}} \\frac{\\partial \\boldsymbol{c}}{\\partial \\boldsymbol{a}} = 2 * 1\n", + "$$\n", + "\n", + "You can verify that this is correct by checking the manual calculations by Olah. 
Since $a$ is not directly connected to $e$, we can use a rule that sums over all paths from one node to the other in the computational graph, multiplying the derivatives on each edge of the path together.\n", + "\n", + "![](https://colah.github.io/posts/2015-08-Backprop/img/tree-eval-derivs.png)\n", + "*Source: Christopher Olah (2015)*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To check that this holds, let's look at another example. How about calculating the derivative of $e$ with respect to $b$, i.e., $\\frac{\\partial e}{\\partial b}$?\n", + "\n", + "We can get that through `b.grad`:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([5.])\n" + ] + } + ], + "source": [ + "print(b.grad)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If you work it out by hand, you are basically doing the following:\n", + "\n", + "$$\n", + "\\frac{\\partial e}{\\partial b}=1 * 2+1 * 3\n", + "$$\n", + "\n", + "It indicates how $b$ affects $e$ through $c$ and $d$. We are essentially summing over paths in the computational graph.\n", + "\n", + "Here are all the gradients collected, including non-leaf nodes:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([2.]) tensor([5.]) tensor([2.]) tensor([3.]) tensor([1.])\n" + ] + } + ], + "source": [ + "print(a.grad, b.grad, c.grad, d.grad, e.grad)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can use the computational graph above to verify that everything is correct. This is the power of computational graphs and how they are used by automatic differentiation engines.
It's also a very useful concept to understand when developing neural network architectures and verifying their correctness.\n", + "\n", + "### Next Steps\n", + "\n", + "In this notebook, I've provided a simple and intuitive explanation of the concept of computational graphs using PyTorch. I highly recommend going through [Olah's article](https://colah.github.io/posts/2015-08-Backprop/) for more on the topic.\n", + "\n", + "In the next tutorial, I will be applying the concept of computational graphs to more advanced operations you typically see in a neural network. In fact, if you are interested in this, and you are feeling comfortable with the topic now, you can check out these two PyTorch tutorials:\n", + "\n", + "- [A gentle introduction to `torch.autograd`](https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html)\n", + "- [Automatic differentiation with `torch.autograd`](https://pytorch.org/tutorials/beginner/basics/autogradqs_tutorial.html)\n", + "\n", + "And here are some other useful references used to put this article together:\n", + "\n", + "- [Hacker's guide to Neural Networks\n", + "](http://karpathy.github.io/neuralnets/)\n", + "- [Backpropagation calculus](https://www.youtube.com/watch?v=tIeHLnjs5U8&ab_channel=3Blue1Brown)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('nlp')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "154abf72fb8cc0db1aa0e7366557ff891bff86d6d75b7e5f2e68a066d591bfd7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/concise_log_reg.ipynb b/notebooks/concise_log_reg.ipynb new file mode 100644 index 0000000..fa89bcf --- /dev/null +++
b/notebooks/concise_log_reg.ipynb @@ -0,0 +1,243 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Concise Logistic Regression for Image Classification\n", + "\n", + "- Shows a concise implementation of logistic regression for image classification\n", + "- Uses PyTorch" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# imports\n", + "import torch\n", + "import torchvision\n", + "import torch.nn as nn\n", + "from torchvision import datasets, models, transforms\n", + "import os\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "%matplotlib inline\n", + "\n", + "# use gpu if available\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# download the data (uncomment if to download the data locally)\n", + "#!wget https://download.pytorch.org/tutorial/hymenoptera_data.zip\n", + "#!unzip hymenoptera_data.zip" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# create data loaders\n", + "\n", + "data_dir = 'hymenoptera_data'\n", + "\n", + "# custom transformer to flatten the image tensors\n", + "class ReshapeTransform:\n", + " def __init__(self, new_size):\n", + " self.new_size = new_size\n", + "\n", + " def __call__(self, img):\n", + " result = torch.reshape(img, self.new_size)\n", + " return result\n", + "\n", + "# transformations used to standardize and normalize the datasets\n", + "data_transforms = {\n", + " 'train': transforms.Compose([\n", + " transforms.Resize(224),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " ReshapeTransform((-1,)) # flattens the data\n", + " ]),\n", + " 'val': transforms.Compose([\n", + " transforms.Resize(224),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", 
+ " ReshapeTransform((-1,)) # flattens the data\n", + " ]),\n", + "}\n", + "\n", + "# load the correspoding folders\n", + "image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),\n", + " data_transforms[x])\n", + " for x in ['train', 'val']}\n", + "\n", + "# load the entire dataset; we are not using minibatches here\n", + "train_dataset = torch.utils.data.DataLoader(image_datasets['train'],\n", + " batch_size=len(image_datasets['train']),\n", + " shuffle=True)\n", + "\n", + "test_dataset = torch.utils.data.DataLoader(image_datasets['val'],\n", + " batch_size=len(image_datasets['val']),\n", + " shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "# build the LR model\n", + "class LR(nn.Module):\n", + " def __init__(self, dim):\n", + " super(LR, self).__init__()\n", + " self.linear = nn.Linear(dim, 1)\n", + " nn.init.zeros_(self.linear.weight)\n", + " nn.init.zeros_(self.linear.bias)\n", + "\n", + " def forward(self, x):\n", + " x = self.linear(x)\n", + " x = torch.sigmoid(x)\n", + " return x " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# predict function\n", + "def predict(yhat, y):\n", + " yhat = yhat.squeeze()\n", + " y = y.unsqueeze(0) \n", + " y_prediction = torch.zeros(y.size()[1])\n", + " for i in range(yhat.shape[0]):\n", + " if yhat[i] <= 0.5:\n", + " y_prediction[i] = 0\n", + " else:\n", + " y_prediction[i] = 1\n", + " return 100 - torch.mean(torch.abs(y_prediction - y)) * 100" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# model config\n", + "dim = train_dataset.dataset[0][0].shape[0]\n", + "\n", + "lrmodel = LR(dim).to(device)\n", + "criterion = nn.BCELoss()\n", + "optimizer = torch.optim.SGD(lrmodel.parameters(), lr=0.0001)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Cost after iteration 0: 0.6931471228599548 | Train Acc: 50.40983581542969 | Test Acc: 45.75163269042969\n", + "Cost after iteration 10: 0.6691471338272095 | Train Acc: 64.3442611694336 | Test Acc: 54.24836730957031\n", + "Cost after iteration 20: 0.6513182520866394 | Train Acc: 68.44261932373047 | Test Acc: 54.24836730957031\n", + "Cost after iteration 30: 0.6367825269699097 | Train Acc: 68.03278350830078 | Test Acc: 54.24836730957031\n", + "Cost after iteration 40: 0.6245337128639221 | Train Acc: 69.67213439941406 | Test Acc: 54.90196228027344\n", + "Cost after iteration 50: 0.6139225363731384 | Train Acc: 70.90164184570312 | Test Acc: 56.20914840698242\n", + "Cost after iteration 60: 0.6045235395431519 | Train Acc: 72.54098510742188 | Test Acc: 56.86274337768555\n", + "Cost after iteration 70: 0.5960512161254883 | Train Acc: 74.18032836914062 | Test Acc: 57.51633834838867\n", + "Cost after iteration 80: 0.5883084535598755 | Train Acc: 73.77049255371094 | Test Acc: 57.51633834838867\n", + "Cost after iteration 90: 0.5811557769775391 | Train Acc: 74.59016418457031 | Test Acc: 58.1699333190918\n" + ] + } + ], + "source": [ + "# training the model\n", + "costs = []\n", + "\n", + "for ITER in range(100):\n", + " lrmodel.train()\n", + " x, y = next(iter(train_dataset))\n", + " test_x, test_y = next(iter(test_dataset))\n", + "\n", + " # forward\n", + " yhat = lrmodel.forward(x.to(device))\n", + "\n", + " cost = criterion(yhat.squeeze(), y.type(torch.FloatTensor).to(device))\n", + " train_pred = predict(yhat, y)\n", + "\n", + " # backward\n", + " optimizer.zero_grad()\n", + " cost.backward()\n", + " optimizer.step()\n", + " \n", + " # evaluate\n", + " lrmodel.eval()\n", + " with torch.no_grad():\n", + " yhat_test = lrmodel.forward(test_x.to(device))\n", + " test_pred = predict(yhat_test, test_y)\n", + "\n", + " if ITER % 10 == 0:\n", + " costs.append(cost)\n", + "\n", + " if ITER % 10 == 0:\n", + " print(\"Cost after 
iteration {}: {} | Train Acc: {} | Test Acc: {}\".format(ITER, \n", + " cost, \n", + " train_pred,\n", + " test_pred))\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### References\n", + "- [A Logistic Regression Model from Scratch](https://colab.research.google.com/drive/1iBoJ0kngkOthy7SgVaVQA1aHEROt5mra?usp=sharing)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('play')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "cf9800998463bc980d70cdbacff0c7e9a10687346dc898569e92f016d6e252c9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/deep_cbow.ipynb b/notebooks/deep_cbow.ipynb new file mode 100644 index 0000000..02a63aa --- /dev/null +++ b/notebooks/deep_cbow.ipynb @@ -0,0 +1,291 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deep Continuous Bag of Words (Deep CBOW) Text Classifier\n", + "\n", + "The code below implements a continuous bag of words text classifier.\n", + "- We tokenize the text, create a vocabulary and encode each piece of text in the dataset\n", + "- We create embeddings for inputs and sum them together\n", + "- The resulting vector is fed to a hidden neural network, which generates a new vector that is multiplied by a weight matrix\n", + "- We then add the bias and obtain scores\n", + "- A softmax is applied to the scores to generate probabilities, which are used for the final classification\n", + "\n", + "The code used in this notebook was inspired by code from the [official repo](https://github.com/neubig/nn4nlp-code) used in the [CMU Neural Networks for NLP 
class](http://www.phontron.com/class/nn4nlp2021/schedule.html) by [Graham Neubig](http://www.phontron.com/index.php). \n", + "\n", + "![img txt](https://github.com/dair-ai/ML-Notebooks/blob/main/img/deep_cbow.png?raw=true)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import random\n", + "import torch.nn as nn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "''' uncomment to download the data\n", + "%%capture\n", + "\n", + "# download the files\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/dev.txt\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/test.txt\n", + "!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/classes/train.txt\n", + "\n", + "# create the data folders\n", + "!mkdir data data/classes\n", + "!cp dev.txt data/classes\n", + "!cp test.txt data/classes\n", + "!cp train.txt data/classes\n", + "'''" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read and Process the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# function to read in data, process each line and split columns by \" ||| \"\n", + "def read_data(filename):\n", + " data = []\n", + " with open(filename, 'r') as f:\n", + " for line in f:\n", + " line = line.lower().strip()\n", + " line = line.split(' ||| ')\n", + " data.append(line)\n", + " return data\n", + "\n", + "train_data = read_data('data/classes/train.txt')\n", + "test_data = read_data('data/classes/test.txt')\n", + "\n", + "# creating the word and tag indices\n", + "word_to_index = {}\n", + "word_to_index[\"\"] = len(word_to_index) # add to dictionary\n", + "tag_to_index = {}\n", + "\n", + "# create word to index dictionary and tag to index dictionary from data\n", + "def create_dict(data, 
check_unk=False):\n", + " for line in data:\n", + " for word in line[1].split(\" \"):\n", + " if check_unk == False:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = len(word_to_index)\n", + " else:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = word_to_index[\"\"]\n", + "\n", + " if line[0] not in tag_to_index:\n", + " tag_to_index[line[0]] = len(tag_to_index)\n", + "\n", + "create_dict(train_data)\n", + "create_dict(test_data, check_unk=True)\n", + "\n", + "# create word and tag tensors from data\n", + "def create_tensor(data):\n", + " for line in data:\n", + " yield([word_to_index[word] for word in line[1].split(\" \")], tag_to_index[line[0]])\n", + "\n", + "train_data = list(create_tensor(train_data))\n", + "test_data = list(create_tensor(test_data))\n", + "\n", + "number_of_words = len(word_to_index)\n", + "number_of_tags = len(tag_to_index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", + "\n", + "# create a simple neural network with embedding layer, bias, and xavier initialization\n", + "class DeepCBoW(nn.Module):\n", + " def __init__(self, nwords, ntags, hidden_size, num_layers, emb_size):\n", + " super(DeepCBoW, self).__init__()\n", + "\n", + " self.num_layers = num_layers\n", + "\n", + " # layers\n", + " self.embedding = nn.Embedding(nwords, emb_size)\n", + " self.linears = nn.ModuleList([nn.Linear(emb_size if i ==0 else hidden_size, hidden_size) \\\n", + " for i in range(num_layers)])\n", + "\n", + " # use xavier initialization for weights\n", + " nn.init.xavier_uniform_(self.embedding.weight)\n", + " for i in range(self.num_layers):\n", + " nn.init.xavier_uniform_(self.linears[i].weight)\n", + "\n", + " # output layer\n", + " self.output_layer = nn.Linear(hidden_size, ntags)\n", + "\n", + " def 
forward(self, x):\n", + " emb = self.embedding(x) # seq x emb_size\n", + " emb_sum = torch.sum(emb, dim=0) # emb_size\n", + " h = emb_sum.view(1, -1) # reshape to (1, emb_size)\n", + " for i in range(self.num_layers):\n", + " h = torch.tanh(self.linears[i](h))\n", + " out = self.output_layer(h) # 1 x ntags\n", + " return out\n", + "\n", + "HIDDEN_SIZE = 64\n", + "NUM_LAYERS = 2 # hidden layers\n", + "EMB_SIZE = 64\n", + "model = DeepCBoW(number_of_words, number_of_tags, HIDDEN_SIZE, NUM_LAYERS, EMB_SIZE).to(device)\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(model.parameters())\n", + "type = torch.LongTensor\n", + "\n", + "if torch.cuda.is_available():\n", + " model.to(device)\n", + " type = torch.cuda.LongTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model Training" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "epoch: 1 | train loss/sent: 1.4293 | train accuracy: 0.3765 | test accuracy: 0.3941\n", + "epoch: 2 | train loss/sent: 1.0343 | train accuracy: 0.5729 | test accuracy: 0.4127\n", + "epoch: 3 | train loss/sent: 0.6565 | train accuracy: 0.7583 | test accuracy: 0.3801\n", + "epoch: 4 | train loss/sent: 0.4013 | train accuracy: 0.8586 | test accuracy: 0.3783\n", + "epoch: 5 | train loss/sent: 0.2659 | train accuracy: 0.9079 | test accuracy: 0.3959\n", + "epoch: 6 | train loss/sent: 0.1747 | train accuracy: 0.9419 | test accuracy: 0.3787\n", + "epoch: 7 | train loss/sent: 0.1257 | train accuracy: 0.9573 | test accuracy: 0.3805\n", + "epoch: 8 | train loss/sent: 0.0860 | train accuracy: 0.9702 | test accuracy: 0.3719\n", + "epoch: 9 | train loss/sent: 0.0652 | train accuracy: 0.9768 | test accuracy: 0.3747\n", + "epoch: 10 | train loss/sent: 0.0434 | train accuracy: 0.9860 | test accuracy: 0.3887\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Bad 
pipe message: %s [b'I7{\\xddYY9\\x10\\xe5', b\"\\xee\\x8a\\xf0\\xff\\xe6\\x1a\\xd2\\x00\\x00|\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0#\\xc0'\\x00g\\x00@\\xc0\\n\\xc0\\x14\\x009\\x008\\xc0\\t\\xc0\\x13\\x003\\x00\", b'\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00<\\x005\\x00/\\x00\\x9a\\x00\\x99\\xc0\\x07\\xc0\\x11\\x00\\x96\\x00\\x05\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00']\n", + "Bad pipe message: %s [b'\\xe1\\x05', b'\\xb0\\x87g\\xc6U\\xd5G\\xa2.\\xd2\\xf7\\x05\\x9fL\\x00\\x00\\xa6\\xc0,\\xc0', b'\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V']\n", + "Bad pipe message: %s [b\"\\xc0$\\xc0(\\x00k\\x00j\\xc0s\\xc0w\\x00\\xc4\\x00\\xc3\\xc0#\\xc0'\\x00g\\x00@\\xc0r\\xc0v\\x00\\xbe\\x00\\xbd\\xc0\\n\\xc0\\x14\\x009\\x008\\x00\\x88\\x00\\x87\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9a\\x00\\x99\\x00E\\x00D\\xc0\\x07\\xc0\\x11\\xc0\\x08\\xc0\\x12\\x00\\x16\\x00\\x13\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00\\xc0\\x00<\\x00\\xba\\x005\\x00\\x84\\x00/\\x00\\x96\\x00A\\x00\\x05\\x00\\n\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08\\x08\\t\\x08\\n\\x08\"]\n", + "Bad pipe message: %s [b'\\xc6\\t^\\x9c\\x07\\xc5y\\xd0\\xbeR\\x8b\\xc2\\x94`T\\xd3\\xcel\\x00\\x00>\\xc0\\x14\\xc0\\n\\x009\\x008']\n", + "Bad pipe message: %s 
[b'\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06']\n", + "Bad pipe message: %s [b'', b'\\x03\\x03']\n", + "Bad pipe message: %s [b'']\n", + "Bad pipe message: %s [b'\\x14\\xc6J\\xf8[H\\x91\\xb3\\x8dV^z\\x9dVA\\xf6Tt\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E']\n", + "Bad pipe message: %s [b'', b'\\x02']\n", + "Bad pipe message: %s [b'\\x05\\x02\\x06']\n", + "Bad pipe message: %s [b'\\xd8j\\x00\\x0be\\x95\\x1d\\t\\xd2\\xa5\\x02\\xda\\x07;\\x93\\x94$\\x96\\x00\\x00>\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\xc0\\x0f\\xc0\\x05\\x005\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00\\x96\\x00\\x07\\xc0']\n", + "Bad pipe message: %s [b'\\x07\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\x00\\xff\\x02\\x01\\x00\\x15\\x03']\n", + "Bad pipe message: %s [b'1\\x84+\\xad\\xe8(\\xa4\\xf2qZ\\xbd\\x06\\x03\\x10u\\xfe\\x18w\\x00\\x00\\xa2\\xc0\\x14\\xc0', b'9\\x008\\x007\\x006\\x00\\x88']\n", + "Bad pipe message: %s [b'\\x0c0~\\xec\\xf3\\xe2M\\xe5\\xb4\\xbd:v\\xae\\xca\\xec\\xdb\\xb8!\\x00\\x00\\x86\\xc00\\xc0,\\xc0(\\xc0$\\xc0\\x14\\xc0\\n\\x00\\xa5\\x00', b\"\\xa1\\x00\\x9f\\x00k\\x00j\\x00i\\x00h\\x009\\x008\\x007\\x006\\xc02\\xc0.\\xc0*\\xc0&\\xc0\\x0f\\xc0\\x05\\x00\\x9d\\x00=\\x005\\xc0/\\xc0+\\xc0'\\xc0#\\xc0\\x13\\xc0\\t\\x00\\xa4\\x00\\xa2\\x00\\xa0\\x00\\x9e\\x00g\\x00@\\x00?\\x00>\\x003\\x002\\x001\\x000\\xc01\\xc0-\\xc0)\\xc0%\\xc0\\x0e\\xc0\\x04\\x00\\x9c\\x00<\\x00/\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00\\x96\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\x00\\xff\\x02\\x01\\x00\\x00g\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x1c\\x00\\x1a\\x00\\x17\\x00\\x19\\x00\\x1c\\x00\\x1b\"]\n", + "Bad pipe message: %s 
[b\"j\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\"]\n" + ] + } + ], + "source": [ + "# perform training of the Bow model\n", + "\n", + "for epoch in range(10):\n", + " # perform training\n", + " model.train()\n", + " random.shuffle(train_data)\n", + " total_loss = 0.0\n", + " train_correct = 0\n", + " for sentence, tag in train_data:\n", + " sentence = torch.tensor(sentence).type(type)\n", + " tag = torch.tensor([tag]).type(type)\n", + " output = model(sentence)\n", + " predicted = torch.argmax(output.data.detach()).item()\n", + " \n", + " loss = criterion(output, tag)\n", + " total_loss += loss.item()\n", + "\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " if predicted == tag: train_correct+=1\n", + "\n", + " # perform testing of the model\n", + " model.eval()\n", + " test_correct = 0\n", + " for sentence, tag in test_data:\n", + " sentence = torch.tensor(sentence).type(type)\n", + " output = model(sentence)\n", + " predicted = torch.argmax(output.data.detach()).item()\n", + " if predicted == tag: test_correct += 1\n", + " \n", + " # print model performance results\n", + " log = f'epoch: {epoch+1} | ' \\\n", + " f'train loss/sent: {total_loss/len(train_data):.4f} | ' \\\n", + " f'train accuracy: {train_correct/len(train_data):.4f} | ' \\\n", + " f'test accuracy: {test_correct/len(test_data):.4f}'\n", + " \n", + " print(log)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12 (main, Apr 5 2022, 06:56:58) \n[GCC 7.5.0]" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe" + } + } + }, + 
"nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/first_nn.ipynb b/notebooks/first_nn.ipynb new file mode 100644 index 0000000..4644ac3 --- /dev/null +++ b/notebooks/first_nn.ipynb @@ -0,0 +1,215 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## First Neural Network: Image Classification \n", + "\n", + "Objectives:\n", + "- Train a minimal image classifier on [MNIST](https://paperswithcode.com/dataset/mnist) using PyTorch\n", + "- Usese PyTorch and torchvision" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# The usual imports\n", + "\n", + "import torch\n", + "import torch.nn as nn\n", + "import torchvision\n", + "import torchvision.transforms as transforms" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# load the data\n", + "\n", + "class ReshapeTransform:\n", + " def __init__(self, new_size):\n", + " self.new_size = new_size\n", + "\n", + " def __call__(self, img):\n", + " return torch.reshape(img, self.new_size)\n", + "\n", + "transformations = transforms.Compose([\n", + " transforms.ToTensor(),\n", + " transforms.ConvertImageDtype(torch.float32),\n", + " ReshapeTransform((-1,))\n", + " ])\n", + "\n", + "trainset = torchvision.datasets.MNIST(root='./data', train=True,\n", + " download=True, transform=transformations)\n", + "\n", + "testset = torchvision.datasets.MNIST(root='./data', train=False,\n", + " download=True, transform=transformations)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(torch.Size([60000, 28, 28]), torch.Size([10000, 28, 28]))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check shape of data\n", + "\n", + "trainset.data.shape, testset.data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + 
"metadata": {}, + "outputs": [], + "source": [ + "# data loader\n", + "\n", + "BATCH_SIZE = 128\n", + "train_dataloader = torch.utils.data.DataLoader(trainset, \n", + " batch_size=BATCH_SIZE,\n", + " shuffle=True, \n", + " num_workers=0)\n", + "\n", + "test_dataloader = torch.utils.data.DataLoader(testset, \n", + " batch_size=BATCH_SIZE,\n", + " shuffle=False, \n", + " num_workers=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# model\n", + "\n", + "model = nn.Sequential(nn.Linear(784, 512), nn.ReLU(), nn.Linear(512, 10))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# training preparation\n", + "\n", + "trainer = torch.optim.RMSprop(model.parameters())\n", + "loss = nn.CrossEntropyLoss()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "def get_accuracy(output, target, batch_size):\n", + " # Obtain accuracy for training round\n", + " corrects = (torch.max(output, 1)[1].view(target.size()).data == target.data).sum()\n", + " accuracy = 100.0 * corrects/batch_size\n", + " return accuracy.item()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 1 | Train loss: 0.9943 | Train Accuracy: 91.7344\n", + "Epoch: 2 | Train loss: 0.1334 | Train Accuracy: 95.9422\n", + "Epoch: 3 | Train loss: 0.1030 | Train Accuracy: 96.8767\n", + "Epoch: 4 | Train loss: 0.0845 | Train Accuracy: 97.4997\n", + "Epoch: 5 | Train loss: 0.0735 | Train Accuracy: 97.8811\n" + ] + } + ], + "source": [ + "# train\n", + "\n", + "for ITER in range(5):\n", + " train_acc = 0.0\n", + " train_running_loss = 0.0\n", + "\n", + " model.train()\n", + " for i, (X, y) in enumerate(train_dataloader):\n", + " output = model(X)\n", + " l = loss(output, y)\n", + "\n", + " # update the parameters\n", + " 
l.backward()\n", + " trainer.step()\n", + " trainer.zero_grad()\n", + "\n", + " # gather metrics\n", + " train_acc += get_accuracy(output, y, BATCH_SIZE)\n", + " train_running_loss += l.detach().item()\n", + "\n", + " print('Epoch: %d | Train loss: %.4f | Train Accuracy: %.4f' \\\n", + " %(ITER+1, train_running_loss / (i+1),train_acc/(i+1)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Other things to try\n", + "\n", + "- Evaluate on test set\n", + "- Plot loss curve\n", + "- Add more layers to the model" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('play')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "cf9800998463bc980d70cdbacff0c7e9a10687346dc898569e92f016d6e252c9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/intro_gnn.ipynb b/notebooks/intro_gnn.ipynb new file mode 100644 index 0000000..e27e725 --- /dev/null +++ b/notebooks/intro_gnn.ipynb @@ -0,0 +1,817 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Introduction to GNNs with PyTorch Geometric\n", + "\n", + "In this short notebook, the goal is to provide an introductory guide to get started with Graph Neural Networks using the popular library called [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/index.html). PyTorch Geometric is a PyTorch-based library, hence we will be using PyTorch in this tutorial. \n", + "\n", + "The code used in this tutorial has been adapted from their official [examples](https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html). 
I have incorporated a bit more beginner-friendly guidance and kept it minimal." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11.3\n" + ] + } + ], + "source": [ + "# Find the CUDA version PyTorch was installed with\n", + "!python -c \"import torch; print(torch.version.cuda)\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.11.0\n" + ] + } + ], + "source": [ + "# PyTorch version\n", + "!python -c \"import torch; print(torch.__version__)\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Install the following packages, but make sure to install the right version below.\n", + "\n", + "---\n", + "\n", + "Find more instructions [here](https://pytorch-geometric.readthedocs.io/en/latest/notes/installation.html) if you get lost. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in links: https://data.pyg.org/whl/torch-1.11.0.html\n", + "Collecting torch-scatter\n", + " Downloading torch_scatter-2.0.9.tar.gz (21 kB)\n", + "Building wheels for collected packages: torch-scatter\n", + " Building wheel for torch-scatter (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for torch-scatter: filename=torch_scatter-2.0.9-cp38-cp38-linux_x86_64.whl size=304063 sha256=f9cc42222b3244636f8cb1a257bdc1556f9292207d987b02ec79e7fb59689fdb\n", + " Stored in directory: /home/codespace/.cache/pip/wheels/7c/51/2a/409339f45a48bf748a5db76dfa11373ea7c883ecf1932eee2f\n", + "Successfully built torch-scatter\n", + "Installing collected packages: torch-scatter\n", + "Successfully installed torch-scatter-2.0.9\n" + ] + } + ], + "source": [ + "!pip install torch-scatter -f https://data.pyg.org/whl/torch-1.11.0.html" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Looking in links: https://data.pyg.org/whl/torch-1.11.0.html\n", + "Collecting torch-sparse\n", + " Downloading torch_sparse-0.6.13.tar.gz (48 kB)\n", + "\u001b[K |████████████████████████████████| 48 kB 1.7 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting scipy\n", + " Downloading scipy-1.8.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (41.6 MB)\n", + "\u001b[K |████████████████████████████████| 41.6 MB 73.5 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: numpy<1.25.0,>=1.17.3 in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from scipy->torch-sparse) (1.22.3)\n", + "Building wheels for collected packages: torch-sparse\n", + " Building wheel for torch-sparse (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for torch-sparse: filename=torch_sparse-0.6.13-cp38-cp38-linux_x86_64.whl size=570934 sha256=27a158cb7be5e10bb7846cce70764338cbb12828ea70eaf5a41a79f15cfc60c3\n", + " Stored in directory: /home/codespace/.cache/pip/wheels/81/94/1a/3fd0c022a887c997c5e681961f2bd2e41f8fd6b66562b90fb6\n", + "Successfully built torch-sparse\n", + "Installing collected packages: scipy, torch-sparse\n", + "Successfully installed scipy-1.8.1 torch-sparse-0.6.13\n" + ] + } + ], + "source": [ + "!pip install torch-sparse -f https://data.pyg.org/whl/torch-1.11.0.html" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting torch-geometric\n", + " Downloading torch_geometric-2.0.4.tar.gz (407 kB)\n", + "\u001b[K |████████████████████████████████| 407 kB 26.9 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting tqdm\n", + " Downloading tqdm-4.64.0-py2.py3-none-any.whl (78 kB)\n", + "\u001b[K |████████████████████████████████| 78 kB 1.8 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: numpy in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from torch-geometric) (1.22.3)\n", + "Requirement already satisfied: scipy in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from torch-geometric) (1.8.1)\n", + "Collecting pandas\n", + " Downloading pandas-1.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.7 MB)\n", + "\u001b[K |████████████████████████████████| 11.7 MB 79.4 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting jinja2\n", + " Downloading Jinja2-3.1.2-py3-none-any.whl (133 kB)\n", + "\u001b[K |████████████████████████████████| 133 kB 87.1 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: requests in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from torch-geometric) (2.27.1)\n", + "Collecting pyparsing\n", + " Using cached pyparsing-3.0.9-py3-none-any.whl (98 
kB)\n", + "Collecting scikit-learn\n", + " Downloading scikit_learn-1.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.2 MB)\n", + "\u001b[K |████████████████████████████████| 31.2 MB 84.1 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting MarkupSafe>=2.0\n", + " Downloading MarkupSafe-2.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (25 kB)\n", + "Collecting pytz>=2020.1\n", + " Downloading pytz-2022.1-py2.py3-none-any.whl (503 kB)\n", + "\u001b[K |████████████████████████████████| 503 kB 69.2 MB/s eta 0:00:01\n", + "\u001b[?25hRequirement already satisfied: python-dateutil>=2.8.1 in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from pandas->torch-geometric) (2.8.2)\n", + "Requirement already satisfied: six>=1.5 in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from python-dateutil>=2.8.1->pandas->torch-geometric) (1.16.0)\n", + "Requirement already satisfied: charset-normalizer~=2.0.0 in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from requests->torch-geometric) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from requests->torch-geometric) (3.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from requests->torch-geometric) (2022.5.18.1)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/codespace/.conda/envs/gnn/lib/python3.8/site-packages (from requests->torch-geometric) (1.26.9)\n", + "Collecting joblib>=1.0.0\n", + " Downloading joblib-1.1.0-py2.py3-none-any.whl (306 kB)\n", + "\u001b[K |████████████████████████████████| 306 kB 78.1 MB/s eta 0:00:01\n", + "\u001b[?25hCollecting threadpoolctl>=2.0.0\n", + " Downloading threadpoolctl-3.1.0-py3-none-any.whl (14 kB)\n", + "Building wheels for collected packages: torch-geometric\n", + " Building wheel for torch-geometric (setup.py) ... 
\u001b[?25ldone\n", + "\u001b[?25h Created wheel for torch-geometric: filename=torch_geometric-2.0.4-py3-none-any.whl size=616602 sha256=c3dd839f53e4c307e7a223cb3868bbe2dfc5bb5a5586c2f1bcfe5d2f20f6d6a6\n", + " Stored in directory: /home/codespace/.cache/pip/wheels/c1/be/e9/b90ded2a496c975a539af002fe1f0f2a22a97af13b41866d6e\n", + "Successfully built torch-geometric\n", + "Installing collected packages: threadpoolctl, pytz, MarkupSafe, joblib, tqdm, scikit-learn, pyparsing, pandas, jinja2, torch-geometric\n", + "Successfully installed MarkupSafe-2.1.1 jinja2-3.1.2 joblib-1.1.0 pandas-1.4.2 pyparsing-3.0.9 pytz-2022.1 scikit-learn-1.1.1 threadpoolctl-3.1.0 torch-geometric-2.0.4 tqdm-4.64.0\n" + ] + } + ], + "source": [ + "!pip install torch-geometric" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Started\n", + "\n", + "Import PyTorch" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.11.0\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "# print torch version\n", + "print(torch.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The great thing about PyTorch Geometric is that it contains useful functionality to import and load graph-related data. " + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from torch_geometric.data import Data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's create an unweighted and undirected graph with three nodes and four total edges." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Data(x=[3, 1], edge_index=[2, 4])\n" + ] + } + ], + "source": [ + "# define edge list\n", + "edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 1]], dtype=torch.long)\n", + "\n", + "# define node features\n", + "x = torch.tensor([[-1], [0], [1]])\n", + "\n", + "# create graph data object\n", + "data = Data(x=x, edge_index=edge_index)\n", + "print(data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Our data object `Data` has many useful utility functions to check the properties of the graph. " + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4\n" + ] + } + ], + "source": [ + "# check number of edges of the graph\n", + "print(data.num_edges)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "3\n" + ] + } + ], + "source": [ + "# check number of nodes of the graph\n", + "print(data.num_nodes)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "# check number of features of the graph\n", + "print(data.num_features)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "False\n" + ] + } + ], + "source": [ + "# check if graph is directed\n", + "print(data.is_directed())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading Data\n", + "\n", + "Find more fun functions related to graph data [here](https://pytorch-geometric.readthedocs.io/en/latest/modules/data.html#torch_geometric.data.Data). 
\n", + "\n", + "One of the cool things about the PyTorch Geometric library is that it contains out-of-the-box benchmark datasets that are ready to use and explore. A popular dataset is the Cora dataset that is used for supervised graph node classification. (We will talk about these applications in an upcoming tutorial but for now we will focus on the data itself).\n", + "\n", + "\"The Cora dataset consists of 2708 scientific publications classified into one of seven classes. The citation network consists of 5429 links. Each publication in the dataset is described by a 0/1-valued word vector indicating the absence/presence of the corresponding word from the dictionary. The dictionary consists of 1433 unique words.\" - [Papers with Code](https://paperswithcode.com/dataset/cora).\n", + "\n", + "Let's load the Cora dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.x\n", + "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.tx\n", + "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.allx\n", + "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.y\n", + "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ty\n", + "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.ally\n", + "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.graph\n", + "Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.cora.test.index\n", + "Processing...\n", + "Done!\n" + ] + } + ], + "source": [ + "from torch_geometric.datasets import Planetoid\n", + "\n", + "dataset = Planetoid(root='tmp/Cora', name='Cora')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's check some of the properties of the Cora dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of graphs: 1\n", + "Number of features: 1433\n", + "Number of classes: 7\n" + ] + } + ], + "source": [ + "# number of graphs\n", + "print(\"Number of graphs: \", len(dataset))\n", + "\n", + "# number of features\n", + "print(\"Number of features: \", dataset.num_features)\n", + "\n", + "# number of classes\n", + "print(\"Number of classes: \", dataset.num_classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that this particular graph dataset only contains one graph. Graph data can be very complex and can include multiple graphs depending on the type of data and application. Let's check more feature of the Cora dataset:" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of nodes: 2708\n", + "Number of edges: 10556\n", + "Is directed: False\n" + ] + } + ], + "source": [ + "# select the first graph\n", + "data = dataset[0]\n", + "\n", + "# number of nodes\n", + "print(\"Number of nodes: \", data.num_nodes)\n", + "\n", + "# number of edges\n", + "print(\"Number of edges: \", data.num_edges)\n", + "\n", + "# check if directed\n", + "print(\"Is directed: \", data.is_directed())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can sample nodes from the graph this way:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of sample nodes: torch.Size([5, 1433])\n" + ] + } + ], + "source": [ + "# sample nodes from the graph\n", + "print(\"Shape of sample nodes: \", data.x[:5].shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We extracted 5 nodes from the graph and checked its 
shape. You will see that each node has `1433` features.\n", + "\n", + "Another great advantage of using PyTorch Geometric to load the Cora data is that it comes pre-processed and ready to use. It also has the splits for training, validation and test which we can directly use for training a GNN.\n", + "\n", + "Let's check some stats for the partitions of the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# of nodes to train on: 140\n", + "# of nodes to test on: 1000\n", + "# of nodes to validate on: 500\n" + ] + } + ], + "source": [ + "# check training nodes\n", + "print(\"# of nodes to train on: \", data.train_mask.sum().item())\n", + "\n", + "# check test nodes\n", + "print(\"# of nodes to test on: \", data.test_mask.sum().item())\n", + "\n", + "# check validation nodes\n", + "print(\"# of nodes to validate on: \", data.val_mask.sum().item())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That information is important as it will indicate to our model which nodes to train against and which to test against, and so on.\n", + "\n", + "When training neural networks we train them using batches of data. 
PyTorch Geometric provides efficient processes to load batches of data.\n", + "\n", + "PyTorch Geometric contains a data loader which is a very popular feature in PyTorch to efficiently load data when training neural networks.\n", + " \n", + "So let's try to load the data using the built in `DataLoader`:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "from torch_geometric.datasets import Planetoid\n", + "from torch_geometric.loader import DataLoader\n", + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cuda\n" + ] + } + ], + "source": [ + "print(device)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = Planetoid(root='tmp/Cora', name='Cora')\n", + "data = dataset[0].to(device)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Print some quick statistics about the data:" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X shape: torch.Size([2708, 1433])\n", + "Edge shape: torch.Size([2, 10556])\n", + "Y shape: torch.Size([2708])\n" + ] + } + ], + "source": [ + "print(\"X shape: \", data.x.shape)\n", + "print(\"Edge shape: \", data.edge_index.shape)\n", + "print(\"Y shape: \", data.y.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model and Training\n", + "\n", + "Finally, let's define a standard GCN to train on the Cora dataset. 
The aim is to train a model that gets better at predicting the class of the node.\n", + "\n", + "To keep things simple we will use the same model definition as used in the [tutorial](https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html) we adapted the code from. Note that we are using the built-in `GCNConv` model but you could easily implement your own (something we will cover in a future tutorial). \n", + "\n", + "The model below uses two `GCNConv` layers. The first layer is followed by a non-linearity `ReLU` and `Dropout`. The result is fed to the second layer on top of which we apply `log_softmax` to get a (log-)probability distribution over the classes." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import torch.nn.functional as F\n", + "from torch_geometric.nn import GCNConv\n", + "\n", + "class GCN(torch.nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " \"\"\" GCNConv layers \"\"\"\n", + " self.conv1 = GCNConv(data.num_features, 16)\n", + " self.conv2 = GCNConv(16, dataset.num_classes)\n", + "\n", + " def forward(self, data):\n", + " x, edge_index = data.x, data.edge_index\n", + " x = self.conv1(x, edge_index)\n", + " x = F.relu(x)\n", + " x = F.dropout(x, training=self.training)\n", + " x = self.conv2(x, edge_index)\n", + "\n", + " return F.log_softmax(x, dim=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Initialize the model and optimizer" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "model = GCN().to(device)\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Define accuracy function for evaluating performance:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "# useful function for computing 
accuracy\n", + "def compute_accuracy(pred_y, y):\n", + " return (pred_y == y).sum()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And finally we train the model on the training nodes for 200 epochs:" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 10, Loss: 0.8056, Training Acc: 0.9214\n", + "Epoch: 20, Loss: 0.2925, Training Acc: 0.9500\n", + "Epoch: 30, Loss: 0.1065, Training Acc: 1.0000\n", + "Epoch: 40, Loss: 0.0654, Training Acc: 1.0000\n", + "Epoch: 50, Loss: 0.0423, Training Acc: 1.0000\n", + "Epoch: 60, Loss: 0.0467, Training Acc: 0.9929\n", + "Epoch: 70, Loss: 0.0496, Training Acc: 0.9929\n", + "Epoch: 80, Loss: 0.0353, Training Acc: 0.9929\n", + "Epoch: 90, Loss: 0.0397, Training Acc: 1.0000\n", + "Epoch: 100, Loss: 0.0253, Training Acc: 1.0000\n", + "Epoch: 110, Loss: 0.0353, Training Acc: 0.9929\n", + "Epoch: 120, Loss: 0.0340, Training Acc: 1.0000\n", + "Epoch: 130, Loss: 0.0338, Training Acc: 1.0000\n", + "Epoch: 140, Loss: 0.0319, Training Acc: 1.0000\n", + "Epoch: 150, Loss: 0.0469, Training Acc: 0.9929\n", + "Epoch: 160, Loss: 0.0260, Training Acc: 0.9929\n", + "Epoch: 170, Loss: 0.0392, Training Acc: 0.9857\n", + "Epoch: 180, Loss: 0.0470, Training Acc: 0.9929\n", + "Epoch: 190, Loss: 0.0267, Training Acc: 0.9929\n", + "Epoch: 200, Loss: 0.0221, Training Acc: 1.0000\n" + ] + } + ], + "source": [ + "# train the model\n", + "model.train()\n", + "losses = []\n", + "accuracies = []\n", + "for epoch in range(200):\n", + " optimizer.zero_grad()\n", + " out = model(data)\n", + "\n", + " loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])\n", + " correct = compute_accuracy(out.argmax(dim=1)[data.train_mask], data.y[data.train_mask])\n", + " acc = int(correct) / int(data.train_mask.sum())\n", + " losses.append(loss.item())\n", + " accuracies.append(acc)\n", + "\n", + " loss.backward()\n", + " 
optimizer.step()\n", + " if (epoch+1) % 10 == 0:\n", + " print('Epoch: {}, Loss: {:.4f}, Training Acc: {:.4f}'.format(epoch+1, loss.item(), acc))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# plot the loss and accuracy\n", + "import matplotlib.pyplot as plt\n", + "plt.plot(losses)\n", + "plt.plot(accuracies)\n", + "plt.legend(['Loss', 'Accuracy'])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It looks like the model achieves a very high accuracy and small loss on the training dataset. To see how well it generalizes, let's test on the testing nodes:" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy: 0.7870\n" + ] + } + ], + "source": [ + "# evaluate the model on test set\n", + "model.eval()\n", + "pred = model(data).argmax(dim=1)\n", + "correct = compute_accuracy(pred[data.test_mask], data.y[data.test_mask])\n", + "acc = int(correct) / int(data.test_mask.sum())\n", + "print(f'Accuracy: {acc:.4f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Very cool! It seems we got a very nice accuracy for the test as well. Our model is doing okay. There are many ways you can go about trying to improve this model, but we will keep that for another time. Hopefully, with this tutorial you got a glimpse of graph data and how to use PyTorch Geometric to train GNNs on a very popular dataset. \n", + "\n", + "Note that I haven't tested if this code works with GPUs. I will leave that as an exercise for the learner. \n", + "\n", + "If you are interested in the full tutorial and more examples, visit the [PyTorch Geometric documentation](https://pytorch-geometric.readthedocs.io/en/latest/notes/introduction.html) where I adapted the code from. \n", + "\n", + "Feel free to reach out on [Twitter](https://twitter.com/omarsar0) if you have any further questions." 
+ ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/linear_regression.ipynb b/notebooks/linear_regression.ipynb new file mode 100644 index 0000000..e7bc359 --- /dev/null +++ b/notebooks/linear_regression.ipynb @@ -0,0 +1,1376 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "view-in-github" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Linear Regression from Scratch" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m41136O7L5bV" + }, + "source": [ + "In this tutorial, we are going to implement a linear regression model to predict california housing prices. We will build the model from scratch using numpy. This will be a great approach to begin understanding regression based models.\n", + "\n", + "After completing this tutorial the learner is expected to know the basic building blocks of a linear regression model. 
The learner is also expected to know the pipeline of reading and transforming data for machine learning workflows.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "Pni17h4R8v8a" + }, + "outputs": [], + "source": [ + "## Import the usual libraries\n", + "import numpy as np\n", + "import pandas as pd\n", + "from sklearn.datasets import fetch_california_housing\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import StandardScaler\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DjyJUfczL4zX" + }, + "source": [ + "# Importing the dataset\n", + "\n", + "The real-world dataset can be obtained by the function `fetch_california_housing` that downloads the dataset for us. \n", + "\n", + "The `as_frame` parameter returns a pandas dataframe which is a library useful for viewing contents of the data.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 206 + }, + "id": "aOXxbywahC5X", + "outputId": "24521e0a-6f1a-4e5c-c35d-3abb8112a9af" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MedIncHouseAgeAveRoomsAveBedrmsPopulationAveOccupLatitudeLongitudeMedHouseVal
08.325241.06.9841271.023810322.02.55555637.88-122.234.526
18.301421.06.2381370.9718802401.02.10984237.86-122.223.585
27.257452.08.2881361.073446496.02.80226037.85-122.243.521
35.643152.05.8173521.073059558.02.54794537.85-122.253.413
43.846252.06.2818531.081081565.02.18146737.85-122.253.422
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude \\\n", + "0 8.3252 41.0 6.984127 1.023810 322.0 2.555556 37.88 \n", + "1 8.3014 21.0 6.238137 0.971880 2401.0 2.109842 37.86 \n", + "2 7.2574 52.0 8.288136 1.073446 496.0 2.802260 37.85 \n", + "3 5.6431 52.0 5.817352 1.073059 558.0 2.547945 37.85 \n", + "4 3.8462 52.0 6.281853 1.081081 565.0 2.181467 37.85 \n", + "\n", + " Longitude MedHouseVal \n", + "0 -122.23 4.526 \n", + "1 -122.22 3.585 \n", + "2 -122.24 3.521 \n", + "3 -122.25 3.413 \n", + "4 -122.25 3.422 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Fetch the data using sklearn function\n", + "bunch = fetch_california_housing(download_if_missing=True, as_frame=True)\n", + "\n", + "# Load the dataframe and view\n", + "df = bunch.frame\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KUeU_jLylTx7" + }, + "source": [ + "For this dataset, our target variable is the median house value for California districts, expressed in hundreds of thousands of dollars ($100,000).\n", + "\n", + "We can take a closer look at the various statistical parameters of the dataset using pandas. The `describe` function will help us." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 300 + }, + "id": "eD4BpBHClgDc", + "outputId": "e2171ffb-bd38-4e3e-b600-ad3c249a3234" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
MedIncHouseAgeAveRoomsAveBedrmsPopulationAveOccupLatitudeLongitudeMedHouseVal
count20640.00000020640.00000020640.00000020640.00000020640.00000020640.00000020640.00000020640.00000020640.000000
mean3.87067128.6394865.4290001.0966751425.4767443.07065535.631861-119.5697042.068558
std1.89982212.5855582.4741730.4739111132.46212210.3860502.1359522.0035321.153956
min0.4999001.0000000.8461540.3333333.0000000.69230832.540000-124.3500000.149990
25%2.56340018.0000004.4407161.006079787.0000002.42974133.930000-121.8000001.196000
50%3.53480029.0000005.2291291.0487801166.0000002.81811634.260000-118.4900001.797000
75%4.74325037.0000006.0523811.0995261725.0000003.28226137.710000-118.0100002.647250
max15.00010052.000000141.90909134.06666735682.0000001243.33333341.950000-114.3100005.000010
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ], + "text/plain": [ + " MedInc HouseAge AveRooms AveBedrms Population \\\n", + "count 20640.000000 20640.000000 20640.000000 20640.000000 20640.000000 \n", + "mean 3.870671 28.639486 5.429000 1.096675 1425.476744 \n", + "std 1.899822 12.585558 2.474173 0.473911 1132.462122 \n", + "min 0.499900 1.000000 0.846154 0.333333 3.000000 \n", + "25% 2.563400 18.000000 4.440716 1.006079 787.000000 \n", + "50% 3.534800 29.000000 5.229129 1.048780 1166.000000 \n", + "75% 4.743250 37.000000 6.052381 1.099526 1725.000000 \n", + "max 15.000100 52.000000 141.909091 34.066667 35682.000000 \n", + "\n", + " AveOccup Latitude Longitude MedHouseVal \n", + "count 20640.000000 20640.000000 20640.000000 20640.000000 \n", + "mean 3.070655 35.631861 -119.569704 2.068558 \n", + "std 10.386050 2.135952 2.003532 1.153956 \n", + "min 0.692308 32.540000 -124.350000 0.149990 \n", + "25% 2.429741 33.930000 -121.800000 1.196000 \n", + "50% 2.818116 34.260000 -118.490000 1.797000 \n", + "75% 3.282261 37.710000 -118.010000 2.647250 \n", + "max 1243.333333 41.950000 -114.310000 5.000010 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6S6WG0Bejxc2" + }, + "source": [ + "As we can see the data in each of the columns is on different scales. For example, the average bedroom value is around 1 and the average population is around 1425. \n", + "\n", + "Generally, machine learning models do not work well when the data is on different scales. Thus, we have to standardize our data so that every column has zero mean and unit variance. The module [StandardScaler](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html) will help us in this.\n", + "\n", + "The training data should always be normalized. The testing data should be normalized using the values of the training data. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "pkaOgN44iQLN" + }, + "outputs": [], + "source": [ + "# !wget https://raw.githubusercontent.com/Ankit152/Fish-Market/main/Fish.csv\n", + "# import pandas as pd\n", + "# df = pd.read_csv(\"Fish.csv\")\n", + "# y = df['Weight']\n", + "# x = df[[\"Length1\", \"Length2\", \"Length3\", \"Height\", \"Width\",\"Weight\"]]\n", + "\n", + "df = bunch.frame\n", + "x = df.iloc[:,:-1] # Select all the columns, except the last column\n", + "y = df.iloc[:,-1:] # Select the last column\n", + "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.33, random_state = 1)\n", + "\n", + "input_scalar = StandardScaler()\n", + "output_scalar = StandardScaler()\n", + "\n", + "x_train = input_scalar.fit_transform(x_train).T # Normalize train data\n", + "x_test = input_scalar.transform(x_test).T # Only transform test data using values of train data\n", + "\n", + "y_train = output_scalar.fit_transform(y_train).reshape(-1)\n", + "y_test = output_scalar.transform(y_test).reshape(-1)\n", + "\n", + "dataset_copy = [ x_train.copy(), x_test.copy(), y_train.copy(), y_test.copy()]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mylVXZDk96a2" + }, + "source": [ + "# Linear Regression Model\n", + "\n", + "Now we define our linear regression model from scratch.\n", + "\n", + "A linear regression model is of the form:\n", + "\n", + "$y = a_1 x_1 + a_2 x_2 + \\dots + a_nx_n + a_{n+1}$\n", + " \n", + "The above can be rewritten using matrix multiplication as\n", + "\n", + "$ y = w^T x $\n", + "\n", + "where \n", + "\n", + "$ w = [a_1, a_2, \\dots, a_n, a_{n+1}]^T $\n", + "\n", + "$ x = [x_1, x_2, \\dots, x_n]^T $\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "id": "iJViSowz9nah" + }, + "outputs": [], + "source": [ + "class LinearRegression():\n", + " def __init__(self, dim, lr = 0.1):\n", + " assert isinstance\n", + " self.lr = lr\n", + " self.w = 
np.zeros((dim))\n", + " self.grads = {\"dw\": np.zeros((dim)) +5}\n", + "\n", + " def forward(self, x):\n", + " y = self.w.T @ x\n", + " return y\n", + " \n", + " def backward(self, x, y_hat, y):\n", + " assert y_hat.shape == y.shape\n", + " self.grads[\"dw\"] = (1 / x.shape[1]) * ((y_hat - y) @ x.T).T\n", + " assert self.grads[\"dw\"].shape == self.w.shape\n", + " \n", + " # print(self.grads[\"dw\"])\n", + "\n", + " def optimize(self):\n", + " self.w = self.w - self.lr * self.grads[\"dw\"]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6UOy32LoqCrL" + }, + "source": [ + "# Loss\n", + "\n", + "For linear regression, various loss functions such as the mean absolute error, mean squared error, or root mean squared error can be used.\n", + "\n", + "In this example, we will use the mean squared error (MSE) loss.\n", + "\n", + "The MSE loss is given by \n", + "\n", + "$ error = \\frac{1}{m} \\sum_{i=1}^{m} (y_{true}^{i} - y_{pred}^{i})^2 $ \n", + "\n", + "where $i$ denotes the particular observation/row in the dataset and $m$ is the total number of observations.\n", + "\n", + "To ensure our model is correct, the loss should decrease over each epoch.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3atqq0KirwLu" + }, + "source": [] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "mgNn3oGjjbxX", + "outputId": "2f67998e-2fa4-4c43-e091-d5180dd92029" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 0 | Train Loss 0.49999999999999983 | Test Loss 0.43712002508129305\n", + "Epoch 20 | Train Loss 0.2359010824314295 | Test Loss 0.2325611904818386\n", + "Epoch 40 | Train Loss 0.22188755162559423 | Test Loss 0.2217635127918686\n", + "Epoch 60 | Train Loss 0.21474640752415047 | Test Loss 0.2152613227580797\n", + "Epoch 80 | Train Loss 0.2095989567021037 | Test Loss 0.21037193610067245\n", + 
"Epoch 100 | Train Loss 0.20581761895152345 | Test Loss 0.20673038702760732\n", + "Epoch 120 | Train Loss 0.20303294882659725 | Test Loss 0.20402527473733864\n", + "Epoch 140 | Train Loss 0.20097918345162274 | Test Loss 0.20201418597478873\n", + "Epoch 160 | Train Loss 0.19946210112703647 | Test Loss 0.20051638978054404\n", + "Epoch 180 | Train Loss 0.19833950070910428 | Test Loss 0.19939846923590557\n", + "Epoch 200 | Train Loss 0.19750719045004836 | Test Loss 0.19856209982480624\n", + "Epoch 220 | Train Loss 0.1968887724916989 | Test Loss 0.1979347869242644\n", + "Epoch 240 | Train Loss 0.19642818272324772 | Test Loss 0.19746302071881933\n", + "Epoch 260 | Train Loss 0.19608424065206406 | Test Loss 0.19710724843033994\n", + "Epoch 280 | Train Loss 0.19582666642954677 | Test Loss 0.19683818623685323\n", + "Epoch 300 | Train Loss 0.19563316903289557 | Test Loss 0.19663411372213185\n", + "Epoch 320 | Train Loss 0.19548731666143612 | Test Loss 0.19647888801910168\n", + "Epoch 340 | Train Loss 0.19537697849050284 | Test Loss 0.19636048509166582\n", + "Epoch 360 | Train Loss 0.19529318388491884 | Test Loss 0.19626992725672532\n", + "Epoch 380 | Train Loss 0.19522928672715056 | Test Loss 0.19620049386222657\n", + "Epoch 400 | Train Loss 0.19518035283239818 | Test Loss 0.19614713968570663\n", + "Epoch 420 | Train Loss 0.19514271054504922 | Test Loss 0.1961060658296224\n", + "Epoch 440 | Train Loss 0.19511362075549118 | Test Loss 0.19607440266698017\n", + "Epoch 460 | Train Loss 0.19509103436032973 | Test Loss 0.196049975197568\n", + "Epoch 480 | Train Loss 0.19507341379189 | Test Loss 0.1960311290809374\n", + "Epoch 500 | Train Loss 0.195059601524607 | Test Loss 0.1960166013981983\n", + "Epoch 520 | Train Loss 0.19504872305387622 | Test Loss 0.19600542443105118\n", + "Epoch 540 | Train Loss 0.19504011519474806 | Test Loss 0.19599685384985982\n", + "Epoch 560 | Train Loss 0.19503327299733605 | Test Loss 0.19599031497728384\n", + "Epoch 580 | Train Loss 0.19502781036651573 
| Test Loss 0.19598536246238987\n", + "Epoch 600 | Train Loss 0.19502343078313097 | Test Loss 0.19598164992474956\n", + "Epoch 620 | Train Loss 0.19501990548217418 | Test Loss 0.19597890702760307\n", + "Epoch 640 | Train Loss 0.19501705714492468 | Test Loss 0.1959769221005947\n", + "Epoch 660 | Train Loss 0.1950147476759042 | Test Loss 0.1959755289194148\n", + "Epoch 680 | Train Loss 0.19501286901218934 | Test Loss 0.19597459660841743\n", + "Epoch 700 | Train Loss 0.1950113361889526 | Test Loss 0.19597402189697258\n", + "Epoch 720 | Train Loss 0.1950100820879975 | Test Loss 0.19597372315589145\n", + "Epoch 740 | Train Loss 0.1950090534451732 | Test Loss 0.19597363578501353\n", + "Epoch 760 | Train Loss 0.1950082078022795 | Test Loss 0.19597370863036628\n", + "Epoch 780 | Train Loss 0.19500751116991094 | Test Loss 0.19597390118903935\n", + "Epoch 800 | Train Loss 0.19500693622732843 | Test Loss 0.19597418141927686\n", + "Epoch 820 | Train Loss 0.19500646092952323 | Test Loss 0.19597452401759977\n", + "Epoch 840 | Train Loss 0.19500606742426427 | Test Loss 0.1959749090579219\n", + "Epoch 860 | Train Loss 0.19500574120612196 | Test Loss 0.19597532091250944\n", + "Epoch 880 | Train Loss 0.19500547045245514 | Test Loss 0.19597574739336301\n", + "Epoch 900 | Train Loss 0.19500524549975556 | Test Loss 0.1959761790667557\n", + "Epoch 920 | Train Loss 0.19500505842876395 | Test Loss 0.19597660870438502\n", + "Epoch 940 | Train Loss 0.1950049027342804 | Test Loss 0.1959770308427642\n", + "Epoch 960 | Train Loss 0.19500477306123667 | Test Loss 0.1959774414287147\n", + "Epoch 980 | Train Loss 0.19500466499285687 | Test Loss 0.19597783753361098\n", + "Epoch 1000 | Train Loss 0.19500457487995757 | Test Loss 0.19597821712271776\n", + "Epoch 1020 | Train Loss 0.1950044997028894 | Test Loss 0.19597857886881814\n", + "Epoch 1040 | Train Loss 0.1950044369594934 | Test Loss 0.19597892200155245\n", + "Epoch 1060 | Train Loss 0.19500438457387914 | Test Loss 0.19597924618562568\n", + 
"Epoch 1080 | Train Loss 0.1950043408219371 | Test Loss 0.1959795514224043\n", + "Epoch 1100 | Train Loss 0.19500430427035154 | Test Loss 0.19597983797049975\n", + "Epoch 1120 | Train Loss 0.19500427372654458 | Test Loss 0.1959801062817876\n", + "Epoch 1140 | Train Loss 0.195004248197501 | Test Loss 0.19598035694999025\n", + "Epoch 1160 | Train Loss 0.19500422685583052 | Test Loss 0.1959805906694935\n", + "Epoch 1180 | Train Loss 0.1950042090117447 | Test Loss 0.19598080820250163\n", + "Epoch 1200 | Train Loss 0.195004194089881 | Test Loss 0.1959810103529865\n", + "Epoch 1220 | Train Loss 0.19500418161010774 | Test Loss 0.1959811979461702\n", + "Epoch 1240 | Train Loss 0.1950041711716055 | Test Loss 0.19598137181250938\n", + "Epoch 1260 | Train Loss 0.1950041624396518 | Test Loss 0.1959815327753366\n", + "Epoch 1280 | Train Loss 0.19500415513463745 | Test Loss 0.19598168164146745\n", + "Epoch 1300 | Train Loss 0.19500414902293145 | Test Loss 0.19598181919420496\n", + "Epoch 1320 | Train Loss 0.1950041439092753 | Test Loss 0.19598194618827683\n", + "Epoch 1340 | Train Loss 0.19500413963044858 | Test Loss 0.19598206334632265\n", + "Epoch 1360 | Train Loss 0.1950041360499879 | Test Loss 0.19598217135661974\n", + "Epoch 1380 | Train Loss 0.19500413305378356 | Test Loss 0.19598227087179027\n", + "Epoch 1400 | Train Loss 0.1950041305464049 | Test Loss 0.19598236250828016\n", + "Epoch 1420 | Train Loss 0.1950041284480337 | Test Loss 0.19598244684643956\n", + "Epoch 1440 | Train Loss 0.19500412669190298 | Test Loss 0.19598252443106423\n", + "Epoch 1460 | Train Loss 0.1950041252221582 | Test Loss 0.1959825957722857\n", + "Epoch 1480 | Train Loss 0.1950041239920701 | Test Loss 0.19598266134671743\n", + "Epoch 1500 | Train Loss 0.1950041229625413 | Test Loss 0.19598272159878383\n", + "Epoch 1520 | Train Loss 0.19500412210085774 | Test Loss 0.19598277694217195\n", + "Epoch 1540 | Train Loss 0.19500412137964543 | Test Loss 0.19598282776135909\n", + "Epoch 1560 | Train Loss 
0.19500412077599738 | Test Loss 0.19598287441317852\n", + "Epoch 1580 | Train Loss 0.19500412027074418 | Test Loss 0.19598291722839395\n", + "Epoch 1600 | Train Loss 0.19500411984784347 | Test Loss 0.1959829565132598\n", + "Epoch 1620 | Train Loss 0.19500411949386964 | Test Loss 0.19598299255105023\n", + "Epoch 1640 | Train Loss 0.19500411919758648 | Test Loss 0.1959830256035433\n", + "Epoch 1660 | Train Loss 0.19500411894959 | Test Loss 0.1959830559124517\n", + "Epoch 1680 | Train Loss 0.19500411874200962 | Test Loss 0.19598308370079265\n", + "Epoch 1700 | Train Loss 0.19500411856825786 | Test Loss 0.19598310917419287\n", + "Epoch 1720 | Train Loss 0.19500411842282128 | Test Loss 0.1959831325221266\n", + "Epoch 1740 | Train Loss 0.19500411830108505 | Test Loss 0.19598315391908486\n", + "Epoch 1760 | Train Loss 0.19500411819918667 | Test Loss 0.19598317352567718\n", + "Epoch 1780 | Train Loss 0.19500411811389318 | Test Loss 0.19598319148966561\n", + "Epoch 1800 | Train Loss 0.19500411804249856 | Test Loss 0.19598320794693336\n", + "Epoch 1820 | Train Loss 0.19500411798273784 | Test Loss 0.1959832230223899\n", + "Epoch 1840 | Train Loss 0.19500411793271513 | Test Loss 0.19598323683081562\n", + "Epoch 1860 | Train Loss 0.19500411789084354 | Test Loss 0.19598324947764745\n", + "Epoch 1880 | Train Loss 0.19500411785579488 | Test Loss 0.19598326105971003\n", + "Epoch 1900 | Train Loss 0.19500411782645727 | Test Loss 0.19598327166589385\n", + "Epoch 1920 | Train Loss 0.19500411780190013 | Test Loss 0.1959832813777842\n", + "Epoch 1940 | Train Loss 0.1950041177813444 | Test Loss 0.1959832902702435\n", + "Epoch 1960 | Train Loss 0.1950041177641382 | Test Loss 0.1959832984119505\n", + "Epoch 1980 | Train Loss 0.19500411774973558 | Test Loss 0.19598330586589785\n", + "Epoch 2000 | Train Loss 0.1950041177376798 | Test Loss 0.1959833126898522\n", + "Epoch 2020 | Train Loss 0.19500411772758847 | Test Loss 0.1959833189367787\n", + "Epoch 2040 | Train Loss 0.19500411771914136 | 
Test Loss 0.19598332465523224\n", + "Epoch 2060 | Train Loss 0.1950041177120707 | Test Loss 0.19598332988971823\n", + "Epoch 2080 | Train Loss 0.1950041177061521 | Test Loss 0.1959833346810246\n", + "Epoch 2100 | Train Loss 0.195004117701198 | Test Loss 0.19598333906652787\n", + "Epoch 2120 | Train Loss 0.19500411769705106 | Test Loss 0.19598334308047438\n", + "Epoch 2140 | Train Loss 0.1950041176935798 | Test Loss 0.19598334675423898\n", + "Epoch 2160 | Train Loss 0.19500411769067422 | Test Loss 0.1959833501165632\n", + "Epoch 2180 | Train Loss 0.19500411768824202 | Test Loss 0.1959833531937735\n", + "Epoch 2200 | Train Loss 0.19500411768620615 | Test Loss 0.19598335600998243\n", + "Epoch 2220 | Train Loss 0.19500411768450196 | Test Loss 0.1959833585872731\n", + "Epoch 2240 | Train Loss 0.19500411768307552 | Test Loss 0.19598336094586813\n", + "Epoch 2260 | Train Loss 0.19500411768188147 | Test Loss 0.19598336310428585\n", + "Epoch 2280 | Train Loss 0.19500411768088202 | Test Loss 0.195983365079482\n", + "Epoch 2300 | Train Loss 0.19500411768004536 | Test Loss 0.19598336688698145\n", + "Epoch 2320 | Train Loss 0.19500411767934506 | Test Loss 0.1959833685409978\n", + "Epoch 2340 | Train Loss 0.19500411767875886 | Test Loss 0.19598337005454344\n", + "Epoch 2360 | Train Loss 0.1950041176782682 | Test Loss 0.1959833714395307\n", + "Epoch 2380 | Train Loss 0.1950041176778575 | Test Loss 0.1959833727068645\n", + "Epoch 2400 | Train Loss 0.19500411767751366 | Test Loss 0.19598337386652676\n", + "Epoch 2420 | Train Loss 0.19500411767722584 | Test Loss 0.1959833749276544\n", + "Epoch 2440 | Train Loss 0.19500411767698494 | Test Loss 0.1959833758986108\n", + "Epoch 2460 | Train Loss 0.19500411767678333 | Test Loss 0.19598337678705066\n", + "Epoch 2480 | Train Loss 0.19500411767661452 | Test Loss 0.19598337759998\n", + "Epoch 2500 | Train Loss 0.19500411767647324 | Test Loss 0.19598337834381113\n", + "Epoch 2520 | Train Loss 0.195004117676355 | Test Loss 
0.19598337902441254\n", + "Epoch 2540 | Train Loss 0.195004117676256 | Test Loss 0.1959833796471551\n", + "Epoch 2560 | Train Loss 0.19500411767617312 | Test Loss 0.19598338021695402\n", + "Epoch 2580 | Train Loss 0.1950041176761038 | Test Loss 0.19598338073830746\n", + "Epoch 2600 | Train Loss 0.19500411767604572 | Test Loss 0.19598338121533196\n", + "Epoch 2620 | Train Loss 0.19500411767599712 | Test Loss 0.19598338165179446\n", + "Epoch 2640 | Train Loss 0.1950041176759564 | Test Loss 0.19598338205114224\n", + "Epoch 2660 | Train Loss 0.19500411767592238 | Test Loss 0.19598338241652982\n", + "Epoch 2680 | Train Loss 0.19500411767589387 | Test Loss 0.19598338275084387\n", + "Epoch 2700 | Train Loss 0.19500411767587003 | Test Loss 0.1959833830567258\n", + "Epoch 2720 | Train Loss 0.19500411767585005 | Test Loss 0.19598338333659285\n", + "Epoch 2740 | Train Loss 0.1950041176758333 | Test Loss 0.19598338359265657\n", + "Epoch 2760 | Train Loss 0.19500411767581935 | Test Loss 0.19598338382694094\n", + "Epoch 2780 | Train Loss 0.19500411767580764 | Test Loss 0.19598338404129775\n", + "Epoch 2800 | Train Loss 0.19500411767579784 | Test Loss 0.19598338423742154\n", + "Epoch 2820 | Train Loss 0.19500411767578962 | Test Loss 0.19598338441686272\n", + "Epoch 2840 | Train Loss 0.19500411767578277 | Test Loss 0.19598338458104\n", + "Epoch 2860 | Train Loss 0.195004117675777 | Test Loss 0.1959833847312515\n", + "Epoch 2880 | Train Loss 0.1950041176757722 | Test Loss 0.19598338486868488\n", + "Epoch 2900 | Train Loss 0.19500411767576814 | Test Loss 0.19598338499442705\n", + "Epoch 2920 | Train Loss 0.19500411767576475 | Test Loss 0.19598338510947227\n", + "Epoch 2940 | Train Loss 0.19500411767576192 | Test Loss 0.19598338521473044\n", + "Epoch 2960 | Train Loss 0.1950041176757596 | Test Loss 0.19598338531103396\n", + "Epoch 2980 | Train Loss 0.19500411767575762 | Test Loss 0.19598338539914462\n", + "Epoch 3000 | Train Loss 0.19500411767575593 | Test Loss 0.1959833854797592\n", 
+ "Epoch 3020 | Train Loss 0.19500411767575457 | Test Loss 0.1959833855535154\n", + "Epoch 3040 | Train Loss 0.1950041176757534 | Test Loss 0.1959833856209966\n", + "Epoch 3060 | Train Loss 0.19500411767575246 | Test Loss 0.19598338568273665\n", + "Epoch 3080 | Train Loss 0.19500411767575163 | Test Loss 0.19598338573922397\n", + "Epoch 3100 | Train Loss 0.19500411767575093 | Test Loss 0.19598338579090527\n", + "Epoch 3120 | Train Loss 0.19500411767575035 | Test Loss 0.19598338583818956\n", + "Epoch 3140 | Train Loss 0.19500411767574988 | Test Loss 0.19598338588145087\n", + "Epoch 3160 | Train Loss 0.1950041176757495 | Test Loss 0.1959833859210314\n", + "Epoch 3180 | Train Loss 0.19500411767574916 | Test Loss 0.19598338595724435\n", + "Epoch 3200 | Train Loss 0.1950041176757489 | Test Loss 0.19598338599037615\n", + "Epoch 3220 | Train Loss 0.19500411767574863 | Test Loss 0.19598338602068902\n", + "Epoch 3240 | Train Loss 0.19500411767574843 | Test Loss 0.19598338604842275\n", + "Epoch 3260 | Train Loss 0.19500411767574832 | Test Loss 0.19598338607379676\n", + "Epoch 3280 | Train Loss 0.19500411767574816 | Test Loss 0.19598338609701185\n", + "Epoch 3300 | Train Loss 0.19500411767574805 | Test Loss 0.19598338611825167\n", + "Epoch 3320 | Train Loss 0.19500411767574796 | Test Loss 0.19598338613768423\n", + "Epoch 3340 | Train Loss 0.19500411767574785 | Test Loss 0.19598338615546346\n", + "Epoch 3360 | Train Loss 0.1950041176757478 | Test Loss 0.1959833861717299\n", + "Epoch 3380 | Train Loss 0.1950041176757477 | Test Loss 0.19598338618661224\n", + "Epoch 3400 | Train Loss 0.19500411767574768 | Test Loss 0.1959833862002283\n", + "Epoch 3420 | Train Loss 0.19500411767574766 | Test Loss 0.19598338621268585\n", + "Epoch 3440 | Train Loss 0.19500411767574763 | Test Loss 0.19598338622408337\n", + "Epoch 3460 | Train Loss 0.1950041176757476 | Test Loss 0.1959833862345112\n", + "Epoch 3480 | Train Loss 0.1950041176757476 | Test Loss 0.19598338624405162\n", + "Epoch 3500 | 
Train Loss 0.19500411767574752 | Test Loss 0.19598338625278036\n", + "Epoch 3520 | Train Loss 0.19500411767574755 | Test Loss 0.19598338626076636\n", + "Epoch 3540 | Train Loss 0.19500411767574752 | Test Loss 0.19598338626807282\n", + "Epoch 3560 | Train Loss 0.19500411767574752 | Test Loss 0.19598338627475767\n", + "Epoch 3580 | Train Loss 0.1950041176757475 | Test Loss 0.19598338628087364\n", + "Epoch 3600 | Train Loss 0.1950041176757475 | Test Loss 0.19598338628646922\n", + "Epoch 3620 | Train Loss 0.19500411767574746 | Test Loss 0.19598338629158868\n", + "Epoch 3640 | Train Loss 0.19500411767574746 | Test Loss 0.19598338629627257\n", + "Epoch 3660 | Train Loss 0.19500411767574746 | Test Loss 0.19598338630055787\n", + "Epoch 3680 | Train Loss 0.1950041176757475 | Test Loss 0.1959833863044786\n", + "Epoch 3700 | Train Loss 0.19500411767574746 | Test Loss 0.19598338630806564\n", + "Epoch 3720 | Train Loss 0.19500411767574746 | Test Loss 0.19598338631134748\n", + "Epoch 3740 | Train Loss 0.1950041176757475 | Test Loss 0.1959833863143501\n", + "Epoch 3760 | Train Loss 0.19500411767574746 | Test Loss 0.19598338631709722\n", + "Epoch 3780 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863196106\n", + "Epoch 3800 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863219101\n", + "Epoch 3820 | Train Loss 0.19500411767574746 | Test Loss 0.19598338632401394\n", + "Epoch 3840 | Train Loss 0.19500411767574746 | Test Loss 0.19598338632593876\n", + "Epoch 3860 | Train Loss 0.19500411767574746 | Test Loss 0.19598338632769982\n", + "Epoch 3880 | Train Loss 0.19500411767574743 | Test Loss 0.19598338632931098\n", + "Epoch 3900 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863307851\n", + "Epoch 3920 | Train Loss 0.19500411767574746 | Test Loss 0.19598338633213377\n", + "Epoch 3940 | Train Loss 0.19500411767574746 | Test Loss 0.19598338633336768\n", + "Epoch 3960 | Train Loss 0.19500411767574743 | Test Loss 0.19598338633449658\n", + "Epoch 3980 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338633552945\n", + "Epoch 4000 | Train Loss 0.19500411767574746 | Test Loss 0.19598338633647439\n", + "Epoch 4020 | Train Loss 0.19500411767574746 | Test Loss 0.19598338633733897\n", + "Epoch 4040 | Train Loss 0.19500411767574746 | Test Loss 0.19598338633812995\n", + "Epoch 4060 | Train Loss 0.19500411767574746 | Test Loss 0.19598338633885368\n", + "Epoch 4080 | Train Loss 0.19500411767574746 | Test Loss 0.19598338633951579\n", + "Epoch 4100 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634012158\n", + "Epoch 4120 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863406758\n", + "Epoch 4140 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634118284\n", + "Epoch 4160 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863416468\n", + "Epoch 4180 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634207124\n", + "Epoch 4200 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634245957\n", + "Epoch 4220 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863428149\n", + "Epoch 4240 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634313991\n", + "Epoch 4260 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634343732\n", + "Epoch 4280 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863437094\n", + "Epoch 4300 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634395834\n", + "Epoch 4320 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863441861\n", + "Epoch 4340 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863443945\n", + "Epoch 4360 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634458515\n", + "Epoch 4380 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634475954\n", + "Epoch 4400 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634491913\n", + "Epoch 4420 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634506512\n", + "Epoch 4440 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863451987\n", + "Epoch 4460 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634532095\n", + "Epoch 4480 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634543272\n", + "Epoch 4500 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634553503\n", + "Epoch 4520 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634562865\n", + "Epoch 4540 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634571427\n", + "Epoch 4560 | Train Loss 0.19500411767574743 | Test Loss 0.1959833863457926\n", + "Epoch 4580 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634586435\n", + "Epoch 4600 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863459299\n", + "Epoch 4620 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634598989\n", + "Epoch 4640 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634604479\n", + "Epoch 4660 | Train Loss 0.19500411767574746 | Test Loss 0.195983386346095\n", + "Epoch 4680 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634614093\n", + "Epoch 4700 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634618298\n", + "Epoch 4720 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634622145\n", + "Epoch 4740 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634625664\n", + "Epoch 4760 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863462888\n", + "Epoch 4780 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863463183\n", + "Epoch 4800 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634634527\n", + "Epoch 4820 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863463699\n", + "Epoch 4840 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634639245\n", + "Epoch 4860 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634641307\n", + "Epoch 4880 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634643198\n", + "Epoch 4900 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634644927\n", + "Epoch 4920 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634646506\n", + "Epoch 4940 | Train Loss 
0.19500411767574746 | Test Loss 0.1959833863464795\n", + "Epoch 4960 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634649273\n", + "Epoch 4980 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634650483\n", + "Epoch 5000 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634651596\n", + "Epoch 5020 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634652604\n", + "Epoch 5040 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863465353\n", + "Epoch 5060 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634654378\n", + "Epoch 5080 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863465516\n", + "Epoch 5100 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634655868\n", + "Epoch 5120 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634656515\n", + "Epoch 5140 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634657111\n", + "Epoch 5160 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634657653\n", + "Epoch 5180 | Train Loss 0.19500411767574743 | Test Loss 0.1959833863465815\n", + "Epoch 5200 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634658605\n", + "Epoch 5220 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634659024\n", + "Epoch 5240 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634659407\n", + "Epoch 5260 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634659754\n", + "Epoch 5280 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634660073\n", + "Epoch 5300 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634660364\n", + "Epoch 5320 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466063\n", + "Epoch 5340 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634660875\n", + "Epoch 5360 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634661094\n", + "Epoch 5380 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634661303\n", + "Epoch 5400 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634661489\n", + "Epoch 5420 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634661658\n", + "Epoch 5440 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634661816\n", + "Epoch 5460 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466196\n", + "Epoch 5480 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634662088\n", + "Epoch 5500 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466221\n", + "Epoch 5520 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634662318\n", + "Epoch 5540 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634662418\n", + "Epoch 5560 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466251\n", + "Epoch 5580 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634662596\n", + "Epoch 5600 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634662674\n", + "Epoch 5620 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634662743\n", + "Epoch 5640 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466281\n", + "Epoch 5660 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634662868\n", + "Epoch 5680 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466292\n", + "Epoch 5700 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466297\n", + "Epoch 5720 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663018\n", + "Epoch 5740 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663057\n", + "Epoch 5760 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663093\n", + "Epoch 5780 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663132\n", + "Epoch 5800 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466316\n", + "Epoch 5820 | Train Loss 0.19500411767574743 | Test Loss 0.1959833863466319\n", + "Epoch 5840 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663218\n", + "Epoch 5860 | Train Loss 0.19500411767574743 | Test Loss 0.1959833863466324\n", + "Epoch 5880 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663262\n", + "Epoch 5900 | Train Loss 
0.19500411767574743 | Test Loss 0.19598338634663284\n", + "Epoch 5920 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663304\n", + "Epoch 5940 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634663318\n", + "Epoch 5960 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663331\n", + "Epoch 5980 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663348\n", + "Epoch 6000 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634663356\n", + "Epoch 6020 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663373\n", + "Epoch 6040 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663384\n", + "Epoch 6060 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663395\n", + "Epoch 6080 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663404\n", + "Epoch 6100 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663412\n", + "Epoch 6120 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663418\n", + "Epoch 6140 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663423\n", + "Epoch 6160 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663431\n", + "Epoch 6180 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663437\n", + "Epoch 6200 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663445\n", + "Epoch 6220 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663448\n", + "Epoch 6240 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466345\n", + "Epoch 6260 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466346\n", + "Epoch 6280 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663462\n", + "Epoch 6300 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663465\n", + "Epoch 6320 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663465\n", + "Epoch 6340 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466347\n", + "Epoch 6360 | Train Loss 0.19500411767574743 | Test Loss 0.1959833863466347\n", + "Epoch 6380 | Train Loss 
0.19500411767574743 | Test Loss 0.1959833863466347\n", + "Epoch 6400 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663479\n", + "Epoch 6420 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634663479\n", + "Epoch 6440 | Train Loss 0.19500411767574746 | Test Loss 0.1959833863466348\n", + "Epoch 6460 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663484\n", + "Epoch 6480 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663484\n", + "Epoch 6500 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634663484\n", + "Epoch 6520 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663487\n", + "Epoch 6540 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663487\n", + "Epoch 6560 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663487\n", + "Epoch 6580 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634663487\n", + "Epoch 6600 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663492\n", + "Epoch 6620 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663492\n", + "Epoch 6640 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663492\n", + "Epoch 6660 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663495\n", + "Epoch 6680 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663492\n", + "Epoch 6700 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663495\n", + "Epoch 6720 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663495\n", + "Epoch 6740 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663495\n", + "Epoch 6760 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6780 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6800 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6820 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6840 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6860 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6880 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6900 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6920 | Train Loss 0.19500411767574743 | Test Loss 0.19598338634663498\n", + "Epoch 6940 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6960 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 6980 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7000 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7020 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7040 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7060 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7080 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7100 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7120 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7140 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7160 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7180 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7200 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7220 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7240 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7260 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7280 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7300 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7320 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7340 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7360 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7380 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7400 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7420 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7440 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7460 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7480 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7500 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7520 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7540 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7560 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7580 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7600 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7620 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7640 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7660 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7680 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7700 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7720 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7740 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7760 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7780 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7800 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7820 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7840 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7860 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7880 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7900 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7920 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7940 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7960 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 7980 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8000 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8020 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8040 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8060 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8080 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8100 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8120 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8140 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8160 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8180 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8200 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8220 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8240 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8260 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8280 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8300 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8320 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8340 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8360 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8380 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8400 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8420 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8440 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8460 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8480 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8500 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8520 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8540 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8560 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8580 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8600 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8620 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8640 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8660 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8680 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8700 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8720 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8740 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8760 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8780 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8800 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8820 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8840 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8860 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8880 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8900 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8920 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8940 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8960 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 8980 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9000 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9020 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9040 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9060 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9080 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9100 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9120 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9140 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9160 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9180 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9200 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9220 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9240 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9260 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9280 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9300 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9320 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9340 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9360 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9380 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9400 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9420 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9440 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9460 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9480 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9500 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9520 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9540 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9560 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9580 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9600 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9620 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9640 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9660 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9680 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9700 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9720 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9740 | Train Loss 
0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9760 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9780 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9800 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9820 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9840 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9860 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9880 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9900 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9920 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9940 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9960 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n", + "Epoch 9980 | Train Loss 0.19500411767574746 | Test Loss 0.19598338634663498\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "num_epochs = 10000\n", + "train_loss_history = []\n", + "test_loss_history = []\n", + "w_history = []\n", + "dim = x_train.shape[0]\n", + "num_train = x_train.shape[1]\n", + "num_test = x_test.shape[1]\n", + "\n", + "\n", + "model = LinearRegression(dim = dim, lr = 0.1)\n", + "for i in range(num_epochs):\n", + " y_hat = model.forward(x_train)\n", + " train_loss = 1/(2 * num_train) * ((y_train - y_hat) ** 2).sum()\n", + "\n", + " w_history.append(model.w)\n", + " model.backward(x_train,y_hat,y_train)\n", + " model.optimize()\n", + "\n", + " y_hat = model.forward(x_test)\n", + " test_loss = 1/(2 * num_test) * ((y_test - y_hat) ** 2).sum()\n", + "\n", + " train_loss_history.append(train_loss)\n", + " test_loss_history.append(test_loss)\n", + "\n", + " if i % 20 == 0:\n", + " print(f\"Epoch {i} | Train Loss {train_loss} | Test Loss {test_loss}\")\n", + "\n", + "plt.plot(range(num_epochs), train_loss_history, label = \"Training\")\n", + "plt.plot(range(num_epochs), test_loss_history, label = \"Test\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ldFMBPuvr0l0" + }, + "source": [ + "# Results\n", + "\n", + "Before viewing the results, we need to reverse the transformations applied on the output variable y.\n", + "\n", + "The `inverse_transform` method of the StandardScaler object will help us." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ZycI4aExMsoC", + "outputId": "47c6b8fa-d1ee-4ff6-90d0-7e9289cfe40e" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Set Error 0.5263803029005855\n" + ] + } + ], + "source": [ + "from sklearn.metrics import mean_squared_error\n", + "y_test = output_scalar.inverse_transform(y_test[np.newaxis,:])\n", + "y_hat = output_scalar.inverse_transform(y_hat[np.newaxis,:])\n", + "error = (((y_test - y_hat) ** 2).sum() / num_test )\n", + "print(\"Test Set Error\", error)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UoYobRS9uBIv" + }, + "source": [ + "# Libraries\n", + "\n", + "Instead of coding everything from scratch, i.e., the model, loss functions, and gradient calculations, there are many libraries that have implemented many machine learning algorithms for us.\n", + "\n", + "These libraries will generally be faster and more optimized. 
We can use the LinearRegression and SGD regressor module from scikit learn to compare our model" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "txWBY_0eoNN_", + "outputId": "7886b7e0-c383-4676-e825-3d7197d06d80" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Set Error 0.5892243304217802\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import SGDRegressor\n", + "\n", + "\n", + "x_train, x_test, y_train, y_test = dataset_copy\n", + "sgd = SGDRegressor()\n", + "sgd.fit(x_train.T, y_train)\n", + "y_hat = sgd.predict(x_test.T)\n", + "y_test = output_scalar.inverse_transform(y_test[np.newaxis,:])\n", + "y_hat = output_scalar.inverse_transform(y_hat[np.newaxis,:])\n", + "error = mean_squared_error(y_test, y_hat, squared = True)\n", + "print(\"Test Set Error\", error)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9CaqphG8TG7V", + "outputId": "579bc8dd-6093-4155-8966-f8c60c34b1b3" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Set Error 0.5263803029005857\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import LinearRegression as LR\n", + "\n", + "x_train, x_test, y_train, y_test = dataset_copy\n", + "lr = LR()\n", + "lr.fit(x_train.T, y_train)\n", + "y_hat = lr.predict(x_test.T)\n", + "y_test = output_scalar.inverse_transform(y_test[np.newaxis,:])\n", + "y_hat = output_scalar.inverse_transform(y_hat[np.newaxis,:])\n", + "error = mean_squared_error(y_test, y_hat, squared = True)\n", + "print(\"Test Set Error\", error)" + ] + } + ], + "metadata": { + "colab": { + "authorship_tag": "ABX9TyODV4REEuhLrJ1l8OWl6dFt", + "collapsed_sections": [], + "include_colab_link": true, + "name": "Linear Regression.ipynb", + "provenance": [] + }, + "kernelspec": { + 
"display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/logistic_regression.ipynb b/notebooks/logistic_regression.ipynb new file mode 100644 index 0000000..fd525dc --- /dev/null +++ b/notebooks/logistic_regression.ipynb @@ -0,0 +1,970 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Implementing A Logistic Regression Model from Scratch with PyTorch\n", + "\n", + "![alt text](https://drive.google.com/uc?export=view&id=11Bv3uhZtVgRVYVWDl9_ZAYQ0GU36LhM9)\n", + "\n", + "\n", + "In this tutorial, we are going to implement a logistic regression model from scratch with PyTorch. The model will be designed with neural networks in mind and will be used for a simple image classification task. I believe this is a great approach to begin understanding the fundamental building blocks behind a neural network. Additionally, we will also look at best practices on how to use PyTorch for training neural networks.\n", + "\n", + "After completing this tutorial the learner is expected to know the basic building blocks of a logistic regression model. 
The learner is also expected to apply the logistic regression model to a binary image classification problem of their choice using PyTorch code.\n", + "\n", + "---\n", + "\n", + "**Author:** Elvis Saravia ( [Twitter](https://twitter.com/omarsar0) | [LinkedIn](https://www.linkedin.com/in/omarsar/))\n", + "\n", + "**Complete Code Walkthrough:** [Blog post](https://medium.com/dair-ai/implementing-a-logistic-regression-model-from-scratch-with-pytorch-24ea062cd856?source=friends_link&sk=49dcddb17d1d021d2d677f3666c88463)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "## Import the usual libraries\n", + "import torch\n", + "import torchvision\n", + "import torch.nn as nn\n", + "from torchvision import datasets, models, transforms\n", + "import os\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "cuda:0\n" + ] + } + ], + "source": [ + "## configuration to detect cuda or cpu\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", + "print (device)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Importing Dataset\n", + "In this tutorial we will be working on an image classification problem. You can find the public dataset [here](https://download.pytorch.org/tutorial/hymenoptera_data.zip). \n", + "\n", + "The objective of our model is to learn to classify between \"bee\" vs. \"no bee\" images.\n", + "\n", + "Uncomment the code below to download and unzip the data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2022-06-17 23:55:52-- https://download.pytorch.org/tutorial/hymenoptera_data.zip\n", + "Resolving download.pytorch.org (download.pytorch.org)... 18.67.65.73, 18.67.65.42, 18.67.65.118, ...\n", + "Connecting to download.pytorch.org (download.pytorch.org)|18.67.65.73|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 47286322 (45M) [application/zip]\n", + "Saving to: ‘hymenoptera_data.zip’\n", + "\n", + "hymenoptera_data.zi 100%[===================>] 45.10M 152MB/s in 0.3s \n", + "\n", + "2022-06-17 23:55:53 (152 MB/s) - ‘hymenoptera_data.zip’ saved [47286322/47286322]\n", + "\n", + "Archive: hymenoptera_data.zip\n", + " creating: hymenoptera_data/\n", + " creating: hymenoptera_data/train/\n", + " creating: hymenoptera_data/train/ants/\n", + " inflating: hymenoptera_data/train/ants/0013035.jpg \n", + " inflating: hymenoptera_data/train/ants/1030023514_aad5c608f9.jpg \n", + " inflating: hymenoptera_data/train/ants/1095476100_3906d8afde.jpg \n", + " inflating: hymenoptera_data/train/ants/1099452230_d1949d3250.jpg \n", + " inflating: hymenoptera_data/train/ants/116570827_e9c126745d.jpg \n", + " inflating: hymenoptera_data/train/ants/1225872729_6f0856588f.jpg \n", + " inflating: hymenoptera_data/train/ants/1262877379_64fcada201.jpg \n", + " inflating: hymenoptera_data/train/ants/1269756697_0bce92cdab.jpg \n", + " inflating: hymenoptera_data/train/ants/1286984635_5119e80de1.jpg \n", + " inflating: hymenoptera_data/train/ants/132478121_2a430adea2.jpg \n", + " inflating: hymenoptera_data/train/ants/1360291657_dc248c5eea.jpg \n", + " inflating: hymenoptera_data/train/ants/1368913450_e146e2fb6d.jpg \n", + " inflating: hymenoptera_data/train/ants/1473187633_63ccaacea6.jpg \n", + " inflating: hymenoptera_data/train/ants/148715752_302c84f5a4.jpg \n", + " inflating: 
hymenoptera_data/train/ants/1489674356_09d48dde0a.jpg \n", + " inflating: hymenoptera_data/train/ants/149244013_c529578289.jpg \n", + " inflating: hymenoptera_data/train/ants/150801003_3390b73135.jpg \n", + " inflating: hymenoptera_data/train/ants/150801171_cd86f17ed8.jpg \n", + " inflating: hymenoptera_data/train/ants/154124431_65460430f2.jpg \n", + " inflating: hymenoptera_data/train/ants/162603798_40b51f1654.jpg \n", + " inflating: hymenoptera_data/train/ants/1660097129_384bf54490.jpg \n", + " inflating: hymenoptera_data/train/ants/167890289_dd5ba923f3.jpg \n", + " inflating: hymenoptera_data/train/ants/1693954099_46d4c20605.jpg \n", + " inflating: hymenoptera_data/train/ants/175998972.jpg \n", + " inflating: hymenoptera_data/train/ants/178538489_bec7649292.jpg \n", + " inflating: hymenoptera_data/train/ants/1804095607_0341701e1c.jpg \n", + " inflating: hymenoptera_data/train/ants/1808777855_2a895621d7.jpg \n", + " inflating: hymenoptera_data/train/ants/188552436_605cc9b36b.jpg \n", + " inflating: hymenoptera_data/train/ants/1917341202_d00a7f9af5.jpg \n", + " inflating: hymenoptera_data/train/ants/1924473702_daa9aacdbe.jpg \n", + " inflating: hymenoptera_data/train/ants/196057951_63bf063b92.jpg \n", + " inflating: hymenoptera_data/train/ants/196757565_326437f5fe.jpg \n", + " inflating: hymenoptera_data/train/ants/201558278_fe4caecc76.jpg \n", + " inflating: hymenoptera_data/train/ants/201790779_527f4c0168.jpg \n", + " inflating: hymenoptera_data/train/ants/2019439677_2db655d361.jpg \n", + " inflating: hymenoptera_data/train/ants/207947948_3ab29d7207.jpg \n", + " inflating: hymenoptera_data/train/ants/20935278_9190345f6b.jpg \n", + " inflating: hymenoptera_data/train/ants/224655713_3956f7d39a.jpg \n", + " inflating: hymenoptera_data/train/ants/2265824718_2c96f485da.jpg \n", + " inflating: hymenoptera_data/train/ants/2265825502_fff99cfd2d.jpg \n", + " inflating: hymenoptera_data/train/ants/226951206_d6bf946504.jpg \n", + " inflating: 
hymenoptera_data/train/ants/2278278459_6b99605e50.jpg \n", + " inflating: hymenoptera_data/train/ants/2288450226_a6e96e8fdf.jpg \n", + " inflating: hymenoptera_data/train/ants/2288481644_83ff7e4572.jpg \n", + " inflating: hymenoptera_data/train/ants/2292213964_ca51ce4bef.jpg \n", + " inflating: hymenoptera_data/train/ants/24335309_c5ea483bb8.jpg \n", + " inflating: hymenoptera_data/train/ants/245647475_9523dfd13e.jpg \n", + " inflating: hymenoptera_data/train/ants/255434217_1b2b3fe0a4.jpg \n", + " inflating: hymenoptera_data/train/ants/258217966_d9d90d18d3.jpg \n", + " inflating: hymenoptera_data/train/ants/275429470_b2d7d9290b.jpg \n", + " inflating: hymenoptera_data/train/ants/28847243_e79fe052cd.jpg \n", + " inflating: hymenoptera_data/train/ants/318052216_84dff3f98a.jpg \n", + " inflating: hymenoptera_data/train/ants/334167043_cbd1adaeb9.jpg \n", + " inflating: hymenoptera_data/train/ants/339670531_94b75ae47a.jpg \n", + " inflating: hymenoptera_data/train/ants/342438950_a3da61deab.jpg \n", + " inflating: hymenoptera_data/train/ants/36439863_0bec9f554f.jpg \n", + " inflating: hymenoptera_data/train/ants/374435068_7eee412ec4.jpg \n", + " inflating: hymenoptera_data/train/ants/382971067_0bfd33afe0.jpg \n", + " inflating: hymenoptera_data/train/ants/384191229_5779cf591b.jpg \n", + " inflating: hymenoptera_data/train/ants/386190770_672743c9a7.jpg \n", + " inflating: hymenoptera_data/train/ants/392382602_1b7bed32fa.jpg \n", + " inflating: hymenoptera_data/train/ants/403746349_71384f5b58.jpg \n", + " inflating: hymenoptera_data/train/ants/408393566_b5b694119b.jpg \n", + " inflating: hymenoptera_data/train/ants/424119020_6d57481dab.jpg \n", + " inflating: hymenoptera_data/train/ants/424873399_47658a91fb.jpg \n", + " inflating: hymenoptera_data/train/ants/450057712_771b3bfc91.jpg \n", + " inflating: hymenoptera_data/train/ants/45472593_bfd624f8dc.jpg \n", + " inflating: hymenoptera_data/train/ants/459694881_ac657d3187.jpg \n", + " inflating: 
hymenoptera_data/train/ants/460372577_f2f6a8c9fc.jpg \n", + " inflating: hymenoptera_data/train/ants/460874319_0a45ab4d05.jpg \n", + " inflating: hymenoptera_data/train/ants/466430434_4000737de9.jpg \n", + " inflating: hymenoptera_data/train/ants/470127037_513711fd21.jpg \n", + " inflating: hymenoptera_data/train/ants/474806473_ca6caab245.jpg \n", + " inflating: hymenoptera_data/train/ants/475961153_b8c13fd405.jpg \n", + " inflating: hymenoptera_data/train/ants/484293231_e53cfc0c89.jpg \n", + " inflating: hymenoptera_data/train/ants/49375974_e28ba6f17e.jpg \n", + " inflating: hymenoptera_data/train/ants/506249802_207cd979b4.jpg \n", + " inflating: hymenoptera_data/train/ants/506249836_717b73f540.jpg \n", + " inflating: hymenoptera_data/train/ants/512164029_c0a66b8498.jpg \n", + " inflating: hymenoptera_data/train/ants/512863248_43c8ce579b.jpg \n", + " inflating: hymenoptera_data/train/ants/518773929_734dbc5ff4.jpg \n", + " inflating: hymenoptera_data/train/ants/522163566_fec115ca66.jpg \n", + " inflating: hymenoptera_data/train/ants/522415432_2218f34bf8.jpg \n", + " inflating: hymenoptera_data/train/ants/531979952_bde12b3bc0.jpg \n", + " inflating: hymenoptera_data/train/ants/533848102_70a85ad6dd.jpg \n", + " inflating: hymenoptera_data/train/ants/535522953_308353a07c.jpg \n", + " inflating: hymenoptera_data/train/ants/540889389_48bb588b21.jpg \n", + " inflating: hymenoptera_data/train/ants/541630764_dbd285d63c.jpg \n", + " inflating: hymenoptera_data/train/ants/543417860_b14237f569.jpg \n", + " inflating: hymenoptera_data/train/ants/560966032_988f4d7bc4.jpg \n", + " inflating: hymenoptera_data/train/ants/5650366_e22b7e1065.jpg \n", + " inflating: hymenoptera_data/train/ants/6240329_72c01e663e.jpg \n", + " inflating: hymenoptera_data/train/ants/6240338_93729615ec.jpg \n", + " inflating: hymenoptera_data/train/ants/649026570_e58656104b.jpg \n", + " inflating: hymenoptera_data/train/ants/662541407_ff8db781e7.jpg \n", + " inflating: 
hymenoptera_data/train/ants/67270775_e9fdf77e9d.jpg \n", + " inflating: hymenoptera_data/train/ants/6743948_2b8c096dda.jpg \n", + " inflating: hymenoptera_data/train/ants/684133190_35b62c0c1d.jpg \n", + " inflating: hymenoptera_data/train/ants/69639610_95e0de17aa.jpg \n", + " inflating: hymenoptera_data/train/ants/707895295_009cf23188.jpg \n", + " inflating: hymenoptera_data/train/ants/7759525_1363d24e88.jpg \n", + " inflating: hymenoptera_data/train/ants/795000156_a9900a4a71.jpg \n", + " inflating: hymenoptera_data/train/ants/822537660_caf4ba5514.jpg \n", + " inflating: hymenoptera_data/train/ants/82852639_52b7f7f5e3.jpg \n", + " inflating: hymenoptera_data/train/ants/841049277_b28e58ad05.jpg \n", + " inflating: hymenoptera_data/train/ants/886401651_f878e888cd.jpg \n", + " inflating: hymenoptera_data/train/ants/892108839_f1aad4ca46.jpg \n", + " inflating: hymenoptera_data/train/ants/938946700_ca1c669085.jpg \n", + " inflating: hymenoptera_data/train/ants/957233405_25c1d1187b.jpg \n", + " inflating: hymenoptera_data/train/ants/9715481_b3cb4114ff.jpg \n", + " inflating: hymenoptera_data/train/ants/998118368_6ac1d91f81.jpg \n", + " inflating: hymenoptera_data/train/ants/ant photos.jpg \n", + " inflating: hymenoptera_data/train/ants/Ant_1.jpg \n", + " inflating: hymenoptera_data/train/ants/army-ants-red-picture.jpg \n", + " inflating: hymenoptera_data/train/ants/formica.jpeg \n", + " inflating: hymenoptera_data/train/ants/hormiga_co_por.jpg \n", + " inflating: hymenoptera_data/train/ants/imageNotFound.gif \n", + " inflating: hymenoptera_data/train/ants/kurokusa.jpg \n", + " inflating: hymenoptera_data/train/ants/MehdiabadiAnt2_600.jpg \n", + " inflating: hymenoptera_data/train/ants/Nepenthes_rafflesiana_ant.jpg \n", + " inflating: hymenoptera_data/train/ants/swiss-army-ant.jpg \n", + " inflating: hymenoptera_data/train/ants/termite-vs-ant.jpg \n", + " inflating: hymenoptera_data/train/ants/trap-jaw-ant-insect-bg.jpg \n", + " inflating: 
hymenoptera_data/train/ants/VietnameseAntMimicSpider.jpg \n", + " creating: hymenoptera_data/train/bees/\n", + " inflating: hymenoptera_data/train/bees/1092977343_cb42b38d62.jpg \n", + " inflating: hymenoptera_data/train/bees/1093831624_fb5fbe2308.jpg \n", + " inflating: hymenoptera_data/train/bees/1097045929_1753d1c765.jpg \n", + " inflating: hymenoptera_data/train/bees/1232245714_f862fbe385.jpg \n", + " inflating: hymenoptera_data/train/bees/129236073_0985e91c7d.jpg \n", + " inflating: hymenoptera_data/train/bees/1295655112_7813f37d21.jpg \n", + " inflating: hymenoptera_data/train/bees/132511197_0b86ad0fff.jpg \n", + " inflating: hymenoptera_data/train/bees/132826773_dbbcb117b9.jpg \n", + " inflating: hymenoptera_data/train/bees/150013791_969d9a968b.jpg \n", + " inflating: hymenoptera_data/train/bees/1508176360_2972117c9d.jpg \n", + " inflating: hymenoptera_data/train/bees/154600396_53e1252e52.jpg \n", + " inflating: hymenoptera_data/train/bees/16838648_415acd9e3f.jpg \n", + " inflating: hymenoptera_data/train/bees/1691282715_0addfdf5e8.jpg \n", + " inflating: hymenoptera_data/train/bees/17209602_fe5a5a746f.jpg \n", + " inflating: hymenoptera_data/train/bees/174142798_e5ad6d76e0.jpg \n", + " inflating: hymenoptera_data/train/bees/1799726602_8580867f71.jpg \n", + " inflating: hymenoptera_data/train/bees/1807583459_4fe92b3133.jpg \n", + " inflating: hymenoptera_data/train/bees/196430254_46bd129ae7.jpg \n", + " inflating: hymenoptera_data/train/bees/196658222_3fffd79c67.jpg \n", + " inflating: hymenoptera_data/train/bees/198508668_97d818b6c4.jpg \n", + " inflating: hymenoptera_data/train/bees/2031225713_50ed499635.jpg \n", + " inflating: hymenoptera_data/train/bees/2037437624_2d7bce461f.jpg \n", + " inflating: hymenoptera_data/train/bees/2053200300_8911ef438a.jpg \n", + " inflating: hymenoptera_data/train/bees/205835650_e6f2614bee.jpg \n", + " inflating: hymenoptera_data/train/bees/208702903_42fb4d9748.jpg \n", + " inflating: 
hymenoptera_data/train/bees/21399619_3e61e5bb6f.jpg \n", + " inflating: hymenoptera_data/train/bees/2227611847_ec72d40403.jpg \n", + " inflating: hymenoptera_data/train/bees/2321139806_d73d899e66.jpg \n", + " inflating: hymenoptera_data/train/bees/2330918208_8074770c20.jpg \n", + " inflating: hymenoptera_data/train/bees/2345177635_caf07159b3.jpg \n", + " inflating: hymenoptera_data/train/bees/2358061370_9daabbd9ac.jpg \n", + " inflating: hymenoptera_data/train/bees/2364597044_3c3e3fc391.jpg \n", + " inflating: hymenoptera_data/train/bees/2384149906_2cd8b0b699.jpg \n", + " inflating: hymenoptera_data/train/bees/2397446847_04ef3cd3e1.jpg \n", + " inflating: hymenoptera_data/train/bees/2405441001_b06c36fa72.jpg \n", + " inflating: hymenoptera_data/train/bees/2445215254_51698ff797.jpg \n", + " inflating: hymenoptera_data/train/bees/2452236943_255bfd9e58.jpg \n", + " inflating: hymenoptera_data/train/bees/2467959963_a7831e9ff0.jpg \n", + " inflating: hymenoptera_data/train/bees/2470492904_837e97800d.jpg \n", + " inflating: hymenoptera_data/train/bees/2477324698_3d4b1b1cab.jpg \n", + " inflating: hymenoptera_data/train/bees/2477349551_e75c97cf4d.jpg \n", + " inflating: hymenoptera_data/train/bees/2486729079_62df0920be.jpg \n", + " inflating: hymenoptera_data/train/bees/2486746709_c43cec0e42.jpg \n", + " inflating: hymenoptera_data/train/bees/2493379287_4100e1dacc.jpg \n", + " inflating: hymenoptera_data/train/bees/2495722465_879acf9d85.jpg \n", + " inflating: hymenoptera_data/train/bees/2528444139_fa728b0f5b.jpg \n", + " inflating: hymenoptera_data/train/bees/2538361678_9da84b77e3.jpg \n", + " inflating: hymenoptera_data/train/bees/2551813042_8a070aeb2b.jpg \n", + " inflating: hymenoptera_data/train/bees/2580598377_a4caecdb54.jpg \n", + " inflating: hymenoptera_data/train/bees/2601176055_8464e6aa71.jpg \n", + " inflating: hymenoptera_data/train/bees/2610833167_79bf0bcae5.jpg \n", + " inflating: hymenoptera_data/train/bees/2610838525_fe8e3cae47.jpg \n", + " inflating: 
hymenoptera_data/train/bees/2617161745_fa3ebe85b4.jpg \n", + " inflating: hymenoptera_data/train/bees/2625499656_e3415e374d.jpg \n", + " inflating: hymenoptera_data/train/bees/2634617358_f32fd16bea.jpg \n", + " inflating: hymenoptera_data/train/bees/2638074627_6b3ae746a0.jpg \n", + " inflating: hymenoptera_data/train/bees/2645107662_b73a8595cc.jpg \n", + " inflating: hymenoptera_data/train/bees/2651621464_a2fa8722eb.jpg \n", + " inflating: hymenoptera_data/train/bees/2652877533_a564830cbf.jpg \n", + " inflating: hymenoptera_data/train/bees/266644509_d30bb16a1b.jpg \n", + " inflating: hymenoptera_data/train/bees/2683605182_9d2a0c66cf.jpg \n", + " inflating: hymenoptera_data/train/bees/2704348794_eb5d5178c2.jpg \n", + " inflating: hymenoptera_data/train/bees/2707440199_cd170bd512.jpg \n", + " inflating: hymenoptera_data/train/bees/2710368626_cb42882dc8.jpg \n", + " inflating: hymenoptera_data/train/bees/2722592222_258d473e17.jpg \n", + " inflating: hymenoptera_data/train/bees/2728759455_ce9bb8cd7a.jpg \n", + " inflating: hymenoptera_data/train/bees/2756397428_1d82a08807.jpg \n", + " inflating: hymenoptera_data/train/bees/2765347790_da6cf6cb40.jpg \n", + " inflating: hymenoptera_data/train/bees/2781170484_5d61835d63.jpg \n", + " inflating: hymenoptera_data/train/bees/279113587_b4843db199.jpg \n", + " inflating: hymenoptera_data/train/bees/2792000093_e8ae0718cf.jpg \n", + " inflating: hymenoptera_data/train/bees/2801728106_833798c909.jpg \n", + " inflating: hymenoptera_data/train/bees/2822388965_f6dca2a275.jpg \n", + " inflating: hymenoptera_data/train/bees/2861002136_52c7c6f708.jpg \n", + " inflating: hymenoptera_data/train/bees/2908916142_a7ac8b57a8.jpg \n", + " inflating: hymenoptera_data/train/bees/29494643_e3410f0d37.jpg \n", + " inflating: hymenoptera_data/train/bees/2959730355_416a18c63c.jpg \n", + " inflating: hymenoptera_data/train/bees/2962405283_22718d9617.jpg \n", + " inflating: hymenoptera_data/train/bees/3006264892_30e9cced70.jpg \n", + " inflating: 
hymenoptera_data/train/bees/3030189811_01d095b793.jpg \n", + " inflating: hymenoptera_data/train/bees/3030772428_8578335616.jpg \n", + " inflating: hymenoptera_data/train/bees/3044402684_3853071a87.jpg \n", + " inflating: hymenoptera_data/train/bees/3074585407_9854eb3153.jpg \n", + " inflating: hymenoptera_data/train/bees/3079610310_ac2d0ae7bc.jpg \n", + " inflating: hymenoptera_data/train/bees/3090975720_71f12e6de4.jpg \n", + " inflating: hymenoptera_data/train/bees/3100226504_c0d4f1e3f1.jpg \n", + " inflating: hymenoptera_data/train/bees/342758693_c56b89b6b6.jpg \n", + " inflating: hymenoptera_data/train/bees/354167719_22dca13752.jpg \n", + " inflating: hymenoptera_data/train/bees/359928878_b3b418c728.jpg \n", + " inflating: hymenoptera_data/train/bees/365759866_b15700c59b.jpg \n", + " inflating: hymenoptera_data/train/bees/36900412_92b81831ad.jpg \n", + " inflating: hymenoptera_data/train/bees/39672681_1302d204d1.jpg \n", + " inflating: hymenoptera_data/train/bees/39747887_42df2855ee.jpg \n", + " inflating: hymenoptera_data/train/bees/421515404_e87569fd8b.jpg \n", + " inflating: hymenoptera_data/train/bees/444532809_9e931e2279.jpg \n", + " inflating: hymenoptera_data/train/bees/446296270_d9e8b93ecf.jpg \n", + " inflating: hymenoptera_data/train/bees/452462677_7be43af8ff.jpg \n", + " inflating: hymenoptera_data/train/bees/452462695_40a4e5b559.jpg \n", + " inflating: hymenoptera_data/train/bees/457457145_5f86eb7e9c.jpg \n", + " inflating: hymenoptera_data/train/bees/465133211_80e0c27f60.jpg \n", + " inflating: hymenoptera_data/train/bees/469333327_358ba8fe8a.jpg \n", + " inflating: hymenoptera_data/train/bees/472288710_2abee16fa0.jpg \n", + " inflating: hymenoptera_data/train/bees/473618094_8ffdcab215.jpg \n", + " inflating: hymenoptera_data/train/bees/476347960_52edd72b06.jpg \n", + " inflating: hymenoptera_data/train/bees/478701318_bbd5e557b8.jpg \n", + " inflating: hymenoptera_data/train/bees/507288830_f46e8d4cb2.jpg \n", + " inflating: 
hymenoptera_data/train/bees/509247772_2db2d01374.jpg \n", + " inflating: hymenoptera_data/train/bees/513545352_fd3e7c7c5d.jpg \n", + " inflating: hymenoptera_data/train/bees/522104315_5d3cb2758e.jpg \n", + " inflating: hymenoptera_data/train/bees/537309131_532bfa59ea.jpg \n", + " inflating: hymenoptera_data/train/bees/586041248_3032e277a9.jpg \n", + " inflating: hymenoptera_data/train/bees/760526046_547e8b381f.jpg \n", + " inflating: hymenoptera_data/train/bees/760568592_45a52c847f.jpg \n", + " inflating: hymenoptera_data/train/bees/774440991_63a4aa0cbe.jpg \n", + " inflating: hymenoptera_data/train/bees/85112639_6e860b0469.jpg \n", + " inflating: hymenoptera_data/train/bees/873076652_eb098dab2d.jpg \n", + " inflating: hymenoptera_data/train/bees/90179376_abc234e5f4.jpg \n", + " inflating: hymenoptera_data/train/bees/92663402_37f379e57a.jpg \n", + " inflating: hymenoptera_data/train/bees/95238259_98470c5b10.jpg \n", + " inflating: hymenoptera_data/train/bees/969455125_58c797ef17.jpg \n", + " inflating: hymenoptera_data/train/bees/98391118_bdb1e80cce.jpg \n", + " creating: hymenoptera_data/val/\n", + " creating: hymenoptera_data/val/ants/\n", + " inflating: hymenoptera_data/val/ants/10308379_1b6c72e180.jpg \n", + " inflating: hymenoptera_data/val/ants/1053149811_f62a3410d3.jpg \n", + " inflating: hymenoptera_data/val/ants/1073564163_225a64f170.jpg \n", + " inflating: hymenoptera_data/val/ants/1119630822_cd325ea21a.jpg \n", + " inflating: hymenoptera_data/val/ants/1124525276_816a07c17f.jpg \n", + " inflating: hymenoptera_data/val/ants/11381045_b352a47d8c.jpg \n", + " inflating: hymenoptera_data/val/ants/119785936_dd428e40c3.jpg \n", + " inflating: hymenoptera_data/val/ants/1247887232_edcb61246c.jpg \n", + " inflating: hymenoptera_data/val/ants/1262751255_c56c042b7b.jpg \n", + " inflating: hymenoptera_data/val/ants/1337725712_2eb53cd742.jpg \n", + " inflating: hymenoptera_data/val/ants/1358854066_5ad8015f7f.jpg \n", + " inflating: 
hymenoptera_data/val/ants/1440002809_b268d9a66a.jpg \n", + " inflating: hymenoptera_data/val/ants/147542264_79506478c2.jpg \n", + " inflating: hymenoptera_data/val/ants/152286280_411648ec27.jpg \n", + " inflating: hymenoptera_data/val/ants/153320619_2aeb5fa0ee.jpg \n", + " inflating: hymenoptera_data/val/ants/153783656_85f9c3ac70.jpg \n", + " inflating: hymenoptera_data/val/ants/157401988_d0564a9d02.jpg \n", + " inflating: hymenoptera_data/val/ants/159515240_d5981e20d1.jpg \n", + " inflating: hymenoptera_data/val/ants/161076144_124db762d6.jpg \n", + " inflating: hymenoptera_data/val/ants/161292361_c16e0bf57a.jpg \n", + " inflating: hymenoptera_data/val/ants/170652283_ecdaff5d1a.jpg \n", + " inflating: hymenoptera_data/val/ants/17081114_79b9a27724.jpg \n", + " inflating: hymenoptera_data/val/ants/172772109_d0a8e15fb0.jpg \n", + " inflating: hymenoptera_data/val/ants/1743840368_b5ccda82b7.jpg \n", + " inflating: hymenoptera_data/val/ants/181942028_961261ef48.jpg \n", + " inflating: hymenoptera_data/val/ants/183260961_64ab754c97.jpg \n", + " inflating: hymenoptera_data/val/ants/2039585088_c6f47c592e.jpg \n", + " inflating: hymenoptera_data/val/ants/205398178_c395c5e460.jpg \n", + " inflating: hymenoptera_data/val/ants/208072188_f293096296.jpg \n", + " inflating: hymenoptera_data/val/ants/209615353_eeb38ba204.jpg \n", + " inflating: hymenoptera_data/val/ants/2104709400_8831b4fc6f.jpg \n", + " inflating: hymenoptera_data/val/ants/212100470_b485e7b7b9.jpg \n", + " inflating: hymenoptera_data/val/ants/2127908701_d49dc83c97.jpg \n", + " inflating: hymenoptera_data/val/ants/2191997003_379df31291.jpg \n", + " inflating: hymenoptera_data/val/ants/2211974567_ee4606b493.jpg \n", + " inflating: hymenoptera_data/val/ants/2219621907_47bc7cc6b0.jpg \n", + " inflating: hymenoptera_data/val/ants/2238242353_52c82441df.jpg \n", + " inflating: hymenoptera_data/val/ants/2255445811_dabcdf7258.jpg \n", + " inflating: hymenoptera_data/val/ants/239161491_86ac23b0a3.jpg \n", + " inflating: 
hymenoptera_data/val/ants/263615709_cfb28f6b8e.jpg \n", + " inflating: hymenoptera_data/val/ants/308196310_1db5ffa01b.jpg \n", + " inflating: hymenoptera_data/val/ants/319494379_648fb5a1c6.jpg \n", + " inflating: hymenoptera_data/val/ants/35558229_1fa4608a7a.jpg \n", + " inflating: hymenoptera_data/val/ants/412436937_4c2378efc2.jpg \n", + " inflating: hymenoptera_data/val/ants/436944325_d4925a38c7.jpg \n", + " inflating: hymenoptera_data/val/ants/445356866_6cb3289067.jpg \n", + " inflating: hymenoptera_data/val/ants/459442412_412fecf3fe.jpg \n", + " inflating: hymenoptera_data/val/ants/470127071_8b8ee2bd74.jpg \n", + " inflating: hymenoptera_data/val/ants/477437164_bc3e6e594a.jpg \n", + " inflating: hymenoptera_data/val/ants/488272201_c5aa281348.jpg \n", + " inflating: hymenoptera_data/val/ants/502717153_3e4865621a.jpg \n", + " inflating: hymenoptera_data/val/ants/518746016_bcc28f8b5b.jpg \n", + " inflating: hymenoptera_data/val/ants/540543309_ddbb193ee5.jpg \n", + " inflating: hymenoptera_data/val/ants/562589509_7e55469b97.jpg \n", + " inflating: hymenoptera_data/val/ants/57264437_a19006872f.jpg \n", + " inflating: hymenoptera_data/val/ants/573151833_ebbc274b77.jpg \n", + " inflating: hymenoptera_data/val/ants/649407494_9b6bc4949f.jpg \n", + " inflating: hymenoptera_data/val/ants/751649788_78dd7d16ce.jpg \n", + " inflating: hymenoptera_data/val/ants/768870506_8f115d3d37.jpg \n", + " inflating: hymenoptera_data/val/ants/800px-Meat_eater_ant_qeen_excavating_hole.jpg \n", + " inflating: hymenoptera_data/val/ants/8124241_36b290d372.jpg \n", + " inflating: hymenoptera_data/val/ants/8398478_50ef10c47a.jpg \n", + " inflating: hymenoptera_data/val/ants/854534770_31f6156383.jpg \n", + " inflating: hymenoptera_data/val/ants/892676922_4ab37dce07.jpg \n", + " inflating: hymenoptera_data/val/ants/94999827_36895faade.jpg \n", + " inflating: hymenoptera_data/val/ants/Ant-1818.jpg \n", + " inflating: 
hymenoptera_data/val/ants/ants-devouring-remains-of-large-dead-insect-on-red-tile-in-Stellenbosch-South-Africa-closeup-1-DHD.jpg \n", + " inflating: hymenoptera_data/val/ants/desert_ant.jpg \n", + " inflating: hymenoptera_data/val/ants/F.pergan.28(f).jpg \n", + " inflating: hymenoptera_data/val/ants/Hormiga.jpg \n", + " creating: hymenoptera_data/val/bees/\n", + " inflating: hymenoptera_data/val/bees/1032546534_06907fe3b3.jpg \n", + " inflating: hymenoptera_data/val/bees/10870992_eebeeb3a12.jpg \n", + " inflating: hymenoptera_data/val/bees/1181173278_23c36fac71.jpg \n", + " inflating: hymenoptera_data/val/bees/1297972485_33266a18d9.jpg \n", + " inflating: hymenoptera_data/val/bees/1328423762_f7a88a8451.jpg \n", + " inflating: hymenoptera_data/val/bees/1355974687_1341c1face.jpg \n", + " inflating: hymenoptera_data/val/bees/144098310_a4176fd54d.jpg \n", + " inflating: hymenoptera_data/val/bees/1486120850_490388f84b.jpg \n", + " inflating: hymenoptera_data/val/bees/149973093_da3c446268.jpg \n", + " inflating: hymenoptera_data/val/bees/151594775_ee7dc17b60.jpg \n", + " inflating: hymenoptera_data/val/bees/151603988_2c6f7d14c7.jpg \n", + " inflating: hymenoptera_data/val/bees/1519368889_4270261ee3.jpg \n", + " inflating: hymenoptera_data/val/bees/152789693_220b003452.jpg \n", + " inflating: hymenoptera_data/val/bees/177677657_a38c97e572.jpg \n", + " inflating: hymenoptera_data/val/bees/1799729694_0c40101071.jpg \n", + " inflating: hymenoptera_data/val/bees/181171681_c5a1a82ded.jpg \n", + " inflating: hymenoptera_data/val/bees/187130242_4593a4c610.jpg \n", + " inflating: hymenoptera_data/val/bees/203868383_0fcbb48278.jpg \n", + " inflating: hymenoptera_data/val/bees/2060668999_e11edb10d0.jpg \n", + " inflating: hymenoptera_data/val/bees/2086294791_6f3789d8a6.jpg \n", + " inflating: hymenoptera_data/val/bees/2103637821_8d26ee6b90.jpg \n", + " inflating: hymenoptera_data/val/bees/2104135106_a65eede1de.jpg \n", + " inflating: 
hymenoptera_data/val/bees/215512424_687e1e0821.jpg \n", + " inflating: hymenoptera_data/val/bees/2173503984_9c6aaaa7e2.jpg \n", + " inflating: hymenoptera_data/val/bees/220376539_20567395d8.jpg \n", + " inflating: hymenoptera_data/val/bees/224841383_d050f5f510.jpg \n", + " inflating: hymenoptera_data/val/bees/2321144482_f3785ba7b2.jpg \n", + " inflating: hymenoptera_data/val/bees/238161922_55fa9a76ae.jpg \n", + " inflating: hymenoptera_data/val/bees/2407809945_fb525ef54d.jpg \n", + " inflating: hymenoptera_data/val/bees/2415414155_1916f03b42.jpg \n", + " inflating: hymenoptera_data/val/bees/2438480600_40a1249879.jpg \n", + " inflating: hymenoptera_data/val/bees/2444778727_4b781ac424.jpg \n", + " inflating: hymenoptera_data/val/bees/2457841282_7867f16639.jpg \n", + " inflating: hymenoptera_data/val/bees/2470492902_3572c90f75.jpg \n", + " inflating: hymenoptera_data/val/bees/2478216347_535c8fe6d7.jpg \n", + " inflating: hymenoptera_data/val/bees/2501530886_e20952b97d.jpg \n", + " inflating: hymenoptera_data/val/bees/2506114833_90a41c5267.jpg \n", + " inflating: hymenoptera_data/val/bees/2509402554_31821cb0b6.jpg \n", + " inflating: hymenoptera_data/val/bees/2525379273_dcb26a516d.jpg \n", + " inflating: hymenoptera_data/val/bees/26589803_5ba7000313.jpg \n", + " inflating: hymenoptera_data/val/bees/2668391343_45e272cd07.jpg \n", + " inflating: hymenoptera_data/val/bees/2670536155_c170f49cd0.jpg \n", + " inflating: hymenoptera_data/val/bees/2685605303_9eed79d59d.jpg \n", + " inflating: hymenoptera_data/val/bees/2702408468_d9ed795f4f.jpg \n", + " inflating: hymenoptera_data/val/bees/2709775832_85b4b50a57.jpg \n", + " inflating: hymenoptera_data/val/bees/2717418782_bd83307d9f.jpg \n", + " inflating: hymenoptera_data/val/bees/272986700_d4d4bf8c4b.jpg \n", + " inflating: hymenoptera_data/val/bees/2741763055_9a7bb00802.jpg \n", + " inflating: hymenoptera_data/val/bees/2745389517_250a397f31.jpg \n", + " inflating: hymenoptera_data/val/bees/2751836205_6f7b5eff30.jpg \n", + " 
inflating: hymenoptera_data/val/bees/2782079948_8d4e94a826.jpg \n", + " inflating: hymenoptera_data/val/bees/2809496124_5f25b5946a.jpg \n", + " inflating: hymenoptera_data/val/bees/2815838190_0a9889d995.jpg \n", + " inflating: hymenoptera_data/val/bees/2841437312_789699c740.jpg \n", + " inflating: hymenoptera_data/val/bees/2883093452_7e3a1eb53f.jpg \n", + " inflating: hymenoptera_data/val/bees/290082189_f66cb80bfc.jpg \n", + " inflating: hymenoptera_data/val/bees/296565463_d07a7bed96.jpg \n", + " inflating: hymenoptera_data/val/bees/3077452620_548c79fda0.jpg \n", + " inflating: hymenoptera_data/val/bees/348291597_ee836fbb1a.jpg \n", + " inflating: hymenoptera_data/val/bees/350436573_41f4ecb6c8.jpg \n", + " inflating: hymenoptera_data/val/bees/353266603_d3eac7e9a0.jpg \n", + " inflating: hymenoptera_data/val/bees/372228424_16da1f8884.jpg \n", + " inflating: hymenoptera_data/val/bees/400262091_701c00031c.jpg \n", + " inflating: hymenoptera_data/val/bees/416144384_961c326481.jpg \n", + " inflating: hymenoptera_data/val/bees/44105569_16720a960c.jpg \n", + " inflating: hymenoptera_data/val/bees/456097971_860949c4fc.jpg \n", + " inflating: hymenoptera_data/val/bees/464594019_1b24a28bb1.jpg \n", + " inflating: hymenoptera_data/val/bees/485743562_d8cc6b8f73.jpg \n", + " inflating: hymenoptera_data/val/bees/540976476_844950623f.jpg \n", + " inflating: hymenoptera_data/val/bees/54736755_c057723f64.jpg \n", + " inflating: hymenoptera_data/val/bees/57459255_752774f1b2.jpg \n", + " inflating: hymenoptera_data/val/bees/576452297_897023f002.jpg \n", + " inflating: hymenoptera_data/val/bees/586474709_ae436da045.jpg \n", + " inflating: hymenoptera_data/val/bees/590318879_68cf112861.jpg \n", + " inflating: hymenoptera_data/val/bees/59798110_2b6a3c8031.jpg \n", + " inflating: hymenoptera_data/val/bees/603709866_a97c7cfc72.jpg \n", + " inflating: hymenoptera_data/val/bees/603711658_4c8cd2201e.jpg \n", + " inflating: hymenoptera_data/val/bees/65038344_52a45d090d.jpg \n", + " inflating: 
hymenoptera_data/val/bees/6a00d8341c630a53ef00e553d0beb18834-800wi.jpg \n", + " inflating: hymenoptera_data/val/bees/72100438_73de9f17af.jpg \n", + " inflating: hymenoptera_data/val/bees/759745145_e8bc776ec8.jpg \n", + " inflating: hymenoptera_data/val/bees/936182217_c4caa5222d.jpg \n", + " inflating: hymenoptera_data/val/bees/abeja.jpg \n" + ] + } + ], + "source": [ + "# download the data\n", + "!wget https://download.pytorch.org/tutorial/hymenoptera_data.zip\n", + "!unzip hymenoptera_data.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Transformation\n", + "This is an image classification task, which means that we need to perform a few transformations on our dataset before we train our models. I used similar transformations as used in this [tutorial](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html#transfer-learning-for-computer-vision-tutorial). For a detailed overview of each transformation take a look at the official torchvision [documentation](https://pytorch.org/docs/stable/torchvision/transforms.html).\n", + "\n", + "The following code block performs the following operations:\n", + "- The `data_transforms` contains a series of transformations that will be performed on each image found in the dataset. This includes cropping the image, resizing the image, converting it to tensor, reshaping it, and normalizing it. \n", + "- Once those transformations have been defined, then the `DataLoader` function is used to automatically load the datasets and perform any additional configuration such as shuffling, batches, etc." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# configure root folder on your gdrive\n", + "data_dir = 'hymenoptera_data'\n", + "\n", + "# custom transformer to flatten the image tensors\n", + "class ReshapeTransform:\n", + " def __init__(self, new_size):\n", + " self.new_size = new_size\n", + "\n", + " def __call__(self, img):\n", + " result = torch.reshape(img, self.new_size)\n", + " return result\n", + "\n", + "# transformations used to standardize and normalize the datasets\n", + "data_transforms = {\n", + " 'train': transforms.Compose([\n", + " transforms.Resize(224),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " ReshapeTransform((-1,)) # flattens the data\n", + " ]),\n", + " 'val': transforms.Compose([\n", + " transforms.Resize(224),\n", + " transforms.CenterCrop(224),\n", + " transforms.ToTensor(),\n", + " ReshapeTransform((-1,)) # flattens the data\n", + " ]),\n", + "}\n", + "\n", + "# load the correspoding folders\n", + "image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),\n", + " data_transforms[x])\n", + " for x in ['train', 'val']}\n", + "\n", + "# load the entire dataset; we are not using minibatches here\n", + "train_dataset = torch.utils.data.DataLoader(image_datasets['train'],\n", + " batch_size=len(image_datasets['train']),\n", + " shuffle=True)\n", + "\n", + "test_dataset = torch.utils.data.DataLoader(image_datasets['val'],\n", + " batch_size=len(image_datasets['val']),\n", + " shuffle=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(244, 153)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(image_datasets['train']), len(image_datasets['val'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Print sample\n", + "It's always a good practice to take a quick 
look at the dataset before training your models. Below we print out an example of one of the images from the `train_dataset`." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dimension of image: torch.Size([244, 150528]) \n", + " Dimension of labels torch.Size([244])\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/codespace/.conda/envs/play/lib/python3.7/site-packages/ipykernel_launcher.py:9: UserWarning: The use of `x.T` on tensors of dimension other than 2 to reverse their shape is deprecated and it will throw an error in a future release. Consider `x.mT` to transpose batches of matricesor `x.permute(*torch.arange(x.ndim - 1, -1, -1))` to reverse the dimensions of a tensor. (Triggered internally at /opt/conda/conda-bld/pytorch_1646755953518/work/aten/src/ATen/native/TensorShape.cpp:2318.)\n", + " if __name__ == '__main__':\n" + ] + }, + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# load the entire dataset\n", + "x, y = next(iter(train_dataset))\n", + "\n", + "# print one example\n", + "dim = x.shape[1]\n", + "print(\"Dimension of image:\", x.shape, \"\\n\", \n", + " \"Dimension of labels\", y.shape)\n", + "\n", + "plt.imshow(x[160].reshape(1, 3, 224, 224).squeeze().T.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Building the Model\n", + "Let's now implement our [logistic regression](https://en.wikipedia.org/wiki/Logistic_regression) model. Logistic regression is one in a family of machine learning techniques that are used to train binary classifiers. They are also a great way to understand the fundamental building blocks of neural networks, thus they can also be considered the simplest of neural networks where the model performs a `forward` and `backward` propagation to train the model on the data provided. \n", + "\n", + "If you don't fully understand the structure of the code below, I strongly recommend you to read the following [tutorial](https://medium.com/dair-ai/pytorch-1-2-introduction-guide-f6fa9bb7597c), which I wrote for PyTorch beginners. You can also check out [Week 2](https://www.coursera.org/learn/neural-networks-deep-learning/home/week/2) of Andrew Ng's Deep Learning Specialization course for all the explanation, intuitions, and details of the different parts of the neural network such as the `forward`, `sigmoid`, `backward`, and `optimization` steps. \n", + "\n", + "In short:\n", + "- The `__init__` function initializes all the parameters (`W`, `b`, `grad`) that will be used to train the model through backpropagation. 
\n", + "- The goal is to learn the `W` and `b` that minimize the cost function which is computed as seen in the `loss` function below.\n", + "\n", + "Note that this is a very detailed implementation of a logistic regression model so I had to explicitly move a lot of the computations into the GPU for faster calculation; `to(device)` takes care of this in PyTorch. " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "class LR(nn.Module):\n", + " def __init__(self, dim, lr=torch.scalar_tensor(0.01)):\n", + " super(LR, self).__init__()\n", + " # intialize parameters\n", + " self.w = torch.zeros(dim, 1, dtype=torch.float).to(device)\n", + " self.b = torch.scalar_tensor(0).to(device)\n", + " self.grads = {\"dw\": torch.zeros(dim, 1, dtype=torch.float).to(device),\n", + " \"db\": torch.scalar_tensor(0).to(device)}\n", + " self.lr = lr.to(device)\n", + "\n", + " def forward(self, x):\n", + " # compute forward\n", + " z = torch.mm(self.w.T, x) + self.b\n", + " a = self.sigmoid(z)\n", + " return a\n", + "\n", + " def sigmoid(self, z):\n", + " # compute sigmoid\n", + " return 1/(1 + torch.exp(-z))\n", + "\n", + " def backward(self, x, yhat, y):\n", + " # compute backward\n", + " self.grads[\"dw\"] = (1/x.shape[1]) * torch.mm(x, (yhat - y).T)\n", + " self.grads[\"db\"] = (1/x.shape[1]) * torch.sum(yhat - y)\n", + " \n", + " def optimize(self):\n", + " # optimization step\n", + " self.w = self.w - self.lr * self.grads[\"dw\"]\n", + " self.b = self.b - self.lr * self.grads[\"db\"]\n", + "\n", + "## utility functions\n", + "def loss(yhat, y):\n", + " m = y.size()[1]\n", + " return -(1/m)* torch.sum(y*torch.log(yhat) + (1 - y)* torch.log(1-yhat))\n", + "\n", + "def predict(yhat, y):\n", + " y_prediction = torch.zeros(1, y.size()[1])\n", + " for i in range(yhat.size()[1]):\n", + " if yhat[0, i] <= 0.5:\n", + " y_prediction[0, i] = 0\n", + " else:\n", + " y_prediction[0, i] = 1\n", + " return 100 - 
torch.mean(torch.abs(y_prediction - y)) * 100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pretesting the Model\n", + "It is also good practice to test your model and make sure the right steps are taking place before training the entire model." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost: tensor(0.6931)\n", + "Accuracy: tensor(50.4098)\n" + ] + } + ], + "source": [ + "# model pretesting\n", + "x, y = next(iter(train_dataset))\n", + "\n", + "# flatten/transform the data\n", + "x_flatten = x.T\n", + "y = y.unsqueeze(0) \n", + "\n", + "# num_px is the dimension of the images\n", + "dim = x_flatten.shape[0]\n", + "\n", + "# model instance\n", + "model = LR(dim)\n", + "model.to(device)\n", + "yhat = model.forward(x_flatten.to(device))\n", + "yhat = yhat.data.cpu()\n", + "\n", + "# calculate loss\n", + "cost = loss(yhat, y)\n", + "prediction = predict(yhat, y)\n", + "print(\"Cost: \", cost)\n", + "print(\"Accuracy: \", prediction)\n", + "\n", + "# backpropagate\n", + "model.backward(x_flatten.to(device), yhat.to(device), y.to(device))\n", + "model.optimize()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Train the Model\n", + "It's now time to train the model. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Cost after iteration 0: 0.6931472420692444 | Train Acc: 50.40983581542969 | Test Acc: 45.75163269042969\n", + "Cost after iteration 10: 0.6691470742225647 | Train Acc: 64.3442611694336 | Test Acc: 54.24836730957031\n", + "Cost after iteration 20: 0.6513182520866394 | Train Acc: 68.44261932373047 | Test Acc: 54.24836730957031\n", + "Cost after iteration 30: 0.6367825865745544 | Train Acc: 68.03278350830078 | Test Acc: 54.24836730957031\n", + "Cost after iteration 40: 0.6245337128639221 | Train Acc: 69.67213439941406 | Test Acc: 54.90196228027344\n", + "Cost after iteration 50: 0.6139225959777832 | Train Acc: 70.90164184570312 | Test Acc: 56.20914840698242\n", + "Cost after iteration 60: 0.6045235395431519 | Train Acc: 72.54098510742188 | Test Acc: 56.86274337768555\n", + "Cost after iteration 70: 0.5960511565208435 | Train Acc: 74.18032836914062 | Test Acc: 57.51633834838867\n", + "Cost after iteration 80: 0.5883085131645203 | Train Acc: 73.77049255371094 | Test Acc: 57.51633834838867\n", + "Cost after iteration 90: 0.5811557769775391 | Train Acc: 74.59016418457031 | Test Acc: 58.1699333190918\n", + "Cost after iteration 100: 0.5744912028312683 | Train Acc: 75.0 | Test Acc: 59.47712326049805\n", + "Cost after iteration 110: 0.5682381987571716 | Train Acc: 75.40983581542969 | Test Acc: 60.13071823120117\n", + "Cost after iteration 120: 0.5623382925987244 | Train Acc: 75.81967163085938 | Test Acc: 60.13071823120117\n", + "Cost after iteration 130: 0.5567454099655151 | Train Acc: 75.81967163085938 | Test Acc: 59.47712326049805\n", + "Cost after iteration 140: 0.5514224767684937 | Train Acc: 75.81967163085938 | Test Acc: 59.47712326049805\n", + "Cost after iteration 150: 0.5463393926620483 | Train Acc: 76.22950744628906 | Test Acc: 58.82352828979492\n", + "Cost after iteration 160: 0.5414712429046631 | Train 
Acc: 76.63934326171875 | Test Acc: 58.82352828979492\n", + "Cost after iteration 170: 0.5367968678474426 | Train Acc: 77.04917907714844 | Test Acc: 58.82352828979492\n", + "Cost after iteration 180: 0.5322986245155334 | Train Acc: 77.04917907714844 | Test Acc: 58.82352828979492\n", + "Cost after iteration 190: 0.5279611349105835 | Train Acc: 77.45901489257812 | Test Acc: 58.82352828979492\n", + "Cost after iteration 200: 0.5237710475921631 | Train Acc: 78.2786865234375 | Test Acc: 58.1699333190918\n", + "Cost after iteration 210: 0.5197169184684753 | Train Acc: 78.2786865234375 | Test Acc: 58.1699333190918\n", + "Cost after iteration 220: 0.5157885551452637 | Train Acc: 79.09835815429688 | Test Acc: 57.51633834838867\n", + "Cost after iteration 230: 0.5119768977165222 | Train Acc: 79.91802978515625 | Test Acc: 57.51633834838867\n", + "Cost after iteration 240: 0.5082740187644958 | Train Acc: 79.91802978515625 | Test Acc: 60.13071823120117\n", + "Cost after iteration 250: 0.5046727657318115 | Train Acc: 79.91802978515625 | Test Acc: 60.13071823120117\n", + "Cost after iteration 260: 0.5011667013168335 | Train Acc: 80.73770141601562 | Test Acc: 60.7843132019043\n", + "Cost after iteration 270: 0.49775001406669617 | Train Acc: 81.14753723144531 | Test Acc: 60.7843132019043\n", + "Cost after iteration 280: 0.49441757798194885 | Train Acc: 81.557373046875 | Test Acc: 60.7843132019043\n", + "Cost after iteration 290: 0.49116453528404236 | Train Acc: 81.557373046875 | Test Acc: 61.43790817260742\n", + "Cost after iteration 300: 0.48798662424087524 | Train Acc: 81.557373046875 | Test Acc: 61.43790817260742\n", + "Cost after iteration 310: 0.48487982153892517 | Train Acc: 81.96721649169922 | Test Acc: 61.43790817260742\n", + "Cost after iteration 320: 0.4818406403064728 | Train Acc: 81.96721649169922 | Test Acc: 61.43790817260742\n", + "Cost after iteration 330: 0.4788656532764435 | Train Acc: 82.37704467773438 | Test Acc: 61.43790817260742\n", + "Cost after iteration 340: 
0.4759517014026642 | Train Acc: 82.37704467773438 | Test Acc: 61.43790817260742\n", + "Cost after iteration 350: 0.4730961322784424 | Train Acc: 83.19672393798828 | Test Acc: 62.09150314331055\n", + "Cost after iteration 360: 0.4702962040901184 | Train Acc: 84.01639556884766 | Test Acc: 62.09150314331055\n", + "Cost after iteration 370: 0.46754947304725647 | Train Acc: 84.01639556884766 | Test Acc: 62.09150314331055\n", + "Cost after iteration 380: 0.46485379338264465 | Train Acc: 84.01639556884766 | Test Acc: 61.43790817260742\n", + "Cost after iteration 390: 0.4622068703174591 | Train Acc: 84.01639556884766 | Test Acc: 61.43790817260742\n", + "Cost after iteration 400: 0.4596068263053894 | Train Acc: 84.01639556884766 | Test Acc: 61.43790817260742\n", + "Cost after iteration 410: 0.45705193281173706 | Train Acc: 84.01639556884766 | Test Acc: 61.43790817260742\n", + "Cost after iteration 420: 0.4545402526855469 | Train Acc: 84.42623138427734 | Test Acc: 61.43790817260742\n", + "Cost after iteration 430: 0.4520702660083771 | Train Acc: 84.83606719970703 | Test Acc: 61.43790817260742\n", + "Cost after iteration 440: 0.4496404826641083 | Train Acc: 84.83606719970703 | Test Acc: 61.43790817260742\n", + "Cost after iteration 450: 0.4472493827342987 | Train Acc: 85.24590301513672 | Test Acc: 61.43790817260742\n", + "Cost after iteration 460: 0.4448956549167633 | Train Acc: 85.6557388305664 | Test Acc: 61.43790817260742\n", + "Cost after iteration 470: 0.4425780475139618 | Train Acc: 85.6557388305664 | Test Acc: 61.43790817260742\n", + "Cost after iteration 480: 0.44029536843299866 | Train Acc: 85.6557388305664 | Test Acc: 61.43790817260742\n", + "Cost after iteration 490: 0.43804648518562317 | Train Acc: 85.6557388305664 | Test Acc: 61.43790817260742\n" + ] + } + ], + "source": [ + "# hyperparams\n", + "costs = []\n", + "dim = x_flatten.shape[0]\n", + "learning_rate = torch.scalar_tensor(0.0001).to(device)\n", + "num_iterations = 500\n", + "lrmodel = LR(dim, 
learning_rate)\n", + "lrmodel.to(device)\n", + "\n", + "# transform the data\n", + "def transform_data(x, y):\n", + " x_flatten = x.T\n", + " y = y.unsqueeze(0) \n", + " return x_flatten, y \n", + "\n", + "# train the model\n", + "for i in range(num_iterations):\n", + " x, y = next(iter(train_dataset))\n", + " test_x, test_y = next(iter(test_dataset))\n", + " x, y = transform_data(x, y)\n", + " test_x, test_y = transform_data(test_x, test_y)\n", + "\n", + " # forward\n", + " yhat = lrmodel.forward(x.to(device))\n", + " cost = loss(yhat.data.cpu(), y)\n", + " train_pred = predict(yhat, y)\n", + " \n", + " # backward\n", + " lrmodel.backward(x.to(device), \n", + " yhat.to(device), \n", + " y.to(device))\n", + " lrmodel.optimize()\n", + "\n", + " # test\n", + " yhat_test = lrmodel.forward(test_x.to(device))\n", + " test_pred = predict(yhat_test, test_y)\n", + "\n", + " if i % 10 == 0:\n", + " costs.append(cost)\n", + "\n", + " if i % 10 == 0:\n", + " print(\"Cost after iteration {}: {} | Train Acc: {} | Test Acc: {}\".format(i, \n", + " cost, \n", + " train_pred,\n", + " test_pred))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Result\n", + "From the loss curve below you can see that the model is sort of learning to classify the images given the decrease in the loss. I only ran the model for `500` iterations. Train the model for many more rounds and analyze the results. In fact, I have suggested a couple of experiments and exercises at the end of the tutorial that you can try to get an improved model." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## the trend in the context of loss\n", + "plt.plot(costs)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Some Notes\n", + "There are many improvements and different experiments that you can perform on top of this notebook to keep practising ML:\n", + "- It is always good to normalize/standardize your images which helps with learning. As an experiment, you can research and try different ways to standardize the dataset. We have normalized the dataset with the builtin PyTorch [normalizer](https://pytorch.org/docs/stable/torchvision/transforms.html#torchvision.transforms.Normalize) which uses the mean and standard deviation. Play around with different transformations or normalization techniques. What effect does this have on learning in terms of speed and loss?\n", + "- You can try many things to help with learning such as playing around with the learning rate. Try to decrease and increase the learning rate and observe the effect of this on learning. \n", + "- If you explored the dataset further, you may have noticed that all the \"no-bee\" images are actually \"ant\" images. If you would like to create a more robust model, you may want to make your \"no-bee\" images more random and diverse through some data augmentation technique. This is a more advanced approach but there is a lot of good content to try out this idea. \n", + "- The model is not really performing well using just a simple logistic regression model. It could be because of the dataset I am using and because I didn't train it for long enough. Hyperparameters may also be off. It is a relatively small dataset but the performance could get better with more data and training over time. A more challenging task involves adapting the model to other datasets. 
Give it a try!\n", + "- Another important part that is missing in this tutorial is the comprehensive analysis of the model results. If you understand the code, it should be easy to figure out how to test with a few examples. In fact, it would also be great if you can put aside a small testing dataset for this part of the exercise, so as to test the generalization capabilities of the model.\n", + "- We built the logistic regression model from scratch but with libraries like PyTorch, these days you can simply leverage the high-level functions that implement certain parts of the neural network for you. This simplifies your code and minimizes the amount of bugs in your code. Plus you don't have to code your neural networks from scratch all the time. As a bonus exercise, try to adapt PyTorch builtin modules and functions for implementing a simpler, more concise version of the above logistic regression model. I will also add this as a to-do task for myself and post a solution soon. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "- [Understanding the Impact of Learning Rate on Neural Network Performance](https://machinelearningmastery.com/understand-the-dynamics-of-learning-rate-on-deep-learning-neural-networks/)\n", + "- [Transfer Learning for Computer Vision Tutorial](https://pytorch.org/tutorials/beginner/transfer_learning_tutorial.html#transfer-learning-for-computer-vision-tutorial)\n", + "- [Deep Learning Specialization by Andrew Ng](https://www.coursera.org/learn/neural-networks-deep-learning/home/welcome)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('play')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, 
+ "vscode": { + "interpreter": { + "hash": "cf9800998463bc980d70cdbacff0c7e9a10687346dc898569e92f016d6e252c9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/loglin-lm-dataloader.ipynb b/notebooks/loglin-lm-dataloader.ipynb new file mode 100644 index 0000000..8d6f66b --- /dev/null +++ b/notebooks/loglin-lm-dataloader.ipynb @@ -0,0 +1,385 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Linear Language Model with Data Loader\n", + "\n", + "Status of Notebook: Work in Progress\n", + "\n", + "Difference from `loglin-lm.ipynb` is that we use a data loader to load the data." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import random\n", + "import torch\n", + "import torch.nn as nn\n", + "import math\n", + "import time\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment to download the datasets\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/test.txt\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/train.txt\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/valid.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# function to read in data, process each line and split columns by \" ||| \"\n", + "def read_data(filename):\n", + " data = []\n", + " with open(filename, \"r\") as f:\n", + " for line in f:\n", + " line = line.strip().split(\" \")\n", + " data.append(line)\n", + " return data\n", + "\n", + "# read the data\n", + "train_data = 
read_data('data/ptb/train.txt')\n", + "val_data = read_data('data/ptb/valid.txt')\n", + "\n", + "# creating the word and tag indices and special tokens\n", + "word_to_index = {}\n", + "index_to_word = {}\n", + "word_to_index[\"\"] = len(word_to_index)\n", + "index_to_word[len(word_to_index)-1] = \"\"\n", + "word_to_index[\"\"] = len(word_to_index) # add to dictionary\n", + "index_to_word[len(word_to_index)-1] = \"\"\n", + "\n", + "# create word to index dictionary and tag to index dictionary from data\n", + "def create_dict(data, check_unk=False):\n", + " for line in data:\n", + " for word in line:\n", + " if check_unk == False:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = len(word_to_index)\n", + " index_to_word[len(word_to_index)-1] = word\n", + " \n", + " # has no effect because data already comes with \n", + " # should work with data without already processed\n", + " else: \n", + " if word not in word_to_index:\n", + " word_to_index[word] = word_to_index[\"\"]\n", + " index_to_word[len(word_to_index)-1] = word\n", + "\n", + "create_dict(train_data)\n", + "create_dict(val_data, check_unk=True)\n", + "\n", + "# create word and tag tensors from data\n", + "def create_tensor(data):\n", + " for line in data:\n", + " yield [word_to_index[word] for word in line]\n", + "\n", + "train_data = [*create_tensor(train_data)]\n", + "val_data = [*create_tensor(val_data)]\n", + "\n", + "number_of_words = len(word_to_index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Convert data to PyTorch Dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from torch.utils.data import Dataset, DataLoader\n", + "\n", + "class PTB(Dataset):\n", + " def __init__(self, data):\n", + " self.data = data\n", + "\n", + " def __len__(self):\n", + " return len(self.data)\n", + "\n", + " def __getitem__(self, idx):\n", + " return torch.as_tensor(self.data[idx])\n", + "\n", + 
"train_dataset = PTB(train_data)\n", + "val_dataset = PTB(val_data)\n", + "\n", + "train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)\n", + "val_loader = DataLoader(val_dataset, batch_size=1, shuffle=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In our implementation we are using batched training. There are a few differences from the original implementation found [here](https://github.com/neubig/nn4nlp-code/blob/master/02-lm/loglin-lm.py). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "## define the model\n", + "\n", + "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", + "\n", + "# length of the n-gram\n", + "N = 2\n", + "\n", + "# logilinear model\n", + "class LogLinear(nn.Module):\n", + " def __init__(self, number_of_words, ngram_length):\n", + " super(LogLinear, self).__init__()\n", + "\n", + " # different lookups for each position in the n-gram\n", + " self.embeddings = nn.ModuleList([nn.Embedding(number_of_words, number_of_words) for _ in range(ngram_length)])\n", + " self.bias = torch.zeros(number_of_words, requires_grad=True).type(torch.FloatTensor).to(device)\n", + "\n", + " # initialize\n", + " for i in range(N):\n", + " nn.init.xavier_uniform_(self.embeddings[i].weight)\n", + "\n", + " def forward(self, x):\n", + " # calculate score\n", + " embs = torch.cat([lookup(x) for x, lookup in zip(x.T, self.embeddings)]).view(N, x.shape[0], -1) # N x batch_size x embedding_size\n", + " embs = torch.sum(embs, dim=0) # batch_size x embedding_size\n", + " scores = embs + self.bias\n", + " \n", + " return scores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model Settings and Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "model = 
LogLinear(number_of_words, N)\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=0.1)\n", + "criterion = torch.nn.CrossEntropyLoss()\n", + "\n", + "if torch.cuda.is_available():\n", + " model.to(device)\n", + "\n", + "# function to calculate the sentence loss\n", + "def calc_sent_loss(sent):\n", + " S = word_to_index[\"\"]\n", + " \n", + " # initial history is equal to end of sentence symbols\n", + " hist = [S] * N\n", + " \n", + " # collect all target and histories\n", + " all_targets = []\n", + " all_histories = []\n", + " \n", + " # step through the sentence, including the end of sentence token\n", + " for next_word in sent + torch.Tensor([S]):\n", + " all_histories.append(list(hist))\n", + " all_targets.append(next_word)\n", + " hist = hist[1:] + [next_word]\n", + "\n", + " logits = model(torch.LongTensor(all_histories).to(device))\n", + " loss = criterion(logits, torch.LongTensor(all_targets).to(device))\n", + "\n", + " return loss\n", + "\n", + "MAX_LEN = 100\n", + "# Function to generate a sentence\n", + "def generate_sent():\n", + " S = word_to_index[\"\"]\n", + " hist = [S] * N\n", + " sent = []\n", + " while True:\n", + " logits = model(torch.LongTensor([hist]).to(device))\n", + " p = torch.nn.functional.softmax(logits) # 1 x number_of_words\n", + " next_word = p.multinomial(num_samples=1).item()\n", + " if next_word == S or len(sent) == MAX_LEN:\n", + " break\n", + " sent.append(next_word)\n", + " hist = hist[1:] + [next_word]\n", + " return sent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 
sentences\n", + "iter 0: train loss/word=9.0947, ppl=8907.6500\n", + "iter 0: dev loss/word=9.7668, ppl=17444.9221, time=1.76s\n", + "in this case of the trade deficit of the globe weeks columnist months from a character succeed reflects as an effort will teaching mr. chestman was essentially flat to deal with the board is this time the an international machines are n't being any at this time you were n't disclosed this week it to take over a company said it will introduce a new york that since friday 's sharp swings in the field sales were down on N at a company said it will invest in quarterly profit by the new securities\n", + "on monday at N yen $ N million navy contract for advanced there were when he 's no decision has been done by the bush administration has of new hampshire preferred holders total package that includes is that the full of only N to rise N N months of sept. N N share of $ N down N N N to N N to N this year and sales increased nearly N million shares outstanding as of that japan is starting in france spain italy and turkey late 1960s commissioner worthy of a food rose to\n", + "speaking to build a giant corp. new york stock exchange during the first nine months charges for example banks station and gas production at the hands of our crowd efforts have been trying to plot against him the chief received the payment problem of that big institutions were never going to be loyal to try to units in the federal reserve onto the field with any securities by the irs recently said it will introduce a new york that replaced become known as resources inc. 
between what 's own decision\n", + "these funds will be a it a better business he the market after the N after an a computer company for the defense plan and will come from a gene was missing acting expired award clients ' portfolios are the close of N million navy contract for an analyst with by saturday morning hat in big trading houses analysts expected to seek to clean up all says he is the best thing you do n't even the clutter of gold for current delivery of $ N million of $ N a vehicle\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/nlp/lib/python3.7/site-packages/ipykernel_launcher.py:38: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "advertisers and advertising rates for the s&p N issue of the issues pace with rival very small amounts to veto the constitution sen coordinator of the big three the las vegas 's increased activity is only one or for one thing is important as of as many as N million navy contract for the government is by mr. has business conditions and the earnings or N on the firm of that this is that mr. 
gorbachev 's economic activity and only half of the proposal to reduce interest rates in the he\n", + "--finished 5000 sentences\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_1861/185239032.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msent_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msent\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_loader\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# CHANGE to all train_data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mmy_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcalc_sent_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msent\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mtrain_loss\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mmy_loss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/tmp/ipykernel_1861/2590246674.py\u001b[0m in \u001b[0;36mcalc_sent_loss\u001b[0;34m(sent)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mhist\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mhist\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m 
\u001b[0;34m[\u001b[0m\u001b[0mnext_word\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mlogits\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLongTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_histories\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlogits\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLongTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_targets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# start training\n", + "for ITER in range (10): # CHANGE to 100\n", + " # training\n", + "\n", + " model.train()\n", + " train_words, train_loss = 0, 0.0\n", + " for sent_id, sent in enumerate(train_loader):\n", + " \n", + " my_loss = calc_sent_loss(sent[0])\n", + " \n", + " train_loss += my_loss.item()\n", + " train_words += len(sent)\n", + "\n", + " optimizer.zero_grad()\n", + " my_loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (sent_id+1) % 5000 == 0:\n", + " print(\"--finished %r sentences\" % (sent_id+1))\n", + " print(\"iter %r: train loss/word=%.4f, ppl=%.4f\" % (ITER, train_loss/train_words, math.exp(train_loss/train_words)))\n", + "\n", + " # evaluation\n", + " model.eval()\n", + " 
dev_words, dev_loss = 0, 0.0\n", + " start = time.time()\n", + " for sent_id, sent in enumerate(val_loader):\n", + " my_loss = calc_sent_loss(sent[0])\n", + " dev_loss += my_loss.item()\n", + " dev_words += len(sent)\n", + " print(\"iter %r: dev loss/word=%.4f, ppl=%.4f, time=%.2fs\" % (ITER, dev_loss/dev_words, math.exp(dev_loss/dev_words), time.time()-start))\n", + "\n", + " # Generate a few sentences\n", + " for _ in range(5):\n", + " sent = generate_sent()\n", + " print(\" \".join([index_to_word[x] for x in sent]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nlp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "154abf72fb8cc0db1aa0e7366557ff891bff86d6d75b7e5f2e68a066d591bfd7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/loglin-lm.ipynb b/notebooks/loglin-lm.ipynb new file mode 100644 index 0000000..ac4e456 --- /dev/null +++ b/notebooks/loglin-lm.ipynb @@ -0,0 +1,409 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Status of Notebook: Work in Progress" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Bad pipe message: %s [b'\\x16\\x8e\"\\xe17\\x07lq\\xfcGy\\x1b[\\xfd\\x8c\\x10\\x9d\\x0b cY\\xcf\\x83\\x06\\xa4\\x93\\x94\\xa71F\\xbb\\xf8\\x05\\xfd\\xdc\\x02\\x05e\\x06\\x951\\xb5\\xa7Khq\\xd3\\xc5\\xafb\\xe6\\x00\\x08\\x13\\x02\\x13\\x03\\x13\\x01\\x00\\xff\\x01\\x00\\x00\\x8f\\x00\\x00\\x00', b'\\x0c\\x00\\x00\\t127.0.0.1']\n", + "Bad pipe 
message: %s [b'\\x8b2X\\xa3\\x10\\x9c,\"b\\xaf\\xc2{\\x82\\xf7\\xe8\\xca\\xc8\\xe9\\x00\\x00|\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S', b\"\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae\\xc0\\xac\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0#\\xc0'\\x00g\\x00@\\xc0\\n\\xc0\\x14\\x009\\x008\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00<\\x005\\x00/\\x00\\x9a\\x00\\x99\\xc0\\x07\\xc0\\x11\\x00\\x96\\x00\\x05\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\\x03\\x06\\x03\\x08\\x07\\x08\\x08\\x08\\t\\x08\\n\\x08\\x0b\\x08\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06\\x01\\x03\\x03\\x02\\x03\\x03\\x01\\x02\", b'', b'\\x02']\n", + "Bad pipe message: %s [b'\\x05\\x02\\x06']\n", + "Bad pipe message: %s [b'P0H\\x12\\xfd!F\\x95\\xef\\xbb\\\\\\xb4]F\\xe3\\t\\xe9\\x04\\x00\\x00\\xa6\\xc0,\\xc00\\x00\\xa3\\x00\\x9f\\xcc\\xa9\\xcc\\xa8\\xcc\\xaa\\xc0\\xaf\\xc0\\xad\\xc0\\xa3\\xc0\\x9f\\xc0]\\xc0a\\xc0W\\xc0S\\xc0+\\xc0/\\x00\\xa2\\x00\\x9e\\xc0\\xae']\n", + "Bad pipe message: %s 
[b\"\\xc0\\xa2\\xc0\\x9e\\xc0\\\\\\xc0`\\xc0V\\xc0R\\xc0$\\xc0(\\x00k\\x00j\\xc0s\\xc0w\\x00\\xc4\\x00\\xc3\\xc0#\\xc0'\\x00g\\x00@\\xc0r\\xc0v\\x00\\xbe\\x00\\xbd\\xc0\\n\\xc0\\x14\\x009\\x008\\x00\\x88\\x00\\x87\\xc0\\t\\xc0\\x13\\x003\\x002\\x00\\x9a\\x00\\x99\\x00E\\x00D\\xc0\\x07\\xc0\\x11\\xc0\\x08\\xc0\\x12\\x00\\x16\\x00\\x13\\x00\\x9d\\xc0\\xa1\\xc0\\x9d\\xc0Q\\x00\\x9c\\xc0\\xa0\\xc0\\x9c\\xc0P\\x00=\\x00\\xc0\\x00<\\x00\\xba\\x005\\x00\\x84\\x00/\\x00\\x96\\x00A\\x00\\x05\\x00\\n\\x00\\xff\\x01\\x00\\x00j\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x0c\\x00\\n\\x00\\x1d\\x00\\x17\\x00\\x1e\\x00\\x19\\x00\\x18\\x00#\\x00\\x00\\x00\\x16\\x00\\x00\\x00\\x17\\x00\\x00\\x00\\r\\x000\\x00.\\x04\\x03\\x05\"]\n", + "Bad pipe message: %s [b'\\x03\\x08']\n", + "Bad pipe message: %s [b'\\x08\\x08\\t\\x08\\n\\x08']\n", + "Bad pipe message: %s [b'\\x04\\x08\\x05\\x08\\x06\\x04\\x01\\x05\\x01\\x06']\n", + "Bad pipe message: %s [b'', b'\\x03\\x03']\n", + "Bad pipe message: %s [b'']\n", + "Bad pipe message: %s [b'', b'\\x02']\n", + "Bad pipe message: %s [b'\\x05\\x02\\x06']\n", + "Bad pipe message: %s 
[b'\\xe0\\xb4@\\xec\\xce7\\x91\\x04\\xc2\\xe0\\xf5\\x846\\x117\\x97\\xdc\\xe9\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0\\x08\\x00\\x16\\x00\\x13\\x00\\x10\\x00\\r\\xc0\\x17\\x00\\x1b\\xc0\\r\\xc0\\x03\\x00\\n\\x00\\x15\\x00\\x12\\x00\\x0f\\x00\\x0c\\x00\\x1a\\x00\\t\\x00\\x14\\x00\\x11\\x00\\x19\\x00\\x08\\x00\\x06\\x00\\x17\\x00\\x03\\xc0\\x10\\xc0\\x06\\xc0\\x15\\xc0\\x0b\\xc0\\x01\\x00\\x02\\x00\\x01\\x00\\xff\\x02\\x01\\x00\\x00C']\n", + "Bad pipe message: %s [b\"\\xd4J7\\n\\xb0v9\\xec\\xbc'K\\xb9\\xe9\\x9f9\\x8c\\xa2\\x9c\\x00\\x00>\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\xc0\\x0f\\xc0\\x05\\x005\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00\\x96\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\x00\\xff\\x02\\x01\\x00\\x00C\\x00\\x00\\x00\\x0e\\x00\\x0c\\x00\\x00\\t127.0.0.1\\x00\\x0b\\x00\\x04\\x03\\x00\\x01\\x02\\x00\\n\\x00\\x1c\\x00\\x1a\\x00\\x17\\x00\\x19\\x00\\x1c\\x00\\x1b\\x00\\x18\\x00\\x1a\\x00\\x16\\x00\\x0e\\x00\\r\\x00\\x0b\\x00\\x0c\\x00\\t\\x00\\n\\x00#\\x00\\x00\\x00\\x0f\\x00\\x01\\x01\\x15\\x03\\x01\\x00\\x02\\x02\"]\n", + "Bad pipe message: %s 
[b'\\xd1\\xfeg4RQ\\xbf\\x18\\xfa\\x90\\xfe+\\xcb\\xcaU\\xb8{\\x94\\x00\\x00\\xa2\\xc0\\x14\\xc0\\n\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00:\\x00\\x89\\xc0\\x0f\\xc0\\x05\\x005\\x00\\x84\\xc0\\x13\\xc0\\t\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x004\\x00\\x9b\\x00F\\xc0\\x0e\\xc0\\x04\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0\\x08\\x00\\x16\\x00\\x13\\x00\\x10\\x00\\r\\xc0\\x17\\x00\\x1b\\xc0\\r\\xc0\\x03\\x00\\n\\x00\\x15\\x00\\x12\\x00\\x0f\\x00\\x0c']\n", + "Bad pipe message: %s [b'^Ii\\xb2J\\xe30\\x9f\\xd7\\xe2\\xc0\\x8d&\\xd2\\x92\\xdb\\xa5\\\\\\x00\\x00']\n", + "Bad pipe message: %s [b\"0\\xc0,\\xc0(\\xc0$\\xc0\\x14\\xc0\\n\\x00\\xa5\\x00\\xa3\\x00\\xa1\\x00\\x9f\\x00k\\x00j\\x00i\\x00h\\x009\\x008\\x007\\x006\\x00\\x88\\x00\\x87\\x00\\x86\\x00\\x85\\xc0\\x19\\x00\\xa7\\x00m\\x00:\\x00\\x89\\xc02\\xc0.\\xc0*\\xc0&\\xc0\\x0f\\xc0\\x05\\x00\\x9d\\x00=\\x005\\x00\\x84\\xc0/\\xc0+\\xc0'\\xc0#\\xc0\\x13\\xc0\\t\\x00\\xa4\\x00\\xa2\\x00\\xa0\\x00\\x9e\\x00g\\x00@\\x00?\\x00>\\x003\\x002\\x001\\x000\\x00\\x9a\\x00\\x99\\x00\\x98\\x00\\x97\\x00E\\x00D\\x00C\\x00B\\xc0\\x18\\x00\\xa6\\x00l\\x004\\x00\\x9b\\x00F\\xc01\\xc0-\\xc0)\\xc0%\\xc0\\x0e\\xc0\\x04\\x00\\x9c\\x00<\\x00/\\x00\\x96\\x00A\\x00\\x07\\xc0\\x11\\xc0\\x07\\xc0\\x16\\x00\\x18\\xc0\\x0c\\xc0\\x02\\x00\\x05\\x00\\x04\\xc0\\x12\\xc0\\x08\\x00\\x16\\x00\\x13\\x00\\x10\\x00\\r\\xc0\\x17\\x00\\x1b\\xc0\\r\\xc0\\x03\\x00\\n\\x00\\x15\\x00\\x12\\x00\\x0f\\x00\\x0c\\x00\\x1a\\x00\\t\\x00\\x14\\x00\\x11\\x00\\x19\\x00\\x08\\x00\\x06\\x00\\x17\\x00\\x03\\xc0\\x10\\xc0\\x06\\xc0\\x15\\xc0\\x0b\\xc0\\x01\\x00;\\x00\\x02\\x00\\x01\\x00\\xff\"]\n" + ] + } + ], + "source": [ + "import torch\n", + "import random\n", + "import torch\n", + "import torch.nn as nn\n", + "import math\n", + "import time\n", + "import numpy as np" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment to download the datasets\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/test.txt\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/train.txt\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/valid.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# function to read in data, process each line and split columns by \" ||| \"\n", + "def read_data(filename):\n", + " data = []\n", + " with open(filename, \"r\") as f:\n", + " for line in f:\n", + " line = line.strip().split(\" \")\n", + " data.append(line)\n", + " return data\n", + "\n", + "# read the data\n", + "train_data = read_data('data/ptb/train.txt')\n", + "val_data = read_data('data/ptb/valid.txt')\n", + "\n", + "# creating the word and tag indices and special tokens\n", + "word_to_index = {}\n", + "index_to_word = {}\n", + "word_to_index[\"\"] = len(word_to_index)\n", + "index_to_word[len(word_to_index)-1] = \"\"\n", + "word_to_index[\"\"] = len(word_to_index) # add to dictionary\n", + "index_to_word[len(word_to_index)-1] = \"\"\n", + "\n", + "# create word to index dictionary and tag to index dictionary from data\n", + "def create_dict(data, check_unk=False):\n", + " for line in data:\n", + " for word in line:\n", + " if check_unk == False:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = len(word_to_index)\n", + " index_to_word[len(word_to_index)-1] = word\n", + " \n", + " # has no effect because data already comes with \n", + " # should work with data without already processed\n", + " else: \n", + " if word not in 
word_to_index:\n", + " word_to_index[word] = word_to_index[\"\"]\n", + " index_to_word[len(word_to_index)-1] = word\n", + "\n", + "create_dict(train_data)\n", + "create_dict(val_data, check_unk=True)\n", + "\n", + "# create word and tag tensors from data\n", + "def create_tensor(data):\n", + " for line in data:\n", + " yield([word_to_index[word] for word in line])\n", + "\n", + "train_data = list(create_tensor(train_data))\n", + "val_data = list(create_tensor(val_data))\n", + "\n", + "number_of_words = len(word_to_index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In our implementation we are using batched training. There are a few differences from the original implementation found [here](https://github.com/neubig/nn4nlp-code/blob/master/02-lm/loglin-lm.py). " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "## define the model\n", + "\n", + "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", + "\n", + "# length of the n-gram\n", + "N = 2\n", + "\n", + "# logilinear model\n", + "class LogLinear(nn.Module):\n", + " def __init__(self, number_of_words, ngram_length):\n", + " super(LogLinear, self).__init__()\n", + "\n", + " # different lookups for each position in the n-gram\n", + " self.embeddings = nn.ModuleList([nn.Embedding(number_of_words, number_of_words) for _ in range(ngram_length)])\n", + " self.bias = torch.zeros(number_of_words, requires_grad=True).type(torch.FloatTensor).to(device)\n", + "\n", + " # initialize\n", + " for i in range(N):\n", + " nn.init.xavier_uniform_(self.embeddings[i].weight)\n", + "\n", + " def forward(self, x):\n", + " # calculate score\n", + " embs = torch.cat([lookup(x) for x, lookup in zip(x.T, self.embeddings)]).view(N, x.shape[0], -1) # N x batch_size x embedding_size\n", + " embs = torch.sum(embs, dim=0) # batch_size x 
embedding_size\n", + " scores = embs + self.bias\n", + " \n", + " return scores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model Settings and Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "model = LogLinear(number_of_words, N)\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=0.1)\n", + "criterion = torch.nn.CrossEntropyLoss()\n", + "\n", + "if torch.cuda.is_available():\n", + " model.to(device)\n", + "\n", + "# function to calculate the sentence loss\n", + "def calc_sent_loss(sent):\n", + " S = word_to_index[\"\"]\n", + " \n", + " # initial history is equal to end of sentence symbols\n", + " hist = [S] * N\n", + " \n", + " # collect all target and histories\n", + " all_targets = []\n", + " all_histories = []\n", + " \n", + " # step through the sentence, including the end of sentence token\n", + " for next_word in sent + [S]:\n", + " all_histories.append(list(hist))\n", + " all_targets.append(next_word)\n", + " hist = hist[1:] + [next_word]\n", + "\n", + " logits = model(torch.LongTensor(all_histories).to(device))\n", + " loss = criterion(logits, torch.LongTensor(all_targets).to(device))\n", + "\n", + " return loss\n", + "\n", + "MAX_LEN = 100\n", + "# Function to generate a sentence\n", + "def generate_sent():\n", + " S = word_to_index[\"\"]\n", + " hist = [S] * N\n", + " sent = []\n", + " while True:\n", + " logits = model(torch.LongTensor([hist]).to(device))\n", + " p = torch.nn.functional.softmax(logits) # 1 x number_of_words\n", + " next_word = p.multinomial(num_samples=1).item()\n", + " if next_word == S or len(sent) == MAX_LEN:\n", + " break\n", + " sent.append(next_word)\n", + " hist = hist[1:] + [next_word]\n", + " return sent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "iter 0: train loss/word=0.3799, ppl=1.4621\n", + "iter 0: dev loss/word=0.3860, ppl=1.4710, time=1.20s\n", + "the dollar and it was n't the only at the national last to the \n", + "i think the importance of \n", + "the dollar began friday on a new\n", + "the purchase of the transaction\n", + "but even mr. boren added combination wall street firms developed clarify judgment roads current joel announce services enthusiasts jeffrey trades nor quite school highlight co-chief manpower unveil frustration plunged admits investigator spent sdi museum exchanged passenger interpublic interbank prosecutorial undo earn base relevant ounces cray-3 cellular harmful ultimate wells co\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/nlp/lib/python3.7/site-packages/ipykernel_launcher.py:38: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "iter 1: train loss/word=0.3836, ppl=1.4676\n", + "iter 1: dev loss/word=0.3949, ppl=1.4842, time=1.18s\n", + "the which has been a recently acquired british air is said a big stance major market activity\n", + "i dominates so slightly in the current N N\n", + "consumer product sales global viewpoint wastewater sensitive sweetened professionals mather are all of them and to the move quickly unprecedented maturing in N as N in cash and that 's not\n", + "an incident in N to $ N\n", + "an incident in N the increase in and just in panama\n", + "iter 2: train loss/word=0.3819, ppl=1.4650\n", + "iter 2: dev loss/word=0.4004, ppl=1.4924, time=1.16s\n", + "perhaps he is n't a federal judge the end of the natural will probably try to gain from its traditional new york stock exchange composite trading\n", + "u.s. 
wants to buy her sell to the indictment\n", + "he is a so far has been painful for renaissance\n", + "am radio which has been dogged in beijing\n", + "it 's an in u.s.-soviet affairs\n", + "iter 3: train loss/word=0.3743, ppl=1.4539\n", + "iter 3: dev loss/word=0.4072, ppl=1.5027, time=1.14s\n", + "in january\n", + "coca-cola co. may the unit of fees N N because of increased N N N to N times quarter when demand met with soviet counterparts must be doing that they could not been fully discounted stripped mississippi inventory accessible caribbean anti-nuclear mitchell said it is a net loss of $ N billion from $ N billion a temporary reduction when it the mine was preparing to meet with a 's chairman and chief executive officer\n", + "the remainder of the common stock reflecting a dramatic fight trecker sent often \n", + "in january N of the to systems inc. a widely test pills basic potential clients on top of N \n", + "kidder spokesman said the u.s. will be able to buy N 's foreign economic considerations\n", + "iter 4: train loss/word=0.3873, ppl=1.4730\n", + "iter 4: dev loss/word=0.4137, ppl=1.5125, time=1.17s\n", + "this has n't been in the credit-card 's focus on the new agreement will give them\n", + "as the u.s.\n", + " following a who was the of as much of the new company will have about $ N billion yen\n", + "as its vice president at the securities\n", + "goodyear 's steady revenues buoyed italian upgrade cananea worrisome stop-loss wealthy disclosed inviting i. building appeals in the year-ago period\n", + "iter 5: train loss/word=0.3804, ppl=1.4629\n", + "iter 5: dev loss/word=0.4196, ppl=1.5214, time=1.16s\n", + "imports were allegedly getting insurance against environmental disaster the price was a of highway is set at up $ N billion\n", + "the acquisition of says a spokesman fla. is part of an his previous positions\n", + "whatever a single court in new york another individual said\n", + "some small that it is but we have inc. 
of its his for their buying french selling and N to N N down from N million or N cents a share on revenue in the insurance companies and the term bonds due nov. N\n", + "mr. had been executive\n", + "iter 6: train loss/word=0.3648, ppl=1.4402\n", + "iter 6: dev loss/word=0.4255, ppl=1.5303, time=1.14s\n", + "the government would walk into kabul\n", + "according to west\n", + "now\n", + "the practice is known as a lot of power off\n", + "the government would walk into kabul\n", + "iter 7: train loss/word=0.3834, ppl=1.4672\n", + "iter 7: dev loss/word=0.4299, ppl=1.5371, time=1.17s\n", + "he was also to have suffered some sort of friendly japanese companies\n", + "they are the first time to consider the bill says\n", + "the move quickly made influential enemies are engaged in a u.s. appellate court ruling against the mark\n", + " the aircraft and five N \n", + " & co. and its international business machines corp. which is the us in a national debt ceiling to see the glass house\n", + "iter 8: train loss/word=0.3771, ppl=1.4580\n", + "iter 8: dev loss/word=0.4344, ppl=1.5440, time=1.16s\n", + "and some of the investment in\n", + "a few weeks ago\n", + "some of the day of new york city 's problem\n", + "in the case of a crime and that is almost all but the mothers of several times as fast and others if not given chivas for christmas\n", + "an estimated N N\n", + "iter 9: train loss/word=0.3709, ppl=1.4491\n", + "iter 9: dev loss/word=0.4374, ppl=1.5486, time=1.17s\n", + " case says\n", + "her story\n", + "commodore international fell N\n", + "short-term rates\n", + "market 's recent troubles which have included in the present when rates seem headed down\n" + ] + } + ], + "source": [ + "# start training\n", + "for ITER in range (10): # CHANGE to 100\n", + " # training\n", + " random.shuffle(train_data)\n", + "\n", + " model.train()\n", + " train_words, train_loss = 0, 0.0\n", + " for sent_id, sent in enumerate(train_data[1:1000]): # CHANGE to all train_data\n", + " 
\n", + " my_loss = calc_sent_loss(sent)\n", + " \n", + " train_loss += my_loss.item()\n", + " train_words += len(sent)\n", + "\n", + " optimizer.zero_grad()\n", + " my_loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (sent_id+1) % 5000 == 0:\n", + " print(\"--finished %r sentences\" % (sent_id+1))\n", + " print(\"iter %r: train loss/word=%.4f, ppl=%.4f\" % (ITER, train_loss/train_words, math.exp(train_loss/train_words)))\n", + "\n", + " # evaluation\n", + " model.eval()\n", + " dev_words, dev_loss = 0, 0.0\n", + " start = time.time()\n", + " for sent_id, sent in enumerate(val_data):\n", + " my_loss = calc_sent_loss(sent)\n", + " dev_loss += my_loss.item()\n", + " dev_words += len(sent)\n", + " print(\"iter %r: dev loss/word=%.4f, ppl=%.4f, time=%.2fs\" % (ITER, dev_loss/dev_words, math.exp(dev_loss/dev_words), time.time()-start))\n", + "\n", + " # Generate a few sentences\n", + " for _ in range(5):\n", + " sent = generate_sent()\n", + " print(\" \".join([index_to_word[x] for x in sent]))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nlp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "154abf72fb8cc0db1aa0e7366557ff891bff86d6d75b7e5f2e68a066d591bfd7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/maths/README.md b/notebooks/maths/README.md new file mode 100644 index 0000000..d9e87ff --- /dev/null +++ b/notebooks/maths/README.md @@ -0,0 +1 @@ +All notebooks in this folder are tagged as work in progress [WIP] and contain minimal, rough notes and code snippets that may contain inaccuracies and errors.
diff --git a/notebooks/maths/algebra.ipynb b/notebooks/maths/algebra.ipynb new file mode 100644 index 0000000..e262e63 --- /dev/null +++ b/notebooks/maths/algebra.ipynb @@ -0,0 +1,857 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Notes for Algebra with Python [WIP]\n", + "\n", + "The following are a set of notes for understanding some foundational concepts of Algebra using Python with a focus on ML. \n", + "\n", + "References: \n", + "\n", + "- [Chapter 4 - Functions and Algebra with Python](https://learning.oreilly.com/library/view/the-statistics-and/9781800209763/B15968_04_Final_RK.xhtml#_idParaDest-95)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Functions\n", + "\n", + "Functions are used to map from one mathematical object to another. In ML, it's important to understand the concept of a function, as we use functions a lot. In fact, for a lot of ML concepts we are essentially tying functions together, mapping inputs to outputs. Let's start with some basic concepts of functions, then gradually make our way to the more common functions applied in ML." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Example of a squaring function:\n", + "\n", + "$$\n", + "f(x) = x^2\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# import your main libraries\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "4\n" + ] + } + ], + "source": [ + "# simple squaring function\n", + "def f(x):\n", + " return x**2\n", + "\n", + "# test the function\n", + "x = 2\n", + "print(f(x))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's visualize what that looks like:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# visualize using matplotlib\n", + "\n", + "x = np.linspace(-10, 10, 100)\n", + "y = f(x)\n", + "plt.plot(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A constant function:\n", + "\n", + "$$\n", + "f(x) = c\n", + "$$\n", + "\n", + "with $c$ being a constant." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# a constant function\n", + "def f(x):\n", + " return 2 # constant\n", + "\n", + "# test the function\n", + "x = 2\n", + "\n", + "# visualize using matplotlib\n", + "x = np.linspace(-10, 10, 100)\n", + "y = [f(x_i) for x_i in x]\n", + "\n", + "plt.plot(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A linear function:\n", + "\n", + "$$\n", + "f(x) = mx + c\n", + "$$\n", + "\n", + ", with $m$ being the slope and $c$ being the y-intercept -- both constants." + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# a linear function\n", + "def f(x, m, c):\n", + " return m*x + c\n", + "\n", + "# test the function\n", + "x = 2\n", + "m = 3\n", + "c = 4\n", + "\n", + "# visualize using matplotlib\n", + "x = np.linspace(-10, 10, 100)\n", + "y = [f(x_i, m, c) for x_i in x]\n", + "\n", + "plt.plot(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Polynomial functions:\n", + "\n", + "$$\n", + "f(x) = a_0 + a_1x + a_2x^2 + \\cdots + a_nx^n\n", + "$$\n", + "\n", + "with $a_0, a_1, a_2, ..., a_n$ being constants. And $n$ is the degree of the polynomial." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# a polynomial function\n", + "def f(x, a, b, c):\n", + " # a, b, c are coefficients\n", + " return a*x**2 + b*x + c\n", + "\n", + "# test the function\n", + "x = 2\n", + "a = 8\n", + "b = 1\n", + "c = 8\n", + "\n", + "# visualize using matplotlib\n", + "x = np.linspace(-10, 10, 100)\n", + "y = [f(x_i, a, b, c) for x_i in x]\n", + "\n", + "plt.plot(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Logarithmic functions:\n", + "\n", + "$$\n", + "f(x) = c\\log_a x\n", + "$$\n", + "\n", + "with $c$ and $a$ being constants, $log_a$ is the logarithm function with base $a$." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# a logarithmic function\n", + "def f(x, a):\n", + " return a*np.log(x) # natural log\n", + "\n", + "# test the function\n", + "x = 2\n", + "a = 3\n", + "\n", + "# visualize using matplotlib\n", + "x = np.linspace(0.1, 10, 100)\n", + "y = [f(x_i, a) for x_i in x]\n", + "\n", + "plt.plot(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Exponential functions:\n", + "\n", + "$$\n", + "f(x) = c\\cdot a^x\n", + "$$\n", + "\n", + "with $c$ and $a$ being constants." + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAWoAAAEDCAYAAAAcI05xAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAUy0lEQVR4nO3dfXBld33f8fdXWu2TH3bXXQFrr+01KSGkNH6IQgkhDDUJMS7FpIOp0zalhRkPzdCBTjMZGNpM2vxT0pahD0nplniStAyYmEA9jKljEgihEwxrs2t7bWyvjd198lrGSNq1dbX34ds/7pEsC2l1r1bn3t/i92tGs1f3nHvPV+ee/ein3/md84vMRJJUrpFhFyBJOjODWpIKZ1BLUuEMakkqnEEtSYUzqCWpcLUFdUTcEhFPR8QDPaz7poi4NyJaEfGuRc9fXj2/PyIORsT766pXkkoVdY2jjog3AaeAP8rM166y7h7gQuDXgdsz87bq+Y1VjXMRcT7wAPCGzDxWS9GSVKDaWtSZ+XXg2cXPRcSPRcT/iYh7IuIvI+InqnWfyMz7gM6S9zidmXPVt5vqrFeSSjXo4NsL/PPM/Gm6reffW+0FEXFpRNwHHAY+Zmta0kvNhkFtqOq6eAPwxxEx//Sm1V6XmYeBn4qIi4EvRsRtmXmivkolqSwDC2q6rfepzLxqLS/OzGPVicmfB25bz8IkqWQD6/rIzBngexFxI0B0XXmm10TE7ojYUj3eAbwReLj2YiWpIHUOz/sM8FfAqyPiSES8D/iHwPsi4gBwELihWvdnIuIIcCPw3yPiYPU2rwHurtb/C+A/ZOb9ddUsSSWqbXieJGl9ONxNkgpXy8nEnTt35p49e+p4a0n6kXTPPfc8k5njyy2rJaj37NnDvn376nhrSfqRFBFPrrTMrg9JKpxBLUmFM6glqXAGtSQVzqCWpMIZ1JJUOINakgpnUEvSOrjrwRN88i8eq+W9DWpJWgd/9tAJbvnG92p5b4NaktZBo9lm01g9kWpQS9I6mGt12LxhtJb3NqglaR00mm02jxnUklSsRrPDZrs+JKlcjZYtakkqWqPZYdMGW9SSVKy5VptNtqglqVxzTUd9SFLRuqM+htz1E
RGjEfGdiPhSLZVI0jms0WyzqYAW9QeBh2qpQpLOcXOtIQ/Pi4jdwN8BPlVLFZJ0Dmu1O7Q6OfTheZ8AfgPorLRCRNwcEfsiYt/k5OR61CZJ54RGqxuNQ2tRR8Tbgacz854zrZeZezNzIjMnxsfH161ASSpdo9kGGGof9c8B74iIJ4DPAtdGxP+qpRpJOgfNDbtFnZkfyczdmbkHuAn488z8R7VUI0nnoPkW9bD7qCVJK6i762NDPytn5teAr9VSiSSdoxrNbteHEwdIUqHmWlXXRwEXvEiSljHXHPLJREnSmXkyUZIK12gZ1JJUtIWTiU4cIEllmrPrQ5LKNvR7fUiSzqyEe31Iks6g0ewwNhqMjkQt729QS9JZmmu1a7vYBQxqSTprjWanthnIwaCWpLM2V+PEtmBQS9JZa7TatY2hBoNaks7aXLNT2xhqMKgl6aw1Wm2DWpJK1mh27KOWpJI1mu3aLnYBg1qSztpcyxa1JBWt0fSCF0kqmhe8SFLh5pqOo5akojk8T5IK1u4kzXZ6MlGSSjVX83yJYFBL0lmZny9xs33UklSmhdldbFFLUpnmap4vEQxqSTor8y1qL3iRpEItBLVdH5JUpvmTiV7wIkmFmh+e58lESSrUwvA8TyZKUpm84EWSCrcwjto+akkq0wvjqG1RS1KRHJ4nSYUr4l4fEbE5Ir4VEQci4mBE/JvaqpGkc0yj2WZ0JNgwWl9Qb+hhnTng2sw8FRFjwDci4suZ+c3aqpKkc0Sj2am1NQ09BHVmJnCq+nas+so6i5Kkc8VczbO7QI991BExGhH7gaeBuzLz7mXWuTki9kXEvsnJyXUuU5LK1Gh2ygjqzGxn5lXAbuB1EfHaZdbZm5kTmTkxPj6+zmVKUpkarTabarwqEfoc9ZGZU8BXgetqqUaSzjHdGciH3KKOiPGI2F493gL8IvDdWquSpHPEXKtT630+oLdRH7uAP4yIUbrB/rnM/FKtVUnSOaLRbNc6aQD0NurjPuDqWquQpHNUo9lh5/m9tHnXzisTJeksNEroo5YkrWwQfdQGtSSdhUazkAteJEnLM6glqXCNVqfWSQPAoJakNet0ktOtTq0T24JBLUlrdrpd/8S2YFBL0potzO7i8DxJKtP87C5F3ZRJkvQCW9SSVLhBzEAOBrUkrdkLM5Db9SFJRZoPau/1IUmFarQcnidJRZtb6PqwRS1JRbJFLUmFs49akgo33/XhBS+SVCjHUUtS4bwyUZIK12h2GAkYG41at2NQS9IazU9sG2FQS1KRGq127UPzwKCWpDWba3ZqP5EIBrUkrVmjZVBLUtGen2uxxaCWpHLNNJps2zJW+3YMaklao5nZFhdu2VD7dgxqSVqjmUaTCzfbopakYs3MNrnQrg9JKlOr3eG5021b1JJUqpONFoB91JJUqplGE8AWtSSVamZ2vkVtUEtSkV5oUdv1IUlFmpmtgtoWtSSVaaFFXUJQR8SlEfHViHgwIg5GxAdrr0qSCrfQRz2Aro9ettAC/mVm3hsRFwD3RMRdmflgzbVJUrFmGk1GAs7bWEAfdWYez8x7q8cngYeAS+ouTJJKNjPb5ILNY4yM1Du7C/TZRx0Re4CrgbuXWXZzROyLiH2Tk5PrVJ4klWmm0RrInfOgj6COiPOBzwMfysyZpcszc29mTmTmxPj4+HrWKEnF6d7no/5uD+gxqCNijG5Ifzoz/6TekiSpfNOzg7lzHvQ26iOA3wceysyP11+SJJVvULc4hd5a1D8H/CpwbUTsr76ur7kuSSraoCYNgB6G52XmN4D6T2tK0jmktBa1JGmRZrvD86fbA7kqEQxqSerbwr2oB3BVIhjUktS3Qd6QCQxqSerbICcNAINakvo2yEkDwKCWpL69cItT+6glqUgLfdR2fUhSmQY5aQAY1JLUt5nZFqMjwXkbRweyPYNakvrUvSpxA91bIdXPoJakPnVvcTqYbg8wqCWpbzON1sBOJIJBLUl9G+SkAWBQS
1LfBnnnPDCoJalvM7N2fUhS0WYadn1IUrEW7kVti1qSyjToW5yCQS1JfZmZnzTArg9JKtOgb8gEBrUk9WXQN2QCg1qS+rIwaYAtakkq06AnDQCDWpL6Mt9Hvc2uD0kq00yjyYaRYMvYYO5FDQa1JPVlZrbFhVvGBnYvajCoJakv85MGDJJBLUl9GPSkAWBQS1Jfnjl1mh1bNw50mwa1JPXh+PQsF2/fMtBtGtSS1KNGs80zp05z8bbNA92uQS1JPXpqugHALlvUklSmY9OzAFy83Ra1JBXp2FS3RX3xNlvUklSk41PdFvUr7KOWpDIdm55l5/kb2TzAy8fBoJaknh2barBrwN0eYFBLUs+OTc0O/EQi9BDUEXFLRDwdEQ8MoiBJKlFmcmxqttgW9R8A19VchyQVbabR4rnTbS4Z8Bhq6CGoM/PrwLMDqEWSinW8GkO9q8Suj15FxM0RsS8i9k1OTq7X20pSEY5NzV/sUmCLuleZuTczJzJzYnx8fL3eVpKKMKyLXcBRH5LUk+PTs2wYCcYv2DTwbRvUktSDY1MNXn7hZkZHBjcF17xehud9Bvgr4NURcSQi3ld/WZJUlmGNoQZYdeKvzPyVQRQiSSU7Nj3LNZftGMq27fqQpFV0OslT08O5fBwMakla1TOn5mi2k0uG1PVhUEvSKo7Nz+xii1qSyjR/scswrkoEg1qSVjUf1MO4zwcY1JK0qmNTDbaMjbJty9hQtm9QS9Iqjk93x1BHDP5iFzCoJWlVR6dmh3IzpnkGtSSdQbPd4ZETJ3nVyy4YWg0GtSSdwSMnTtJodrjy0m1Dq8GglqQzOHB4GoCrLt0+tBoMakk6gwOHp9i+dYzLLto6tBoMakk6gwNHprhy9/ahjfgAg1qSVvTcXItHTpzkyiF2e4BBLUkreuDoNJ2Eq4Z4IhEMakla0YEjUwD81O7tQ63DoJakFRw4PM3uHVvYef7g50lczKCWpBXsPzw19P5pMKglaVmTJ+c4OjXLVUPu9gCDWpKWdV/VP22LWpIKdeDwFCMBr73kwmGXYlBL0nL2H5nmx19+AVs3bhh2KQa1JC31/OkW9z75A66+bMewSwEMakn6IXfc/xSn5lq886qLh10KYFBL0g/53LcPc8XO83jdFRcNuxTAoJakF3l88hTfeuJZbpzYPdQbMS1mUEvSIp/bd4TRkeBd1+wedikLDGpJqrTaHT5/7xHe/OPjvOzCzcMuZ4FBLUmVrz08yeTJOd79M5cOu5QXMaglqXLrvsPsPH8T1/7Ey4ZdyosY1JIE3P349/nKQye4cWI3Y6NlRWNZ1UjSEEzPNvkXt+7n8ou28oG//deHXc4PGf61kZI0ZP/6iw9w4uQct73/ZzlvU3mxaIta0kvaF79zlNsPHONDb3lVMZeML2VQS3rJuvPgU3z0C/czcfkOfq3ALo955bXxJalmrXaH37nzYfZ+/XGu3L2N//oPrmF0pIyrEJdjUEt6ychM/u+h7/OJrzzCvid/wK++/nL+1dtfw6YNo8Mu7Yx6CuqIuA74T8Ao8KnM/He1ViVJ66TTSR6bPMVfPvoMn777SR6bfI6LztvIJ/7+Vbzz6kuGXV5PVg3qiBgFfhf4ReAI8O2IuD0zH6y7OEk6k2a7w2yzzfNzbaZnm0zPNnn2uTmO/GCWo1OzfO+Z5/jO/5tierYJdKfV+vi7r+T6v7mLzWNlt6IX66VF/TrgUGY+DhARnwVuANY9qP/uf/kGjWZ7vd9WUk2y3/XzhVfkoge5ZHkCnUw6ne5z7UzaHWh3OrQ6SaudNNvdxyvZunGUyy7aytte+wquuXwHE5fv4JXj5/dZcRl6CepLgMOLvj8C/K2lK0XEzcDNAJdddtmaivmx8fM43e6s6bWShiPo8yRc/PDDiFj0uPvvaAQEjEQwGsHISDA6AmOjI9VXsGVslM3V1/atY2zbMsaOrRu5ZPsWtm8dK+Y2pWdr3U4mZuZeYC/AxMREv79oAfjETVevV
zmS9COjl3HUR4HFt5LaXT0nSRqAXoL628CrIuKKiNgI3ATcXm9ZkqR5q3Z9ZGYrIj4A3El3eN4tmXmw9sokSUCPfdSZeQdwR821SJKW4b0+JKlwBrUkFc6glqTCGdSSVLhYfEnnur1pxCTw5BpfvhN4Zh3LWS/W1R/r6o919edHsa7LM3N8uQW1BPXZiIh9mTkx7DqWsq7+WFd/rKs/L7W67PqQpMIZ1JJUuBKDeu+wC1iBdfXHuvpjXf15SdVVXB+1JOnFSmxRS5IWMaglqXBDCeqIuDEiDkZEJyImliz7SEQcioiHI+KXVnj9FRFxd7XerdXtV9e7xlsjYn/19URE7F9hvSci4v5qvX3rXccy2/utiDi6qLbrV1jvumofHoqIDw+grn8fEd+NiPsi4gsRsX2F9Qayv1b7+SNiU/UZH6qOpT111bJom5dGxFcj4sHq+P/gMuu8OSKmF32+v1l3XdV2z/i5RNd/rvbXfRFxzQBqevWi/bA/ImYi4kNL1hnI/oqIWyLi6Yh4YNFzF0XEXRHxaPXvjhVe+55qnUcj4j1rKiAzB/4FvAZ4NfA1YGLR8z8JHAA2AVcAjwGjy7z+c8BN1eNPAv+s5nr/I/CbKyx7Atg5wH33W8Cvr7LOaLXvXglsrPbpT9Zc11uBDdXjjwEfG9b+6uXnB34N+GT1+Cbg1gF8druAa6rHFwCPLFPXm4EvDep46vVzAa4Hvkx39qzXA3cPuL5R4Cm6F4UMfH8BbwKuAR5Y9NzvAB+uHn94uWMeuAh4vPp3R/V4R7/bH0qLOjMfysyHl1l0A/DZzJzLzO8Bh+hOrrsgupOgXQvcVj31h8A766q12t67gc/UtY0aLExInJmngfkJiWuTmX+ama3q22/SnQloWHr5+W+ge+xA91h6S9Q8wV5mHs/Me6vHJ4GH6M5Jei64Afij7PomsD0idg1w+28BHsvMtV7xfFYy8+vAs0ueXnwMrZRDvwTclZnPZuYPgLuA6/rdfml91MtNpLv0QP5rwNSiUFhunfX088CJzHx0heUJ/GlE3FNN8DsIH6j+/LxlhT+3etmPdXov3dbXcgaxv3r5+RfWqY6labrH1kBUXS1XA3cvs/hnI+JARHw5Iv7GgEpa7XMZ9jF1Eys3loaxvwBenpnHq8dPAS9fZp112W/rNrntUhHxFeAVyyz6aGb+77q2248ea/wVztyafmNmHo2IlwF3RcR3q9++tdQF/Dfgt+n+x/ptut0y7z2b7a1HXfP7KyI+CrSAT6/wNuu+v841EXE+8HngQ5k5s2TxvXT/vD9VnX/4IvCqAZRV7OdSnYN6B/CRZRYPa3+9SGZmRNQ21rm2oM7MX1jDy3qZSPf7dP/s2lC1hNY82e5qNUbEBuDvAT99hvc4Wv37dER8ge6f3Wd1gPe67yLifwBfWmZRLRMS97C//gnwduAtWXXQLfMe676/ltHLzz+/zpHqc95G99iqVUSM0Q3pT2fmnyxdvji4M/OOiPi9iNiZmbXegKiHz2WYk1y/Dbg3M08sXTCs/VU5ERG7MvN41Q309DLrHKXbjz5vN91zc30prevjduCm6oz8FXR/M35r8QpVAHwVeFf11HuAulrovwB8NzOPLLcwIs6LiAvmH9M9ofbAcuuulyX9gr+8wvYGPiFxRFwH/Abwjsx8foV1BrW/evn5b6d77ED3WPrzlX65rJeqD/z3gYcy8+MrrPOK+b7yiHgd3f+jtf4C6fFzuR34x9Xoj9cD04v+7K/bin/VDmN/LbL4GFoph+4E3hoRO6puyrdWz/Wn7rOlK5xB/WW6fTVzwAngzkXLPkr3jP3DwNsWPX8HcHH1+JV0A/wQ8MfApprq/APg/Uueuxi4Y1EdB6qvg3S7AOred/8TuB+4rzpQdi2tq/r+erqjCh4bUF2H6PbF7a++Prm0rkHur+V+fuDf0v1FArC5OnYOVcfSKwewj95It8vqvkX76Xrg/fPHGfCBat8coHtS9g0DqGvZz
2VJXQH8brU/72fRaK2aazuPbvBuW/TcwPcX3V8Ux4FmlV3vo3tO48+AR4GvABdV604An1r02vdWx9kh4J+uZfteQi5JhSut60OStIRBLUmFM6glqXAGtSQVzqCWpMIZ1JJUOINakgr3/wG0dysi3tZLLAAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# an exponential function\n", + "def f(x, a, c):\n", + " return c*np.exp(a*x)\n", + "\n", + "# test the function\n", + "x = 2\n", + "a = 3\n", + "c = 4\n", + "\n", + "# visualize using matplotlib\n", + "x = np.linspace(-10, 10, 100)\n", + "y = [f(x_i, a, c) for x_i in x]\n", + "\n", + "plt.plot(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Function Roots" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Check if a specific function has root. Below we are checking linear function which has a unique root of $x = -c/m$." + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(-1.3333333333333333, 0.0, 'x = -c/m-1.3333333333333333')" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# function to find root of a linear function\n", + "\n", + "def f(x, m, c):\n", + " return m*x + c\n", + "\n", + "def has_root(x, m, c):\n", + " return x == -c/m\n", + "\n", + "# generate range of values that includes root of linear function\n", + "x = np.linspace(-10, 10, 100)\n", + "\n", + "# add root to the list\n", + "x = np.append(x, -c/m)\n", + "\n", + "# order the list of values\n", + "x = np.sort(x)\n", + "\n", + "y = [has_root(x_i, m, c) for x_i in x]\n", + "\n", + "# y to visualize\n", + "y = [f(x_i, m, c) for x_i in x]\n", + "\n", + "# visualize using matplotlib, with red dots at roots\n", + "plt.plot(-c/m, f(-c/m, m, c), 'r.')\n", + "plt.plot(x, y)\n", + "\n", + "# label root value with text x = -c/m\n", + "plt.text(-c/m, f(-c/m, m, c), 'x = -c/m'+str(-c/m))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Transformations of Functions\n", + "\n", + "Transformations is an important concept where you take the output of one function and put it through another function." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Shifts happen when you move the function up or down, left or right. See example below of a vertical shift:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# function f(x) = x**2\n", + "def f(x):\n", + " return x**2\n", + "\n", + "# test the function\n", + "x = np.linspace(-4, 4, 100)\n", + "\n", + "# visualize using matplotlib\n", + "plt.plot(x, f(x))\n", + "\n", + "# plot function shifted by 1 unit vertically; use red\n", + "plt.plot(x, f(x)+1, 'r')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now we can do a horizontal shift:" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# plot function shifted by 2 units horizontally; use green\n", + "x = np.linspace(-5, 5, 100)\n", + "plt.plot(x, x**2, label='$f(x)=x^2$')\n", + "plt.plot(x, (x-2)**2, label='$f(x)=(x-2)^2$')\n", + "\n", + "plt.xlim(-6, 6)\n", + "plt.ylim(-2, 4)\n", + "\n", + "plt.plot(x, x*0, 'k', alpha=0.2)\n", + "plt.plot(x*0, x, 'k', alpha=0.2)\n", + "plt.legend()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Scaling transformation example:" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# vertical scaling\n", + "x = np.linspace(-6, 6, 100)\n", + "plt.plot(x, x**2)\n", + "plt.plot(x, (x**2)/2, 'g')\n", + "\n", + "plt.xlim(-6, 6)\n", + "plt.ylim(-2, 4)\n", + "\n", + "plt.plot(x, x*0, 'k', alpha=0.2)\n", + "plt.plot(x*0, x, 'k', alpha=0.2)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Scaled by negative constant" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# horizontal scaling\n", + "x = np.linspace(-6, 6, 100)\n", + "plt.plot(x, x**2)\n", + "plt.plot(x, -(x**2)/2, 'g')\n", + "\n", + "plt.xlim(-6, 6)\n", + "plt.ylim(-4, 4)\n", + "\n", + "plt.plot(x, x*0, 'k', alpha=0.2)\n", + "plt.plot(x*0, x, 'k', alpha=0.2)\n", + "\n", + "plt.plot(x, x**2, label='$f(x)=x^2$')\n", + "plt.plot(x, -(x**2)/2, 'g', label='$f(x)=-\\\\frac{1}{2}x^2$')\n", + "plt.legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plotting sine function" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = np.linspace(-6, 6, 100)\n", + "plt.plot(x, np.sin(x), label='$f(x)=sin(x)$')\n", + "\n", + "plt.plot(x, x*0, 'k', alpha=0.2)\n", + "plt.plot(x*0, x, 'k', alpha=0.2)\n", + "\n", + "plt.legend()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another example of shifting transformation" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = np.linspace(-6, 6, 100)\n", + "plt.plot(x, x**3 - x, label='$f(x)=x^3-x$')\n", + "\n", + "# horizontal shift\n", + "plt.plot(x, (x+2)**3 - (x+2), 'g', label='$f(x)=(x+2)^3-(x+2)$')\n", + "\n", + "plt.plot(x, x*0, 'k', alpha=0.2)\n", + "plt.plot(x*0, x, 'k', alpha=0.2)\n", + "\n", + "plt.ylim(-5, 5)\n", + "plt.legend()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Vertical scaling of sine function:" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "x = np.linspace(-6, 6, 100)\n", + "\n", + "# sine with vertical scaling\n", + "plt.plot(x, np.sin(x), label='$f(x)=sin(x)$')\n", + "plt.plot(x, 2*np.sin(x), 'g', label='$f(x)=2sin(x)$')\n", + "\n", + "plt.plot(x, x*0, 'k', alpha=0.2)\n", + "plt.plot(x*0, x, 'k', alpha=0.2)\n", + "\n", + "plt.ylim(-5, 5)\n", + "plt.legend()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Exercises:\n", + "\n", + "- Plot the following function: $f(x) = x^2 + 2x + 1$ and find the roots (if any) of the function.\n", + "- A good way to understand function is to understand more closely their properties. Play around with the function by changing values and observe how they behave. Are any of the functions related to each other? How? \n", + "- Write a code that applies a [sigmoid](https://en.wikipedia.org/wiki/Sigmoid_function) function to a given input." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.15 ('math')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.15" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "885f0b8c324fe4d130bb8744e2598b34a480bb115953c09edacbc3cda2096502" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/maths/feature-scaling.ipynb b/notebooks/maths/feature-scaling.ipynb new file mode 100644 index 0000000..ac235fc --- /dev/null +++ b/notebooks/maths/feature-scaling.ipynb @@ -0,0 +1,291 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Feature Scaling [WIP]\n", + "\n", + "Deals with normalizing range of features or 
independent variables of data. In ML, feature scaling is used to \n", + "- ensure features contribute approximately proportionately to the final distance as opposed to one feature dominating; particularly important when euclidean distance is used\n", + "- help gradient descent converge faster, thus helping with the optimization problem and speeding up the learning process\n", + "- penalize coefficients appropriately when regularization is used as part of the loss function\n", + "\n", + "Source: [Wikipedia](https://en.wikipedia.org/wiki/Feature_scaling)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Mean normalization\n", + "\n", + "Mean normalization is a method used in normalization. The formula is as follows:\n", + "\n", + "$$\n", + "x^{\\prime}=\\frac{x-\\operatorname{average}(x)}{\\max (x)-\\min (x)}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([-0.5352, -0.0845, 0.0423, 0.1127, 0.4648])\n", + "tensor([-1.4786, -0.2335, 0.1167, 0.3113, 1.2840])\n" + ] + } + ], + "source": [ + "import torch\n", + "\n", + "x = torch.Tensor([4, 36, 45, 50, 75])\n", + "x_prime = (x - x.mean()) / (x.max() - x.min()) \n", + "print(x_prime)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Standardization (Z-score Normalization)\n", + "\n", + "This can be used to ensure that the values of each feature in the data have zero-mean and unit-variance. 
The equation is as follows:\n", + "$$\n", + "x^{\\prime}=\\frac{x-\\operatorname{average}(x)}{\\operatorname{standard deviation}(x)}\n", + "$$" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([-1.4786, -0.2335, 0.1167, 0.3113, 1.2840])\n" + ] + } + ], + "source": [ + "import torch\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "x = torch.Tensor([4, 36, 45, 50, 75])\n", + "x_prime = (x - x.mean()) / x.std()\n", + "print(x_prime)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Detailed Examples\n", + "Here is an illustrated example with two features. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD4CAYAAAD8Zh1EAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAT50lEQVR4nO3dfYxcV33G8efBmHYJtFs1Vkk2XmypkasUtxiNEtBWiCK1TtKIWC6VEqlQKioLCiJQ6spBiFJUKW4j0bcgkEsiSkGBiqQrl6R1I20kCCKB9UswtkllIWi8RI1Ja4cII8Xh1z92nH2b3XnZO3PuOff7kVbZmbneORmtH5/7u79zriNCAID8vST1AAAA1SDQAaAQBDoAFIJAB4BCEOgAUIiXpnrjyy+/PLZs2ZLq7QEgS4cPH/5hRGzq9FqyQN+yZYtmZ2dTvT0AZMn291d7jZILABSCQAeAQhDoAFAIAh0ACkGgA0AhknW5jMr00TndeegJ/eDcBV05Pqa9O7dp146J1MMCgMoVHejTR+d0+/3HdeH5FyRJc+cu6Pb7j0sSoQ6gOEWXXO489MSLYX7Jhedf0J2Hnkg0IgAYnqID/QfnLvT1PADkrOiSy5XjY5rrEN5Xjo9lUVvPYYwA6qPoGfrends0tnHDkufGNm7Qb/7KJt1+/3HNnbug0EJtffroXJqBdnCp/l/nMQKol66Bbnuz7Ydtn7R9wvZtHY55k+3zto+1vz4ynOH2Z9eOCd2xe7smxsdkSRPjY7pj93Y9/J2zta+tU/8H0K9eSi4XJX0wIo7YfqWkw7YfioiTy477akTcVP0Q12fXjokVZYoPfPFYx2PrVFun/g+gX11n6BHxVEQcaX//I0mnJGVdyL1yfKyv51PIYYwA6qWvGrrtLZJ2SHqsw8tvsP247X+3/aur/Pk9tmdtz549e7b/0VZktdr63p3bEo1opRzGCKBeeu5ysf0KSfdJen9EPLvs5SOSXh0Rz9m+UdK0pKuX
/4yIOCDpgCS1Wq0YdNDrdakEU+cOkhzGiLXRpYRRc0T3XLW9UdKXJR2KiI/3cPz3JLUi4oerHdNqtYIbXKBUy1cpS/NnWHfs3k6oY11sH46IVqfXeulysaS7JZ1aLcxtv6p9nGxf2/65zww+ZCBvdCkhhV5KLlOS3ibpuO1j7ec+JGlSkiLiU5LeKundti9KuiDpluhl6g8Uii4lpNA10CPiEUnucsxdku6qalDIX9Prx2utUgaGJfuVotNH5zS1f0Zb9z2gqf0zrKSsAVa50qWENLIOdIKjnqgfr75KuUlnKRi9rDfnWis4+IszXGuVVKgfz+u0ShkYpqxn6ARHGt3OjFjlCqSRdaATHGl0K6lQPwbSyDrQCY40up0ZUT8G0si6hs7y+DR6acmjfgyMXtaBLhEcKezdua3jsnbOjIC0sg90jB5nRkA9EegYCGdGQP0Q6KiFpm8VAFSBQEdyy7eavdTXLolQB/pAoCO5Xlb8MoMHuiPQkVy3vnZm8EBvsl5YhDJ0W/HLZl9Abwh0JNdtxS979gC9IdCRXLetAtizB+gNNXTUwlp97axMBXpDoKP2WJmKfjS5I4pARxZYmYpeNL0jiho6gGI0vSOKQAdQjKZ3RFFyQVaaXB9Fd73s1V8yZujIRrd7mQJNv4sZgY5sNL0+iu6afvtDSi7IRkn1UUpHw9Pkjihm6MhGKStGKR1hWAh0ZKOU+iilo+aZPjqnqf0z2rrvAU3tnxnaP96UXJCNUlaMllQ6QnejXOxEoCMrJdRHm95a1zS93MClKl1LLrY3237Y9knbJ2zf1uEY2/5726dtf8v26yodJVCQ3EtHoyoflGKUZ2S9zNAvSvpgRByx/UpJh20/FBEnFx1zg6Sr21/XSfpk+78Alsm5dNT0vVIGMcozsq6BHhFPSXqq/f2PbJ+SNCFpcaDfLOmzERGSHrU9bvuK9p8FsEyupaNRlg9KMcrtn/uqodveImmHpMeWvTQh6clFj8+0n1sS6Lb3SNojSZOTk30OFUBqXNDt3yjPyHoOdNuvkHSfpPdHxLODvFlEHJB0QJJarVYM8jMApMMF3cGM6oyspz502xs1H+afj4j7OxwyJ2nzosdXtZ8DUJDcL+iWrpcuF0u6W9KpiPj4KocdlPT2drfL6yWdp34OlKfpe6XUXS8llylJb5N03Pax9nMfkjQpSRHxKUkPSrpR0mlJP5b0h5WPFEAt5HpBtwl66XJ5RJK7HBOS3lPVoAAA/WMvFwAoBIEOAIUg0AGgEAQ6ABSC3RYzwN1tAPSCQK85NkMC0CtKLjXH3W0A9IoZuupd0mAzJAC9avwMve437C3lxsgAhq/xgV73kgabIQFp5XSHpsaXXOpe0qjr3W3qXKYCqpJbU0LjAz2H/Z3rthlSbr/kwKByu0NT40sulDT6V/cyFVCVup/BL9f4QGd/5/7l9ksODCq3poTGl1yk+pU06i6HMhVQhVHe4LkKjZ+ho3+UqdAUuZ3BM0NH3+raeQMMQ05n8AQ6BpLTLznQFJRcAKAQBDoAFIJAB4BCEOgAUAgCHQAKQaADQCEIdAAoBH3oAFChlFtLE+gAUJHUW0tTcgGAiqTeWppAB4CKpN5ampILssFt71B3qbeW7jpDt32P7adtf3uV199k+7ztY+2vj1Q/TDTdpdrk3LkLCi3UJut8w140T+qtpXspuXxG0vVdjvlqRLy2/fWx9Q8LWCp1bRLoRer907uWXCLiK7a3jGAswKpS1yaBXqXcWrqqGvobbD8u6QeS/jQiTnQ6yPYeSXskaXJysqK3RhOkrk0Ogpo/Rq2KLpcjkl4dEb8u6R8kTa92YEQciIhWRLQ2bdpUwVujKVLXJvtFzR8prDvQI+LZiHiu/f2DkjbavnzdIwMWSV2b7Bc1f6Sw7pKL7VdJ+p+ICNvXav4fiWfWPTJka1ilhpxue0fNHyl0DXTb90p6k6TLbZ+R9OeSNkpSRHxK0lslvdv2RUkXJN0SETG0EaPW
Ui99rosca/7IXy9dLrd2ef0uSXdVNiJkba1SQ5MCfe/ObUv+YZPqXfNHGVgpikpRaph36R8vulwwSgQ6KkWpYUFONX+Ugc25UKnc2gtLM310TlP7Z7R13wOa2j9Dm2TDMENHpSg1pMMFaRDoqBylhjRKuyDNStv+EehAIUq6IM3ZxmCooQOFWO3Cc44XpFlpOxgCHShESRekSzrbGCVKLsgS9dWVSrogTfvrYAh0ZIf66upKuSDNStvBUHJBdqivli+33TXrghk6skN9tRlKOdsYJWboyE5J3RxAlQh0ZKekbg6gSpRckJ2SujmAKhHoyBL1VWAlSi4AUAgCHQAKQaADQCEIdAAoBIEOAIUg0AGgEAQ6ABQi2z50tk8FgKWyDHS2TwWAlbIsubB9KgCslGWgs30qAKyUZaCzfSoArJRloLN9KgCslOVFUbZPBYCVuga67Xsk3STp6Yh4TYfXLenvJN0o6ceS3hERR6oe6HJsnwoAS/VScvmMpOvXeP0GSVe3v/ZI+uT6hwUA6FfXGXpEfMX2ljUOuVnSZyMiJD1qe9z2FRHxVFWDRP5SLgRjERqaoooa+oSkJxc9PtN+bkWg296j+Vm8JicnK3hr5CDlQjAWoaFJRtrlEhEHIqIVEa1NmzaN8q2RUMqFYCxCQ5NUEehzkjYvenxV+zlAUtqFYCxCQ5NUEegHJb3d814v6Tz1cyyWciEYi9DQJF0D3fa9kr4uaZvtM7bfaftdtt/VPuRBSd+VdFrSP0r646GNFllKuRCMRWhokl66XG7t8npIek9lI0JxUi4EYxEamsTzeTx6rVYrZmdnk7x3arTRoan43V8/24cjotXptSyX/ueMNjoMQw5Bye/+8GW5OVfOaKND1S4F5dy5CwotBOX00Xo1m/G7P3wE+oit1i43d+6CpvbP1O4vIeovl6CkhXT4CPQRW6tdrq4zK9RbLkFJC+nwEegj1qmNbrE6zqxyNn10TlP7Z7R13wPFngHlEpS0kA4fF0VHbHEb3VwmM6tcdboIt/dLj+ujB0/o/IXna3vxsF97d25b8v8p1TMo69xCmsNF5V7QtpjQ1P6ZjqE+MT6mr+17c4IRlWW1z3exsY0bdMfu7Vn+5V2slEBKYfk//FK9fy9oW6ypXGZWuerlTOdSiauOf3H7wQ1fBrfWReXcPlNq6Ant2jGhO3Zv18T4mKz5mXldZwU56rWGTImr2XK5qNwLZuiJMbMank5nQJ3U7eIhRuvK8bGOpbkcfy+YoaNYy8+AfuHlG7XxJV5yDCUulNR9wwwdRVt+BsTFQyxX5+6bftHlAgAZWavLhZILABSCQAeAQhDoAFAIAh0ACkGgA0AhaFsEkDVaURcQ6ACyxW3tlqLkAiBbudytaVQIdADZKmljrSoQ6ACylcvdmkaFQAeQrZI21qoCF0UBZKukjbWqkFWg054EYDnuKbAgm0CnPQkA1pZNoJd03z+gqTjLHq5sAp32JCBvnGUPX09dLravt/2E7dO293V4/R22z9o+1v76o6oHSnsSkDcWAQ1f10C3vUHSJyTdIOkaSbfavqbDoV+MiNe2vz5d8ThpTwIyx1n28PVScrlW0umI+K4k2f6CpJslnRzmwJajPamePjx9XPc+9qReiNAGW7det1l/uWt76mGhhq4cH9Nch/DmLLs6vQT6hKQnFz0+I+m6Dsf9ru03SvovSR+IiCeXH2B7j6Q9kjQ5Odn3YGlPqpcPTx/X5x797xcfvxDx4mNCHcvt3bltSQ1d4iy7alWtFP03SVsi4tckPSTpnzodFBEHIqIVEa1NmzZV9NZI5d7HVvybvebzaLZdOyZ0x+7tmhgfkyVNjI/pjt3bmaRVqJcZ+pykzYseX9V+7kUR8cyih5+W9NfrHxrq7oWIvp4HOMserl5m6N+UdLXtrbZfJukWSQcXH2D7ikUP3yLpVHVDRF1tsPt6HsBwdQ30iLgo6b2SDmk+qP8lIk7Y/pjtt7QPe5/tE7Yfl/Q+
Se8Y1oBRH7det7mv5wEMlyPR6XGr1YrZ2dkk743q0OUCjJbtwxHR6vgagQ4A+Vgr0NkPHQAKkc1eLrlKsRkRGyABzUSgD1GKzYjYAAloLkouQ5RiMyI2QAKai0AfohSbEbEBEtBcBPoQpdjyl22GgeYi0IcoxZa/bDMM1Nf00TlN7Z/R1n0PaGr/jKaPznX/Q33gougQpdjyl22GsRgdT/UxioYFFhYBhVoeINL82Ro7HKYxtX+m437wE+Nj+tq+N/f8c1hYBDQQHU/1MoqGBQIdKBQdT/UyioYFAh0oFB1P9TKKhgUCHSgUHU/1Moo7NtHlAhRqvR1PdMgsqOqzGPYdmwh0oGCDBgh7Ai3I6bOg5AJgBTpkFgz6WQx7EVEnzNABrECHzIJBPotUs3pm6ABWGH/5xo7PN7FDZpBuoVRnOAQ6gCWmj87puZ9cXPH8xg1uZIfMIN1Cqc5wCHQAS9x56Ak9/9OVW4Jc9rKX1u4i4CgM0m6Yag0ANXQAS6w2izx/4fkRj6Q++u0W2rtzW8d9dIZ9hsMMHcASrDBdv1EsIuqEGTqAJVLNLksz7EVEnRDoAJZgT/18EehAw/SyjD3F7BLrR6ADDZLTMnb0r5GBzqZDaKq1FrzwdyB/jQt0ZihoMpb0l61xbYtsOoQmoyWxbD0Fuu3rbT9h+7TtfR1e/xnbX2y//pjtLZWPtCLMUNBk3PSibF0D3fYGSZ+QdIOkayTdavuaZYe9U9L/RcQvS/obSX9V9UCrwgwFTZZqwQtGo5ca+rWSTkfEdyXJ9hck3Szp5KJjbpb00fb3X5J0l21HxMoNIRJj0QSajpbEcvVScpmQ9OSix2faz3U8JiIuSjov6ReX/yDbe2zP2p49e/bsYCNeJ2YoAEo10i6XiDgg6YAktVqtZLN3ZigAStTLDH1O0uZFj69qP9fxGNsvlfTzkp6pYoAAgN70EujflHS17a22XybpFkkHlx1zUNIftL9/q6SZOtbPAaBkXUsuEXHR9nslHZK0QdI9EXHC9sckzUbEQUl3S/pn26cl/a/mQx8AMEI91dAj4kFJDy577iOLvv+JpN+rdmgAgH40bqUoAJTKqUrdts9K+n6SNx+NyyX9MPUgaoLPYgGfxQI+iwX9fBavjohNnV5IFuilsz0bEa3U46gDPosFfBYL+CwWVPVZUHIBgEIQ6ABQCAJ9eA6kHkCN8Fks4LNYwGexoJLPgho6ABSCGToAFIJAB4BCEOgVs32P7adtfzv1WFKzvdn2w7ZP2j5h+7bUY0rF9s/a/obtx9ufxV+kHlNKtjfYPmr7y6nHkprt79k+bvuY7dl1/Sxq6NWy/UZJz0n6bES8JvV4UrJ9haQrIuKI7VdKOixpV0Sc7PJHi2Pbki6LiOdsb5T0iKTbIuLRxENLwvafSGpJ+rmIuCn1eFKy/T1JrYhY9yIrZugVi4ivaH6DssaLiKci4kj7+x9JOqWVN0dphJj3XPvhxvZXI2dTtq+S9DuSPp16LKUh0DES7RuH75D0WOKhJNMuMxyT9LSkhyKiqZ/F30r6M0k/TTyOughJ/2n7sO096/lBBDqGzvYrJN0n6f0R8Wzq8aQSES9ExGs1f5OYa203riRn+yZJT0fE4dRjqZHfiIjXSbpB0nvaZduBEOgYqna9+D5Jn4+I+1OPpw4i4pykhyVdn3goKUxJeku7bvwFSW+2/bm0Q0orIuba/31a0r9KunbQn0WgY2jaFwLvlnQqIj6eejwp2d5ke7z9/Zik35L0naSDSiAibo+IqyJii+ZvhDMTEb+feFjJ2L6s3TAg25dJ+m1JA3fIEegVs32vpK9L2mb7jO13ph5TQlOS3qb5Wdix9teNqQeVyBWSHrb9Lc3f1vGhiGh8yx70S5Iesf24pG9IeiAi/mPQH0bbIgAUghk6ABSCQAeAQhDoAFAIAh0ACkGgA0AhCHQAKASBDgCF+H/SAjUyLUEVoAAAAABJRU5ErkJggg==", + 
"text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# x_1 feature with 50 random values in range (0, 5)\n", + "x_1 = torch.rand(50, 1) * 5\n", + "x_2 = torch.rand(50, 1) * 3\n", + "\n", + "# scatter plot\n", + "plt.scatter(x_1.numpy(), x_2.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Calculate and subtract mean to get zero-mean." + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAATWElEQVR4nO3db4xc11nH8d8PN6kWilggFo3Xbm0gGCKMMB2lrfymtKnsVihJDYUECVpoZV40Aipk5CgVSBXIRpF4gYiorDZq+aMkVUm3Rgkybd0qEtAoYxzq/KnBRKTxNKLbpi5UdZs4fXixs9n17OzO7M6de+495/uRouzce7X36GbnyTnPec65jggBAPL3fakbAACoBwEfAApBwAeAQhDwAaAQBHwAKMQrUjdgLddcc03s3LkzdTMAoFVOnz79tYjYOuxcYwP+zp071e12UzcDAFrF9jNrnSOlAwCFIOADQCEI+ABQCAI+ABSCgA8AhWhslU5d5s/0dNfJc/rKxUvaNjujw/t365a9c6mbBQCVKzrgz5/p6Y4HzurSiy9JknoXL+mOB85KEkEfQHaKTuncdfLcy8F+yaUXX9JdJ88lahEATE/RAf8rFy9t6DgAtFnRKZ1tszPqDQnu22ZnWpHbb0MbATRH0T38w/t3a+aqLVccm7lqi37xp7fqjgfOqnfxkkLLuf35M700DR1iaf6hyW0E0CxFB/xb9s7p6ME9mpudkSXNzc7o6ME9+tyXFhqf22f+AcBGFZ3SkRaD/mAa5P33Pzb02ibl9pl/ALBRRffw17JtdmZDx1NoQxsBNAsBf4i1cvuH9+9O1KLV2tBGAM1SfEpnmKUUT5MrYNrQRoxGpRXq5IhI3YahOp1O8AIU5Gxwpbe0OEo7enAPQR+bZvt0RHSGnSOlAyRCpRXqRsAHEqHSCnUjh48kyF2vv9IbmIbse/jzZ3rad+yUdh15UPuOnWIlagOwSngRlVaoW9YBn8DSTOSuF6210ru0kQ7qk3VKZ73AwpdqutZL2ZC7XjZspTcwLVn38AksaYwaWbFKGEgj64BPYEljVMqG3DWQRtYBn8CSxqiRFblrII2sc/hsP5DGOOWG5K6B+mUd8CUCSwqH9+8eumUAIysgrewDPurHyApoJgI+poKRFdA8lUza2r7H9ldtP77Gedv+C9vnbX/R9i9UcV+UgxXTwOSqqtL5qKQD65x/m6Tr+v8ckvRXFd0XBWDFNFCNSgJ+RDws6fl1LrlZ0l/Hoi9ImrV9bRX3Rv7G2YqBEQAwWl11+HOSnl3x+UL/2BVsH7Ldtd1d
WFioqWloulF1/YwAgPE0auFVRByPiE5EdLZu3Zq6OWiIUSum2YwNGE9dAb8naceKz9v7x4CRRq2YZs8kYDx1BfwTkn6zX63zBknfjIjnaro3Wm7UVgzsmQSMp5I6fNv3SnqTpGtsX5D0x5KukqSI+JCkhyS9XdJ5Sd+W9FtV3BflWK+un5W9wHgqCfgRcduI8yHpfVXcCxjEyl5sVKmv2GSlLbLAyl6Ma6mqa2lEuFTVJSn7v6FGVekAwLSVXNVFwAdQlJKrukjpICul5mYxvnHe15ArevjIBituMY6S34RHwEc2Ss7NYnwlv2KTlA6ykVNultTUdJVa1UUPH9nIZcUtqSlMCwEf2cglN0tqqkx1bPFNSgfZyGXFbU6pKYynrsVgBHxkJYfcbMllg6Vab1RX5d8zKR2gYdqemuLtYxtX16iOHj7QMG1OTZW8T80k6hrVEfCBBmpraqqu1ERu6trim4APoDJMOG9OXaM6Aj6AyjDhvHl1jOqYtAVQmbZPOOeOHj6AyrR5wrkEBHwAlWrrhHMJSOkAQCEI+ABQCAI+ABSCgA8AhSDgA0AhqNLJAG9HAjAOAn7LsVkVgHGR0mk53o4EYFz08MfQ5JQJm1UBGBc9/BGa/kLpXF7cDWD6CPgjND1lwmZVQFptesNXJQHf9gHb52yft31kyPl3216w/Vj/n/dWcd86ND1lcsveOR09uEdzszOypLnZGR09uCd5yqlNXwJgs5qeARg0cQ7f9hZJd0t6q6QLkh61fSIinhy49P6IuH3S+9WtDft7N22zKiqHUIq2veGrih7+DZLOR8TTEfGCpPsk3VzB720EUiYb1/Q0GFCVpmcABlUR8OckPbvi84X+sUG/bPuLtj9he8ewX2T7kO2u7e7CwkIFTZtcU1MmTda2LwGwWW0rmqirLPMfJN0bEd+1/TuSPibpzYMXRcRxScclqdPpRE1tG6lpKZOma0MaDKhCXS8fr0oVPfyepJU99u39Yy+LiK9HxHf7Hz8s6XUV3BcNRRoMpWhbBqCKHv6jkq6zvUuLgf5WSb++8gLb10bEc/2PN0l6qoL7oqF4zR1K0qYMwMQBPyIu275d0klJWyTdExFP2P6gpG5EnJD0u7ZvknRZ0vOS3j3pfdFsbfoSAKVwRGNS5VfodDrR7XZTNwMAWsX26YjoDDvHSlsAKAQBHwAKQcAHgEIQ8AGgEAR8ACgEAR8ACsEbrwCgRinfoEfAB4CapN46nJQOANQk9dbhBHwAqEnqrcNJ6SArKfOjwCiptw6nh49stO39oihP6q3DCfjIRur8KDBK6v3zSekgG6nzo8A4Um4dTsBHNlLnRzeK+QbUjZQOspE6P7oRzDcgBQI+spE6P7oRzDcgBVI6qN00UxltebUi8w1IgR4+akUqY9Fa8wpNnW9AHgj4qBWpjEVtmm9APkjpoFakMhYtpZ2o0kGdCPioVdtKJ6epLfMNyAcpHdSKVEZa82d62nfslHYdeVD7jp0qbu6kdPTwUStSGemk3osd6RHwUTtSGWmsN2Hexv8erFTeOAI+UIicJswZrWwOOXygEDnV/lPeuzkEfKAQOU2Y5zRaqRMpHWSJ/O5qOU2YU967OQR8ZIf87tpymTA/vH/3Ff+NpfaOVupUSUrH9gHb52yft31kyPlX2r6/f/4R2zuruC8wDPnd/LVpZ9QmmbiHb3uLpLslvVXSBUmP2j4REU+uuOw9kr4RET9p+1ZJfybp1ya9NzAM+d0y5DJaqVMVPfwbJJ2PiKcj4gVJ90m6eeCamyV9rP/zJyS9xbYruDewSk7VKECVqgj4c5KeXfH5Qv/Y0Gsi4rKkb0r60cFfZPuQ7a7t7sLCQgVNQ4lyqkYBqtSossyIOB4RnYjobN26NXVz0FLkd4HhqqjS6UnaseLz9v6xYddcsP0KST8k6esV3BsYivwusFoVPfxHJV1ne5ftqyXdKunEwDUnJL2r//OvSDoVEVHBvQEAY5q4hx8Rl23fLumk
pC2S7omIJ2x/UFI3Ik5I+oikv7F9XtLzWvyfAgCgRpUsvIqIhyQ9NHDsj1b8/B1J76ziXgCAzWnUpC0AYHoI+ABQCAI+ABSCgA8Ahch2t0y2xwWAK2UZ8NkeFwBWyzKlw/a4ALBalgGf7XEBYLUsAz7b4wLAalkGfLbHBYDVspy0zellzQBQlSwDvsT2uAAwKMuUDgBgtWx7+GifVIvlWKSHUhDw0QipFsuxSA8lIaWDRki1WI5FeigJAR+NkGqxHIv0UBICPhoh1WI5FumhJAR8NEKqxXIs0kNJmLRFI6RaLMciPZTEEZG6DUN1Op3odrupm5EEZYIoFX/7k7N9OiI6w87Rw28YygQxDW0IpPztTx85/IahTBBVWwqkvYuXFFoOpPNneqmbdgX+9qePgN8wa5UD9i5e0r5jpxr3JUXztSWQUiI7fQT8hlmvHLCpPTM0W1sCKSWy00fAb5hhZYIrNbFn1nbzZ3rad+yUdh15MMtRVFsCKSWy00fAb5hb9s7p6ME9mlvny9i0nlmbDctvv//+x7Qzo+DflkC68m/fkuZmZ3T04J5GTNjm0imgSqeBlvby33fslHpDgnvTemZtNiy/vVSonEuVSJvWGjTxPRY5VQ8R8Bvs8P7dV/yhSc3smbXZqNHSUgqtbV/sQU0MpG2x3qR3254pKZ0Ga/IQNxfjjJZIoZWtLZPe45ioh2/7RyTdL2mnpP+W9KsR8Y0h170k6Wz/45cj4qZJ7lsSembTNWwUNYgUWtm2zc5kk1qdtId/RNJnI+I6SZ/tfx7mUkT8fP8fgj0aY3CS3APnSaGhLZPe45hoLx3b5yS9KSKes32tpM9HxKqnYPtbEfGqjfzukvfSQTpt2IIA9WvT38V6e+lMGvAvRsRs/2dL+sbS54HrLkt6TNJlScciYn6N33dI0iFJes1rXvO6Z555ZtNtA4ASTbR5mu3PSHr1kFN3rvwQEWF7rf97vDYierZ/XNIp22cj4r8GL4qI45KOS4s9/FFtAwCMb2TAj4gb1zpn+39sX7sipfPVNX5Hr//vp21/XtJeSasCPgBgeiadtD0h6V39n98l6VODF9j+Yduv7P98jaR9kp6c8L4AgA2aNOAfk/RW2/8p6cb+Z9nu2P5w/5qfkdS1/e+SPqfFHD4BHwBqNlEdfkR8XdJbhhzvSnpv/+d/kbRnkvsAACbH1goAstamksppI+ADyFZOG59Vgb10AGSrLW/7qgsBH0C2ctr4rAoEfADZasvbvupCwAeQrZw2PqsCk7YAstWmt33VIbuATwkWgJV4p8SyrAI+JVgAsLasAn5O754ESsQIfbqyCviUYAHtxQh9+rKq0qEEC2gvFklNX1YBnxIsoL0YoU9fVikdSrCa5wPzZ3XvI8/qpQhtsXXb63foT25h81Sstm12Rr0hwZ0RenWyCvgSJVhN8oH5s/rbL3z55c8vRbz8maCPQYf3774ihy8xQq9aVikdNMu9jzy7oeMo2y1753T04B7Nzc7IkuZmZ3T04B46cBXKroeP5ngphr+Hfq3jACP06aKHj6nZYm/oOIDpIuBjam57/Y4NHQcwXaR0MDVLE7NU6QDN4GhoPrXT6US3203dDABoFdunI6Iz7BwpHQAoBCmdxFJsFsUGVUCZCPgJpdgsig2qgHKR0kkoxWZRbFAFlIuAn1CKzaLYoAooFwE/oRTbObOFNFAuAn5CKbZzZgtpoLnmz/S079gp7TryoPYdO6X5M71Kfz+Ttgml2M6ZLaQxiKqtZqijoIKFV0DBBoOMtDjiY5fK+u07dmro+wDmZmf0z0fePPbvmdrCK9vvtP2E7e/ZHnqD/nUHbJ+zfd72kUnuCaA6VG01Rx0FFZPm8B+XdFDSw2tdYHuLpLslvU3S9ZJus339hPcFUAGqtpqjjoKKiQJ+RDwVEaO6AjdIOh8RT0fEC5Luk3TzJPcFUA2qtpqjjoKKOqp05iStfMXRhf6xVWwfst213V1YWKihaUDZqNpq
jjre+DWySsf2ZyS9esipOyPiU5W1RFJEHJd0XFqctK3ydwNYbdKqLSp8FlX1HKb9xq+RAT8ibpzwHj1JK994sb1/DEADbDbIsC/TojY9hzpSOo9Kus72LttXS7pV0oka7gtgiqjwWTTJc5j2QqtBk5ZlvsP2BUlvlPSg7ZP949tsPyRJEXFZ0u2STkp6StLHI+KJyZoNIDUqfBZt9jksjQx6Fy8ptDwymGbQn7RK55MRsT0iXhkRPxYR+/vHvxIRb19x3UMR8VMR8RMR8aeTNhpAerPff9XQ46VV+Gy20inFCIm9dABs2PyZnr71ncurjl+1xcVV+Gy20inFCImAD2DD7jp5Ti9+b3Uh3Q9c/YrGTVRO22bLKVOsgWDzNAAbtlYv9JuXXqy5Jc2wmUqnw/t3D93HaJojJHr4ADaMFbqTq2Oh1SB6+AA2LEXvNEfTXmg1iIAPYMN4r0I7EfABXGHcbQLq7p1icgR8AC9r0zYB2DgC/hBsCIVSrbcYiO9A+xHwB9DDQcnYLiFvlGUOYEMolIxyy7wR8AfQw0HJeCFK3gj4A+jhoGQpFgOhPuTwB7CgBKWj3DJfBPwBLCgBkCsC/hD0cADkiBw+ABSCgA8AhSDgA0AhCPgAUAgCPgAUwhGr30vZBLYXJD2TsAnXSPpawvs3Cc9iGc9iGc9iWZOexWsjYuuwE40N+KnZ7kZEJ3U7moBnsYxnsYxnsawtz4KUDgAUgoAPAIUg4K/teOoGNAjPYhnPYhnPYlkrngU5fAAoBD18ACgEAR8ACkHAX4ftu2x/yfYXbX/S9mzqNqVi+522n7D9PduNLz+rmu0Dts/ZPm/7SOr2pGT7Httftf146rakZHuH7c/ZfrL/3fi91G0ahYC/vk9L+tmI+DlJ/yHpjsTtSelxSQclPZy6IXWzvUXS3ZLeJul6SbfZvj5tq5L6qKQDqRvRAJcl/UFEXC/pDZLe1/S/CwL+OiLinyLicv/jFyRtT9melCLiqYgo9U3uN0g6HxFPR8QLku6TdHPiNiUTEQ9Lej51O1KLiOci4t/6P/+fpKckNfpFGgT88f22pH9M3QgkMSfp2RWfL6jhX2zUy/ZOSXslPZK4Kesq/o1Xtj8j6dVDTt0ZEZ/qX3OnFodvf1dn2+o2zrMAcCXbr5L095J+PyL+N3V71lN8wI+IG9c7b/vdkn5J0lsi80ULo55FwXqSdqz4vL1/DIWzfZUWg/3fRcQDqdszCimdddg+IOkPJd0UEd9O3R4k86ik62zvsn21pFslnUjcJiRm25I+IumpiPjz1O0ZBwF/fX8p6Qclfdr2Y7Y/lLpBqdh+h+0Lkt4o6UHbJ1O3qS79ifvbJZ3U4sTcxyPiibStSsf2vZL+VdJu2xdsvyd1mxLZJ+k3JL25Hx8es/321I1aD1srAEAh6OEDQCEI+ABQCAI+ABSCgA8AhSDgA0AhCPgAUAgCPgAU4v8BO8//7NkcEssAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# subtract mean\n", + "x = torch.concat([x_1, x_2], dim=1)\n", + "x_new = x - x.mean(dim=0)\n", + "\n", + "# scatter plot x_new\n", + "plt.scatter(x_new[:, 0].numpy(), x_new[:, 1].numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now normalize the variances" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD4CAYAAADvsV2wAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAV2UlEQVR4nO3dfYxc1XnH8d+v5iXbNOqS2OVlMbFpqZNGbgtdQRJHUV6gpqjCjpOoJH8E2kRu1KK2+cOVEVEqRa3sFKl/RKVNLYJK2ghoU3Cc4siBLFGkVlCWGDBvDgYltTdOcKDQRnECJk//mLswzM7szuzc9/P9SCvP3Hs99+z17uNzn/Occx0RAgC0389V3QAAQDkI+ACQCAI+ACSCgA8AiSDgA0AiTqq6AYOsXLky1qxZU3UzAKBR7r///h9GxKp++2ob8NesWaPZ2dmqmwEAjWL7u4P2kdIBgEQQ8AEgEQR8AEgEAR8AEkHAB4BE1LZKpyy798/pun0H9b3njuusyQlt27hOm8+fqrpZAJC7pAP+7v1zuua2Azr+4kuSpLnnjuua2w5IEkEfQOskndK5bt/Bl4P9vOMvvqTr9h2sqEUAUJykA/73njs+0nYAaLKkUzpnTU5ork9wP2tyohG5/Sa0EUB9JN3D37ZxnSZOXvGqbRMnr9C737RK19x2QHPPHVfoldz+7v1z1TS0j/nxhzq3EUC9JB3wN58/pR1b1mtqckKWNDU5oR1b1uvux4/VPrfP+AOAUSWd0pE6Qb83DfKJWx/oe2ydcvuMPwAYVdI9/EHOmpwYaXsVmtBGAPVCwO9jUG5/28Z1FbVooSa0EUC9JJ/S6Wc+xVPnCpgmtBFLo9IKZXJEVN2Gvqanp4MHoKDNemd6S527tB1b1hP0sWy274+I6X77SOkAFaHSCmUj4AMVodIKZcsl4Nu+0fbTth8esN+2P2v7kO2HbF+Qx3nRXLv3z2nDzhmt3X6HNuycSXLCGJVWKFtePfx/lHTpIvt/R9J52ddWSX+f03mXRGCpH2YJd1BphbLlEvAj4puSnl3kkE2SvhAd90iatH1mHudeDIGlnshddwya6c2ALYpSVlnmlKTDXe+PZNuOdh9ke6s6dwA655xzxj7pYoGFX6piLVZuSO76Ff1megNFqdWgbUTsiojpiJhetWrV2J9HYKnGUndW5K6BapQV8Ockre56f3a2rVAElmoslbIhdw1Uo6yAv0fSR7JqnbdKej4iji71l8ZFYKnGUndW5K6BauSSw7d9s6R3SVpp+4ikv5B0siRFxOck7ZV0maRDkn4s6ffzOO9SWH6gGos9WGYeuWugfLkE
/Ij40BL7Q9If53GuURFYyrdt47q+SwZwZwVUi8XTkDvurIB6IuCjENxZAfVDwEcjsIwwMD4CPmqvdxnh+bp+SQR9YAQEfNTeMDOmuQMAlkbAR+0tVdfPHQAwnFotrQD0s9SMaRZjA4ZDwEftLTVjmjWTgOEQ8FF7Sy3FwJpJwHDI4aMRFqvrZ2YvMBwCPhqPmb0YVapVXQR8tAIzezGslKu6yOEDSErKVV0EfABJSbmqi5QOWiXV3CyGN8zzGtqKHj5aY6ln6QJS2k/CI+CjNVLOzWJ4KT9ik5QOWqNNuVlSU8VKtaqLHj5aoy0zbklNoSgEfLRGW3KzpKbStHv/nDbsnNHa7Xdow86ZQv6DJ6WD1mjLjNs2paYwnLImgxHw0SptyM2mXDaYqmEe8pMHUjpAzTQ9NVVGaqJtyrqro4cP1EyTU1Mpr1MzjrLu6gj4QA01NTVVVmqibcpa4puADyA3DDgvT1l3dQR8ALlhwHn5yrirY9AWQG6aPuDcdvTwAeSmyQPOKSDgA8hVUwecU0BKBwASQcAHgEQQ8AEgEQR8AEgEAR8AEpFLwLd9qe2Dtg/Z3t5n/1W2j9l+IPv6WB7nRQeLVQEYxthlmbZXSLpe0iWSjki6z/aeiHi059BbI+Lqcc+HV2OxKgDDyqOHf6GkQxHxVES8IOkWSZty+FwMgacjARhWHhOvpiQd7np/RNJFfY57v+13Svq2pE9ExOE+x9RSnR8ozWJVAIZV1qDtVyStiYhfl3SnpJv6HWR7q+1Z27PHjh0rqWmLq/sDpdvy4G4Axcsj4M9JWt31/uxs28si4pmI+Gn29gZJv9XvgyJiV0RMR8T0qlWrcmja+OqeMmGxKqBaTSqayCOlc5+k82yvVSfQXyHpw90H2D4zIo5mby+X9FgO5y1F3VMmdV2sqs5pMCAvTSuaGDvgR8QJ21dL2idphaQbI+IR25+WNBsReyT9ie3LJZ2Q9Kykq8Y9b1masL533RaratovAbBcTXvCVy45/IjYGxG/GhG/HBF/lW37VBbsFRHXRMRbIuI3IuLdEfF4HuctAymT0dU9DQbkpe4ZgF7MtF3C5vOntGPLek1NTsiSpiYntGPL+lr+710XTfslAJaraUUTrIc/hLqlTOquCWkwIA9lPXw8L/TwkTvSYEhF0zIA9PCRu7pWDgFFaFIGgICPQjTplwBIBSkdAEgEAR8AEkHAB4BEEPABIBEEfABIBAEfABJBwAeARFCHDwAlqnLpcAI+AJSk6qXDSekAQEmqXjqcgA8AJal66XBSOmgVHq2IOqt66XB6+GiN+fzo3HPHFXolP1rnh0ojLVUvHU7AR2tUnR8FllL1+vmkdNAaVedHgWFUuXQ4AR+tUXV+dFSMN6BspHTQGlXnR0fBeAOqQMBHa1SdHx0F4w2oAikdlK7IVEZTHq3IeAOqQA8fpSKV0TFoXKGu4w1oBwI+SkUqo6NJ4w1oD1I6KBWpjI75tBNVOigTAR+lalrpZJGaMt6A9iClg1KRyqjW7v1z2rBzRmu336ENO2eSGztJHT18lIpURnWqXosd1SPgo3SkMqqx2IB5E/89mKk8OgI+kIg2DZhzt7I85PCBRLSp9p/y3uUh4AOJaNOAeZvuVspESgetRH53oTYNmFPeuzwEfLQO+d3B2jJgvm3julf9G0vNvVspUy4pHduX2j5o+5Dt7X32n2r71mz/vbbX5HFeoB/yu+3XpJVR62TsHr7tFZKul3SJpCOS7rO9JyIe7Trso5L+JyJ+xfYVkj4j6ffGPTfQD/ndNLTlbqVMefTwL5R0KCKeiogXJN0iaVPPMZsk3ZS9/pKk99p2DucGFmhTNQqQpzwC/pSkw13vj2Tb+h4TESckPS/pDb0fZHur7Vnbs8eOHcuhaUhRm6pRgDzVqiwzInZFxHRETK9atarq5qChyO8C/eVRpTMnaXXX+7Ozbf2OOWL7JEm/KOmZHM4N9EV+F1gojx7+fZLOs73W9imSrpC0
p+eYPZKuzF5/QNJMREQO5wYADGnsHn5EnLB9taR9klZIujEiHrH9aUmzEbFH0ucl/ZPtQ5KeVec/BQBAiXKZeBUReyXt7dn2qa7XP5H0wTzOBQBYnloN2gIAikPAB4BEEPABIBEEfABIRGtXy2R5XAB4tVYGfJbHBYCFWpnSYXlcAFiolQGf5XEBYKFWBnyWxwWAhVoZ8FkeFwAWauWgbZse1gwAeWllwJdYHhcAerUypQMAWKi1PXw0T1WT5Zikh1QQ8FELVU2WY5IeUkJKB7VQ1WQ5JukhJQR81EJVk+WYpIeUEPBRC1VNlmOSHlJCwEctVDVZjkl6SAmDtqiFqibLMUkPKXFEVN2Gvqanp2N2drbqZlSCMkGkip/98dm+PyKm++2jh18zlAmiCE0IpPzsF48cfs1QJoi8zQfSueeOK/RKIN29f67qpr0KP/vFI+DXzKBywLnnjmvDzpna/ZKi/poSSCmRLR4Bv2YWKwesa88M9daUQEqJbPEI+DXTr0ywWx17Zk23e/+cNuyc0drtd7TyLqopgZQS2eIR8Gtm8/lT2rFlvaYW+WWsW8+syfrltz9x6wNa06Lg35RA2v2zb0lTkxPasWV9LQZs29IpoEqnhubX8t+wc0ZzfYJ73XpmTdYvvz1fqNyWKpEmzTWo43Ms2lQ9RMCvsW0b173qB02qZ8+syZa6W5pPoTXtF7tXHQNpUyw26N20a0pKp8bqfIvbFsPcLZFCS1tTBr2HQQ+/5uiZFavfXVQvUmhpO2tyojWpVXr4SFrvILl79pNCQ1MGvYdBDx/J676LasISBChXkwa9l8LiaQDQIostnkZKBwASMVbAt/1623fafiL787QBx71k+4Hsa8845wQALM+4Pfztkr4eEedJ+nr2vp/jEfGb2dflY54TALAM4wb8TZJuyl7fJGnzmJ8HACjIuAH/9Ig4mr3+vqTTBxz3Gtuztu+xvXnQh9nemh03e+zYsTGbBgDotmRZpu27JJ3RZ9e13W8iImwPKvl5Y0TM2T5X0oztAxHxZO9BEbFL0i6pU6WzZOsBYAmU2r5iyYAfERcP2mf7B7bPjIijts+U9PSAz5jL/nzK9jcknS9pQcAHgDy1aeGzPIyb0tkj6crs9ZWSvtx7gO3TbJ+avV4paYOkR8c8LwAsqSlP+yrLuAF/p6RLbD8h6eLsvWxP274hO+bNkmZtPyjpbkk7I4KAD6BwbVr4LA9jLa0QEc9Iem+f7bOSPpa9/k9J68c5DwAsR5sWPssDM20BtFabFj7LA4unAWitNi18lofWBXxKsAB045kSr2hVwKcECwAGa1XAb9OzJ4EUcYderFYFfEqwgObiDr14rarSGVRqlWoJFtAkTJIqXqsCPiVYQHNxh168VqV0KMGqn0/uPqCb7z2slyK0wtaHLlqtv9zMPDwsxCSp4rUq4EuUYNXJJ3cf0D/f898vv38p4uX3BH302rZx3aty+BJ36HlrVUoH9XLzvYdH2o60bT5/Sju2rNfU5IQsaWpyQju2rKcDl6PW9fBRHy9F/0caDNoOcIdeLHr4KMwKe6TtAIpFwEdhPnTR6pG2AygWKR0UZn5gliodoB4cNc2nTk9Px+zsbNXNAIBGsX1/REz320dKBwASQUqnYlUsFsUCVUCaCPgVqmKxKBaoAtJFSqdCVSwWxQJVQLoI+BWqYrEoFqgC0kXAr1AVyzmzhDSQLgJ+hapYzpklpIH62r1/Tht2zmjt9ju0YeeMdu+fy/XzGbStUBXLObOENHpRtVUPZRRUMPEKSFhvkJE6d3ysUlm+DTtn+j4PYGpyQv+x/T1Dfw4TrwD0RdVWfZRRUEHABxJG1VZ9lFFQQcAHEkbVVn2UUVBBwAcSRtVWfZTxxC+qdICEjVu1RYVPR17XoegnfhHwgcQtN8iwLlNHk64DKR0Ay0KFT8c416HoiVa96OEDWBYqfDqWex2quDOghw9gWSZ//uS+21Or8FlupVMVd0gEfAAj271/
Tj/6yYkF209e4eQqfJZb6VTFHdJYAd/2B20/YvtntvtO5c2Ou9T2QduHbG8f55wAqnfdvoN68WcLl2V57Skn1W6gsmjLLaesYg7EuDn8hyVtkfQPgw6wvULS9ZIukXRE0n2290TEo2OeG0BFBvVCnz/+YsktqYflVDpt27iu7zpGRd4hjdXDj4jHImKphNOFkg5FxFMR8YKkWyRtGue8AKrFDN3xlTHRqlcZVTpTkg53vT8i6aJ+B9reKmmrJJ1zzjnFtwzAslTRO22joida9Voy4Nu+S9IZfXZdGxFfzrMxEbFL0i6pszxynp8NID88V6GZlgz4EXHxmOeYk7S66/3Z2TYANTTsMgFl904xvjJSOvdJOs/2WnUC/RWSPlzCeQGMqEnLBGB045Zlvs/2EUlvk3SH7X3Z9rNs75WkiDgh6WpJ+yQ9JulfIuKR8ZpdrLKnOwN1wXIJ7TZWDz8ibpd0e5/t35N0Wdf7vZL2jnOustDDQcpYLqHdmGnbgx4OUka5ZbsR8HvQw0HKeCBKuxHwe9DDQcqqmAyE8rA8cg8mlCB1lFu2FwG/BxNKALQVAb8PejgA2ogcPgAkgoAPAIkg4ANAIgj4AJAIAj4AJMIR9Vx23vYxSd9dxl9dKemHOTcnD7RrNLRrNLRrNG1u1xsjYlW/HbUN+MtlezYiBj5QvSq0azS0azS0azSptouUDgAkgoAPAIloY8DfVXUDBqBdo6Fdo6Fdo0myXa3L4QMA+mtjDx8A0AcBHwAS0fiAb/s624/bfsj27bYnBxx3qe2Dtg/Z3l5Cuz5o+xHbP7M9sMzK9ndsH7D9gO3ZGrWr7Ov1ett32n4i+/O0Ace9lF2rB2zvKbA9i37/tk+1fWu2/17ba4pqy4jtusr2sa5r9LES2nSj7adtPzxgv21/NmvzQ7YvKLpNQ7brXbaf77pWnyqpXatt32370ex38U/7HFPMNYuIRn9J+m1JJ2WvPyPpM32OWSHpSUnnSjpF0oOSfq3gdr1Z0jpJ35A0vchx35G0ssTrtWS7Krpefy1pe/Z6e79/x2zfj0q4Rkt+/5L+SNLnstdXSLq1Ju26StLflvXzlJ3znZIukPTwgP2XSfqqJEt6q6R7a9Kud0n69zKvVXbeMyVdkL1+naRv9/l3LOSaNb6HHxFfi4gT2dt7JJ3d57ALJR2KiKci4gVJt0jaVHC7HouI2j35fMh2lX69ss+/KXt9k6TNBZ9vMcN8/93t/ZKk99p2DdpVuoj4pqRnFzlkk6QvRMc9kiZtn1mDdlUiIo5GxLey1/8n6TFJvQ/gKOSaNT7g9/gDdf5X7DUl6XDX+yNaeIGrEpK+Zvt+21urbkymiut1ekQczV5/X9LpA457je1Z2/fY3lxQW4b5/l8+JutwPC/pDQW1Z5R2SdL7szTAl2yvLrhNw6jz79/bbD9o+6u231L2ybNU4PmS7u3ZVcg1a8QTr2zfJemMPruujYgvZ8dcK+mEpC/WqV1DeEdEzNn+JUl32n4865lU3a7cLdau7jcREbYH1Qu/Mbte50qasX0gIp7Mu60N9hVJN0fET23/oTp3Ie+puE119S11fp5+ZPsySbslnVfWyW3/gqR/k/RnEfG/ZZyzEQE/Ii5ebL/tqyT9rqT3RpYA6zEnqbunc3a2rdB2DfkZc9mfT9u+XZ3b9rECfg7tKv162f6B7TMj4mh26/r0gM+Yv15P2f6GOr2jvAP+MN///DFHbJ8k6RclPZNzO0ZuV0R0t+EGdcZGqlbIz9O4uoNsROy1/Xe2V0ZE4Yuq2T5ZnWD/xYi4rc8hhVyzxqd0bF8q6c8lXR4RPx5w2H2SzrO91vYp6gyyFVbhMSzbr7X9uvnX6gxA960oKFkV12uPpCuz11dKWnAnYvs026dmr1dK2iDp0QLaMsz3393eD0iaGdDZKLVdPXney9XJD1dtj6SPZJUnb5X0fFf6rjK2z5gfd7F9oTrxsOj/tJWd8/OSHouIvxlwWDHXrOwR
6ry/JB1SJ9f1QPY1XzlxlqS9Xcddps5o+JPqpDaKbtf71Mm7/VTSDyTt622XOtUWD2Zfj9SlXRVdrzdI+rqkJyTdJen12fZpSTdkr98u6UB2vQ5I+miB7Vnw/Uv6tDodC0l6jaR/zX7+/kvSuUVfoyHbtSP7WXpQ0t2S3lRCm26WdFTSi9nP1kclfVzSx7P9lnR91uYDWqRqreR2Xd11re6R9PaS2vUOdcbuHuqKW5eVcc1YWgEAEtH4lA4AYDgEfABIBAEfABJBwAeARBDwASARBHwASAQBHwAS8f+qaAZlDF+ZfQAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "# normalize variance\n", + "x_new_normalized = x_new / torch.std(x_new, unbiased=False)\n", + "\n", + "# scatter plot x_new_normalized\n", + "plt.scatter(x_new_normalized[:, 0].numpy(), x_new_normalized[:, 1].numpy())\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And here is one more basic example where we reproduce the same results from this [scikit-learn tutorial](https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html)." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([0.5000, 0.5000])\n", + "tensor([[-1., -1.],\n", + " [-1., -1.],\n", + " [ 1., 1.],\n", + " [ 1., 1.]])\n" + ] + } + ], + "source": [ + "x = torch.tensor([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0], [1.0, 1.0]])\n", + "x_new = x - x.mean(dim=0)\n", + "print(x.mean(dim=0))\n", + "x_new_normalized = x_new / torch.std(x_new, unbiased=False)\n", + "print(x_new_normalized)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### References\n", + "\n", + "- [Wikipedia - Feature Scaling](https://en.wikipedia.org/wiki/Feature_scaling)\n", + "- [Wikipedia - Standard Deviation](https://en.wikipedia.org/wiki/Standard_deviation)\n", + "- [Normalizing Inputs by DeepLearning.AI](https://www.youtube.com/watch?v=FDCfw-YqWTE&ab_channel=DeepLearningAI)\n", + "- [How To Calculate the Mean and Standard Deviation — Normalizing Datasets in Pytorch](https://towardsdatascience.com/how-to-calculate-the-mean-and-standard-deviation-normalizing-datasets-in-pytorch-704bd7d05f4c)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/maths/mean.ipynb b/notebooks/maths/mean.ipynb new file mode 100644 index 0000000..8b9e953 --- /dev/null +++ b/notebooks/maths/mean.ipynb @@ -0,0 +1,127 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Mean [WIP]\n", + "\n", + "Mean also referred to as the arithmetic mean is a measure of the central tendency of a set of numbers. You can calculate the mean of a set of numbers as follows:\n", + "\n", + "\n", + "$$\n", + "m=\\frac{\\text { sum of the terms }}{\\text { number of terms }}\n", + "$$\n", + "\n", + "or\n", + "\n", + "more formally:\n", + "\n", + "$$\n", + "\\bar{x}=\\frac{1}{n}\\left(\\sum_{i=1}^{n} x_{i}\\right)=\\frac{x_{1}+x_{2}+\\cdots+x_{n}}{n}\n", + "$$\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([[ 0.7317, -1.0386, -0.4281],\n", + " [ 1.1282, -0.7312, 0.2137],\n", + " [-1.4916, -0.6921, 0.7739],\n", + " [ 1.9643, 0.3754, 0.8449],\n", + " [ 1.0241, -0.4198, -0.2965]])\n" + ] + }, + { + "data": { + "text/plain": [ + "tensor([[-0.2450],\n", + " [ 0.2036],\n", + " [-0.4699],\n", + " [ 1.0615],\n", + " [ 0.1026]])" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import torch\n", + "x = torch.randn(5, 3)\n", + "print(x)\n", + "\n", + "# use keepdim=True to preserve dimension\n", + "x.mean(-1, keepdim=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"tensor(42.)\n", + "tensor(42.)\n" + ] + } + ], + "source": [ + "# Example 2:\n", + "import torch\n", + "\n", + "x = torch.Tensor([4, 36, 45, 50, 75])\n", + "\n", + "print(torch.sum(x) / 5) # using sum and manual division\n", + "print(torch.mean(x)) # using mean function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### References\n", + "\n", + "- [Wikipedia](https://en.wikipedia.org/wiki/Mean)\n", + "- [Basics of Statistics for ML Engineer](https://medium.com/technology-nineleaps/basics-of-statistics-for-machine-learning-engineers-bf2887ac716c)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9.12 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "d4d1e4263499bec80672ea0156c357c1ee493ec2b1c70f0acce89fc37c4a6abe" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/nn-lm-batch.ipynb b/notebooks/nn-lm-batch.ipynb new file mode 100644 index 0000000..d6a25fe --- /dev/null +++ b/notebooks/nn-lm-batch.ipynb @@ -0,0 +1,407 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Neural Language Models\n", + "Status of Notebook: Work in Progress\n", + "\n", + "Reference: https://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf\n", + "\n", + "Dynet Version: https://github.com/neubig/nn4nlp-code/blob/master/02-lm/nn-lm.py\n", + "\n", + "Old PyTorch version: https://github.com/neubig/nn4nlp-code/blob/master/02-lm-pytorch/nn-lm-batch.py\n", + "\n", + "Additions compared to `nn-lm.ipynb`:\n", + "- Cleaned up model architecture code\n", + "- Added Dropout\n", + "- Using different initial learning rate" + ] + }, + { + "cell_type": "code", + 
"execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import random\n", + "import torch\n", + "import torch.nn as nn\n", + "import math\n", + "import time\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment to download the datasets\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/test.txt\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/train.txt\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/valid.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# function to read in data, process each line and split it into words by \" \"\n", + "def read_data(filename):\n", + " data = []\n", + " with open(filename, \"r\") as f:\n", + " for line in f:\n", + " line = line.strip().split(\" \")\n", + " data.append(line)\n", + " return data\n", + "\n", + "# read the data\n", + "train_data = read_data('data/ptb/train.txt')\n", + "val_data = read_data('data/ptb/valid.txt')\n", + "\n", + "# creating the word and tag indices and special tokens\n", + "word_to_index = {}\n", + "index_to_word = {}\n", + "word_to_index[\"\"] = len(word_to_index)\n", + "index_to_word[len(word_to_index)-1] = \"\"\n", + "word_to_index[\"\"] = len(word_to_index) # add to dictionary\n", + "index_to_word[len(word_to_index)-1] = \"\"\n", + "\n", + "# create word to index dictionary and tag to index dictionary from data\n", + "def create_dict(data, check_unk=False):\n", + " for line in data:\n", + " for word in line:\n", + " if check_unk == False:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = 
len(word_to_index)\n", + " index_to_word[len(word_to_index)-1] = word\n", + " \n", + " # has no effect because data already comes with <unk> tokens\n", + " # should work with data where <unk> has not already been processed\n", + " else: \n", + " if word not in word_to_index:\n", + " word_to_index[word] = word_to_index[\"\"]\n", + " index_to_word[len(word_to_index)-1] = word\n", + "\n", + "create_dict(train_data)\n", + "create_dict(val_data, check_unk=True)\n", + "\n", + "# create word and tag tensors from data\n", + "def create_tensor(data):\n", + " for line in data:\n", + " yield([word_to_index[word] for word in line])\n", + "\n", + "train_data = list(create_tensor(train_data))\n", + "val_data = list(create_tensor(val_data))\n", + "\n", + "number_of_words = len(word_to_index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In our implementation we are using batched training. There are a few differences from the original implementation found [here](https://github.com/neubig/nn4nlp-code/blob/master/02-lm/loglin-lm.py). 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", + "\n", + "N = 2 # length of the n-gram\n", + "EMB_SIZE = 128 # size of the embedding\n", + "HID_SIZE = 128 # size of the hidden layer\n", + "\n", + "# Neural LM\n", + "class FNN_LM(nn.Module):\n", + " def __init__(self, number_of_words, ngram_length, EMB_SIZE, HID_SIZE, dropout):\n", + " super(FNN_LM, self).__init__()\n", + "\n", + " # embedding layer\n", + " self.embedding = nn.Embedding(number_of_words, EMB_SIZE)\n", + "\n", + " self.fnn = nn.Sequential(\n", + " # hidden layer\n", + " nn.Linear(EMB_SIZE * ngram_length, HID_SIZE),\n", + " nn.Tanh(),\n", + " # dropout layer\n", + " nn.Dropout(dropout),\n", + " # output layer\n", + " nn.Linear(HID_SIZE, number_of_words)\n", + " )\n", + "\n", + " def forward(self, x):\n", + " embs = self.embedding(x) # Size: [batch_size x num_hist x emb_size]\n", + " feat = embs.view(embs.size(0), -1) # Size: [batch_size x (num_hist*emb_size)]\n", + " logit = self.fnn(feat) # Size: batch_size x num_words \n", + " return logit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model Settings and Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "model = FNN_LM(number_of_words, N, EMB_SIZE, HID_SIZE, dropout=0.2)\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n", + "criterion = torch.nn.CrossEntropyLoss(reduction=\"sum\")\n", + "\n", + "if torch.cuda.is_available():\n", + " model.to(device)\n", + "\n", + "# function to calculate the sentence loss\n", + "def calc_sent_loss(sent):\n", + " S = word_to_index[\"\"]\n", + " \n", + " # initial history is equal to end of sentence symbols\n", + " hist = [S] * N\n", + " \n", + " # collect all target and 
histories\n", + " all_targets = []\n", + " all_histories = []\n", + " \n", + " # step through the sentence, including the end of sentence token\n", + " for next_word in sent + [S]:\n", + " all_histories.append(list(hist))\n", + " all_targets.append(next_word)\n", + " hist = hist[1:] + [next_word]\n", + "\n", + " logits = model(torch.LongTensor(all_histories).to(device))\n", + " loss = criterion(logits, torch.LongTensor(all_targets).to(device))\n", + "\n", + " return loss\n", + "\n", + "MAX_LEN = 100\n", + "# Function to generate a sentence\n", + "def generate_sent():\n", + " S = word_to_index[\"\"]\n", + " hist = [S] * N\n", + " sent = []\n", + " while True:\n", + " logits = model(torch.LongTensor([hist]).to(device))\n", + " p = torch.nn.functional.softmax(logits) # 1 x number_of_words\n", + " next_word = p.multinomial(num_samples=1).item()\n", + " if next_word == S or len(sent) == MAX_LEN:\n", + " break\n", + " sent.append(next_word)\n", + " hist = hist[1:] + [next_word]\n", + " return sent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--finished 5000 sentences (words/sec=12807.67)\n", + "--finished 10000 sentences (words/sec=12788.71)\n", + "--finished 15000 sentences (words/sec=12807.44)\n", + "--finished 20000 sentences (words/sec=12801.59)\n", + "--finished 25000 sentences (words/sec=12852.69)\n", + "--finished 30000 sentences (words/sec=12843.39)\n", + "--finished 35000 sentences (words/sec=12835.04)\n", + "--finished 40000 sentences (words/sec=12816.01)\n", + "iter 0: train loss/word=6.1274, ppl=458.2398, (words/sec=12801.17)\n", + "iter 0: dev loss/word=5.8676, ppl=353.3835, (words/sec=1.44s)\n", + "it will change at georgia & co. got instead of totally a appointment from the big bankers posted & co. 
also received that brokers\n", + "one and claim the politicians amount for the measure of the california santa contract\n", + "our birth capitol led the giant by an market in the central held the rise of the company 's sheet that the irs on britain dollars\n", + "yesterday 's jail & investigations on news for buying creditors has lower market for polish statement so and now in a bill to government americans system to my march and programs of links stock-market program charlotte nasdaq lowest judge provide an final state university of foot an woman spokesman for something he was very constitution on the new post\n", + "it N also buy-out of bank in may industry mr. phelan said in his current N N owned of closely bartlett below minister blocking which mr. repeat and unemployment to de our own news to submit resolution trust said a rivals on the reached which now end for most cautioned failed more than N remic mortgage officials and and goldman sachs & co. currently myself mrs. mandatory almost to the hoffman greater silver kidder peabody & co. does n't any grip from the buy-out change in drexel 's third-quarter profit of N N to speculation a\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/nlp/lib/python3.7/site-packages/ipykernel_launcher.py:38: UserWarning: Implicit dimension choice for softmax has been deprecated. 
Change the call to include dim=X as an argument.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--finished 5000 sentences (words/sec=12587.62)\n", + "--finished 10000 sentences (words/sec=12652.41)\n", + "--finished 15000 sentences (words/sec=12740.18)\n", + "--finished 20000 sentences (words/sec=12763.71)\n", + "--finished 25000 sentences (words/sec=12753.94)\n", + "--finished 30000 sentences (words/sec=12754.24)\n", + "--finished 35000 sentences (words/sec=12762.18)\n", + "--finished 40000 sentences (words/sec=12740.41)\n", + "iter 1: train loss/word=5.7389, ppl=310.7324, (words/sec=12744.21)\n", + "iter 1: dev loss/word=5.7766, ppl=322.6629, (words/sec=1.40s)\n", + "the advertising for champion the dollar was named whose damage was down from lawyers and the new england told them need\n", + "rumors with cents a share\n", + "justice general operations in chicago\n", + "british bought what of going to pay to rates since april\n", + "according to an family\n", + "--finished 5000 sentences (words/sec=12702.39)\n", + "--finished 10000 sentences (words/sec=12731.82)\n", + "--finished 15000 sentences (words/sec=12755.89)\n", + "--finished 20000 sentences (words/sec=12828.83)\n", + "--finished 25000 sentences (words/sec=12836.63)\n", + "--finished 30000 sentences (words/sec=12801.01)\n", + "--finished 35000 sentences (words/sec=12803.18)\n", + "--finished 40000 sentences (words/sec=12779.24)\n", + "iter 2: train loss/word=5.5996, ppl=270.3145, (words/sec=12792.34)\n", + "iter 2: dev loss/word=5.7464, ppl=313.0468, (words/sec=1.40s)\n", + "french his experience within george bush expected luxury world culture by planning\n", + "complete as example this scheduled other sellers operations\n", + "much of the retail\n", + "just scheduled time to foreign exchange cigarette merchandise outlets in the market 's $ N a share a year earlier branch in the turmoil before our company\n", + "the manufacturing products construction has about $ N million or 
$ N million from $ N million in bridge\n", + "--finished 5000 sentences (words/sec=12953.86)\n", + "--finished 10000 sentences (words/sec=12970.24)\n", + "--finished 15000 sentences (words/sec=12896.14)\n", + "--finished 20000 sentences (words/sec=12875.42)\n", + "--finished 25000 sentences (words/sec=12833.31)\n", + "--finished 30000 sentences (words/sec=12839.11)\n", + "--finished 35000 sentences (words/sec=12822.49)\n", + "--finished 40000 sentences (words/sec=12814.87)\n", + "iter 3: train loss/word=5.5124, ppl=247.7381, (words/sec=12819.57)\n", + "iter 3: dev loss/word=5.7235, ppl=305.9709, (words/sec=1.39s)\n", + "ago\n", + "british community interest reserves in a low $ N a tax ministry in japan and wants to say it shows the consumer price network\n", + "expects operations for this first maryland staff studies in beijing\n", + "u.s. government bills here have been implemented\n", + "computer software inc. and expected that were out the moscow who illegally a stage sets for over $ N million a year earlier\n", + "--finished 5000 sentences (words/sec=12860.84)\n", + "--finished 10000 sentences (words/sec=12756.10)\n", + "--finished 15000 sentences (words/sec=12795.20)\n", + "--finished 20000 sentences (words/sec=12799.80)\n", + "--finished 25000 sentences (words/sec=12830.27)\n", + "--finished 30000 sentences (words/sec=12820.51)\n", + "--finished 35000 sentences (words/sec=12821.23)\n", + "--finished 40000 sentences (words/sec=12839.05)\n", + "iter 4: train loss/word=5.4502, ppl=232.8159, (words/sec=12841.22)\n", + "iter 4: dev loss/word=5.7149, ppl=303.3545, (words/sec=1.40s)\n", + "but the pilots are not profitable if mr. said\n", + "his trading know that cathay is through the research session mr. 
brooks and center usually raised its quarterly dividend\n", + "and kept a turn on the moment he said\n", + "but a church premium still the stockholders die\n", + "if they benefited from the san couple has a china 's strategy while government prices\n" + ] + } + ], + "source": [ + "# start training\n", + "for ITER in range(5):\n", + " # training\n", + " random.shuffle(train_data)\n", + " model.train()\n", + " train_words, train_loss = 0, 0.0\n", + " start = time.time()\n", + " for sent_id, sent in enumerate(train_data): \n", + " my_loss = calc_sent_loss(sent)\n", + " train_loss += my_loss.item()\n", + " train_words += len(sent)\n", + " optimizer.zero_grad()\n", + " my_loss.backward()\n", + " optimizer.step()\n", + " if (sent_id+1) % 5000 == 0:\n", + " print(\"--finished %r sentences (words/sec=%.2f)\" % (sent_id+1, train_words/(time.time()-start)))\n", + " print(\"iter %r: train loss/word=%.4f, ppl=%.4f, (words/sec=%.2f)\" % (ITER, train_loss/train_words, math.exp(train_loss/train_words), train_words/(time.time()-start)))\n", + "\n", + " # evaluation\n", + " model.eval()\n", + " dev_words, dev_loss = 0, 0.0\n", + " start = time.time()\n", + " for sent_id, sent in enumerate(val_data):\n", + " my_loss = calc_sent_loss(sent)\n", + " dev_loss += my_loss.item()\n", + " dev_words += len(sent)\n", + " print(\"iter %r: dev loss/word=%.4f, ppl=%.4f, (words/sec=%.2fs)\" % (ITER, dev_loss/dev_words, math.exp(dev_loss/dev_words), time.time()-start))\n", + "\n", + " # Generate a few sentences\n", + " for _ in range(5):\n", + " sent = generate_sent()\n", + " print(\" \".join([index_to_word[x] for x in sent]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nlp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "154abf72fb8cc0db1aa0e7366557ff891bff86d6d75b7e5f2e68a066d591bfd7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/nn-lm.ipynb b/notebooks/nn-lm.ipynb new file mode 100644 index 0000000..283f688 --- /dev/null +++ b/notebooks/nn-lm.ipynb @@ -0,0 +1,479 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Neural Language Models\n", + "Status of Notebook: Work in Progress\n", + "\n", + "Reference: https://www.jmlr.org/papers/volume3/bengio03a/bengio03a.pdf\n", + "\n", + "Dynet Version: https://github.com/neubig/nn4nlp-code/blob/master/02-lm/nn-lm.py" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import random\n", + "import torch\n", + "import torch.nn as nn\n", + "import math\n", + "import time\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Download the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# uncomment to download the datasets\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/test.txt\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/train.txt\n", + "#!wget https://raw.githubusercontent.com/neubig/nn4nlp-code/master/data/ptb/valid.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process the Data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# function to read in data, pro=ess each line and split columns by \" ||| \"\n", + "def read_data(filename):\n", + " data = []\n", + " with open(filename, \"r\") as f:\n", + " for line in f:\n", + " line = 
line.strip().split(\" \")\n", + " data.append(line)\n", + " return data\n", + "\n", + "# read the data\n", + "train_data = read_data('data/ptb/train.txt')\n", + "val_data = read_data('data/ptb/valid.txt')\n", + "\n", + "# creating the word and tag indices and special tokens\n", + "word_to_index = {}\n", + "index_to_word = {}\n", + "word_to_index[\"\"] = len(word_to_index)\n", + "index_to_word[len(word_to_index)-1] = \"\"\n", + "word_to_index[\"\"] = len(word_to_index) # add to dictionary\n", + "index_to_word[len(word_to_index)-1] = \"\"\n", + "\n", + "# create word to index dictionary and tag to index dictionary from data\n", + "def create_dict(data, check_unk=False):\n", + " for line in data:\n", + " for word in line:\n", + " if check_unk == False:\n", + " if word not in word_to_index:\n", + " word_to_index[word] = len(word_to_index)\n", + " index_to_word[len(word_to_index)-1] = word\n", + " \n", + " # has no effect because data already comes with \n", + " # should work with data without already processed\n", + " else: \n", + " if word not in word_to_index:\n", + " word_to_index[word] = word_to_index[\"\"]\n", + " index_to_word[len(word_to_index)-1] = word\n", + "\n", + "create_dict(train_data)\n", + "create_dict(val_data, check_unk=True)\n", + "\n", + "# create word and tag tensors from data\n", + "def create_tensor(data):\n", + " for line in data:\n", + " yield([word_to_index[word] for word in line])\n", + "\n", + "train_data = list(create_tensor(train_data))\n", + "val_data = list(create_tensor(val_data))\n", + "\n", + "number_of_words = len(word_to_index)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In our implementation we are using batched training. There are a few differences from the original implementation found [here](https://github.com/neubig/nn4nlp-code/blob/master/02-lm/loglin-lm.py). 
" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "## define the model\n", + "\n", + "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", + "\n", + "N = 2 # length of the n-gram\n", + "EMB_SIZE = 128 # size of the embedding\n", + "HID_SIZE = 128 # size of the hidden layer\n", + "\n", + "# Neural LM\n", + "class NeuralLM(nn.Module):\n", + " def __init__(self, number_of_words, ngram_length, EMB_SIZE, HID_SIZE):\n", + " super(NeuralLM, self).__init__()\n", + "\n", + " # embedding layer\n", + " self.embedding = nn.Embedding(number_of_words, EMB_SIZE)\n", + "\n", + " # hidden layer\n", + " self.hidden = nn.Linear(EMB_SIZE * ngram_length, HID_SIZE)\n", + " # output layer\n", + " self.output = nn.Linear(HID_SIZE, number_of_words)\n", + "\n", + " def forward(self, x):\n", + " embs = self.embedding(x) # Size: [batch_size x num_hist x emb_size]\n", + " embs = embs.view(embs.size(0), -1) # Size: [batch_size x (num_hist*emb_size)]\n", + " h = torch.nn.functional.tanh(self.hidden(embs)) # Size: [batch_size x hid_size]\n", + " scores = self.output(h) # Size: batch_size x num_words\n", + " return scores" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model Settings and Functions" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "model = NeuralLM(number_of_words, N, EMB_SIZE, HID_SIZE)\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=0.1)\n", + "criterion = torch.nn.CrossEntropyLoss()\n", + "\n", + "if torch.cuda.is_available():\n", + " model.to(device)\n", + "\n", + "# function to calculate the sentence loss\n", + "def calc_sent_loss(sent):\n", + " S = word_to_index[\"\"]\n", + " \n", + " # initial history is equal to end of sentence symbols\n", + " hist = [S] * N\n", + " \n", + " # collect all target and 
histories\n", + " all_targets = []\n", + " all_histories = []\n", + " \n", + " # step through the sentence, including the end of sentence token\n", + " for next_word in sent + [S]:\n", + " all_histories.append(list(hist))\n", + " all_targets.append(next_word)\n", + " hist = hist[1:] + [next_word]\n", + "\n", + " logits = model(torch.LongTensor(all_histories).to(device))\n", + " loss = criterion(logits, torch.LongTensor(all_targets).to(device))\n", + "\n", + " return loss\n", + "\n", + "MAX_LEN = 100\n", + "# Function to generate a sentence\n", + "def generate_sent():\n", + " S = word_to_index[\"\"]\n", + " hist = [S] * N\n", + " sent = []\n", + " while True:\n", + " logits = model(torch.LongTensor([hist]).to(device))\n", + " p = torch.nn.functional.softmax(logits) # 1 x number_of_words\n", + " next_word = p.multinomial(num_samples=1).item()\n", + " if next_word == S or len(sent) == MAX_LEN:\n", + " break\n", + " sent.append(next_word)\n", + " hist = hist[1:] + [next_word]\n", + " return sent" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Train the Model" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 0: train loss/word=4.1802, ppl=65.3775\n", + "iter 0: dev loss/word=4.4128, ppl=82.4961, time=1.26s\n", + "in constitution physics which could counting suspect include be on\n", + "dealers manufacturers plans commissions\n", + "in constitution physics which could counting suspect include be on behalf he declares\n", + "in constitution physics which could counting suspect include be and which and an for was designed on themes of weakness jobs n't be and 
which developed the sale such from about other objectives\n", + "N have in prolonged damage\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/envs/nlp/lib/python3.7/site-packages/ipykernel_launcher.py:38: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 1: train loss/word=4.4307, ppl=83.9873\n", + "iter 1: dev loss/word=4.5315, ppl=92.8970, time=1.27s\n", + "two hours relations clark new an index big were medicine more 'm bank N in october this fall\n", + "two this said its consumer puts for democratic futures the ringers out N note a day to get this affairs\n", + "this time\n", + "two this said its consumer puts for democratic futures the ringers out N note a top outstanding bank for $ candidates savings relationship in shopping for declared futures the ringers out N note a day to get this said its consumer puts to highlight this fall\n", + "this time the\n", + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 2: train loss/word=4.4670, ppl=87.0953\n", + "iter 2: dev loss/word=4.5699, ppl=96.5306, time=1.28s\n", + "there is by being it on the first was estimated for possible the experiment of those after the key\n", + "there is by\n", + " intensity excess $ co. 
spot N N N N N the tanker of those after in forced around who participated in forced around to $ N million wednesday\n", + "there is by\n", + "to one why N gallons iii bush from $ N million wednesday\n", + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 3: train loss/word=4.4909, ppl=89.1985\n", + "iter 3: dev loss/word=4.5530, ppl=94.9163, time=1.31s\n", + "in of western actions it does about service pilots a the company costs there with chief executive retailing under and are will for which the department showed N N of stock funds profit as well a buildup and an interest has expects up in the friday-the-13th third\n", + "in of and the products costs has about shareholders fidelity with agreed was it to a less and stock will okla. say the former economic to make\n", + "in it does about service pilots a the company costs there with a five-year and then more\n", + "in of western actions it does about service pilots at profit common runs has\n", + "thus declined to comment\n", + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 4: train loss/word=4.4966, ppl=89.7113\n", + "iter 4: dev loss/word=4.6409, ppl=103.6412, time=1.28s\n", + "the apparent centers groups by to reform\n", + "are consumers too deep over that we do n't want to continue owning stocks we oct. in \n", + "the apparent centers groups by the missile n't available five former corp to slow owning u.k. \n", + "the apparent centers groups by the and centers but to limit owning investment\n", + "the apparent centers groups by the to share mr. 
lehman attributed n't available five former corp to slow owning it was who i was coast to round owning revenue to specific another dramatic worked and financial announcements wo n't end anytime soon\n", + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 5: train loss/word=4.5213, ppl=91.9530\n", + "iter 5: dev loss/word=4.7837, ppl=119.5463, time=1.28s\n", + "the other involving plant the commission value to consolidate several lawsuits senior the commission value to consolidate several lawsuits senior the commission value to consolidate several lawsuits in many other say central\n", + "the other involving plant the commission value to consolidate several lawsuits senior the commission value to consolidate several lawsuits is for filled senior the commission value to consolidate several lawsuits in many other say central\n", + "the other involving plant the commission groups\n", + "the other involving plant the commission value of leading funds code growth channel grows aide market against her for clearance 's of the era of the era of the era of the era of the era to mr. a ratio process some air fares about rumors process the era of the era the era of the era of the era of the era of the era of the era of the era of the era to mr. 
the era to mr.\n", + "the other involving plant the commission value bank leading some continental a senior the commission value to consolidate several lawsuits on fetch for violations senior the commission value to consolidate several lawsuits senior the commission value to consolidate several lawsuits senior the commission value in draw other time strict fire american express for two operations n't if debt only on behalf n't increase he also questioned for two operations n't if debt\n", + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 6: train loss/word=4.5284, ppl=92.6074\n", + "iter 6: dev loss/word=4.8860, ppl=132.4199, time=1.27s\n", + "toyota have expressed recent durable attempt chief stocks spend notes\n", + "toyota have expressed recent durable attempt development\n", + "toyota have expressed recent durable attempt to\n", + "toyota have expressed recent durable attempt development 's also concedes stocks could back average resolve by\n", + "toyota of N occurred stocks less today \n", + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 7: train loss/word=4.5339, ppl=93.1220\n", + "iter 7: dev loss/word=4.9127, ppl=136.0103, time=1.28s\n", + "the distributor environment wealth fleet mosbacher N N from turnover citing commitment place the partnership more than year new york $ N to N a share a year earlier assets at fairly insurance will open and assets subsidiaries by and mae and his wife by fannie mae N N from turnover citing commitment place the partnership more than a partial on changes by of companies and 
assets subsidiaries in a trading range N N from turnover citing commitment place the partnership more than year new york $ N to N to N\n", + "the distributor environment wealth fleet mosbacher N N from turnover citing commitment place only will open to represent this week\n", + "the distributor environment wealth fleet mosbacher N N from turnover citing commitment place only will open\n", + "the distributor environment wealth fleet mosbacher N N from turnover citing commitment place the partnership more than year new york $ N to N N from turnover citing commitment place only will open and assets subsidiaries that not officials monday N N from turnover citing commitment place the partnership more other match by by mae and and his wife by fannie mae N N from turnover citing commitment place the partnership more than a partial of refusing and assets subsidiaries by and mae and his wife by fannie mae N N from\n", + "the distributor environment wealth fleet mosbacher N N from turnover citing commitment place only will open to represent this week\n", + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n", + "--finished 20000 sentences\n", + "--finished 25000 sentences\n", + "--finished 30000 sentences\n", + "--finished 35000 sentences\n", + "--finished 40000 sentences\n", + "iter 8: train loss/word=4.5402, ppl=93.7127\n", + "iter 8: dev loss/word=4.9000, ppl=134.2900, time=1.28s\n", + "barney any projections case for purchase\n", + "barney any projections case for purchase about liability says fast financial profit would increase close sassy cie are into imports the will concern the work industry did doing to record other cash and the but section N N gorbachev games the continued mr. 
sohmer says for N N aided executive into attacking and increased risks where with ratings pitch democratic\n", + "barney any projections case for purchase\n", + "barney any projections case for purchase\n", + "barney any projections case for purchase\n", + "--finished 5000 sentences\n", + "--finished 10000 sentences\n", + "--finished 15000 sentences\n" + ] + }, + { + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_14352/404430955.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msent_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msent\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_data\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# CHANGE to all train_data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0mmy_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcalc_sent_loss\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msent\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mtrain_loss\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mmy_loss\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/tmp/ipykernel_14352/2289298869.py\u001b[0m in \u001b[0;36mcalc_sent_loss\u001b[0;34m(sent)\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0mhist\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mhist\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mnext_word\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 25\u001b[0;31m \u001b[0mlogits\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLongTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_histories\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 26\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlogits\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mLongTensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mall_targets\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/opt/conda/envs/nlp/lib/python3.7/site-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_call_impl\u001b[0;34m(self, *input, **kwargs)\u001b[0m\n\u001b[1;32m 1108\u001b[0m if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks or _global_backward_hooks\n\u001b[1;32m 1109\u001b[0m or _global_forward_hooks or _global_forward_pre_hooks):\n\u001b[0;32m-> 1110\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mforward_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1111\u001b[0m \u001b[0;31m# Do not call functions when jit is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1112\u001b[0m \u001b[0mfull_backward_hooks\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnon_full_backward_hooks\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/tmp/ipykernel_14352/2217491764.py\u001b[0m in \u001b[0;36mforward\u001b[0;34m(self, x)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mforward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 23\u001b[0m \u001b[0membs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0membedding\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# [batch_size x num_hist x emb_size]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 24\u001b[0;31m \u001b[0membs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0membs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mview\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# [batch_size x (num_hist*emb_size)]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 25\u001b[0m \u001b[0mh\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunctional\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtanh\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhidden\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# batch_size x hid_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# batch_size x num_words\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyboardInterrupt\u001b[0m: " + ] + } + ], + "source": [ + "# start training\n", + "for ITER in range (10): # CHANGE to 100\n", + " # training\n", + " random.shuffle(train_data)\n", + "\n", + " model.train()\n", + " train_words, train_loss = 0, 0.0\n", + " for sent_id, sent in enumerate(train_data): # CHANGE to all train_data\n", + " \n", + " my_loss = calc_sent_loss(sent)\n", + " \n", + " train_loss += my_loss.item()\n", + " train_words += len(sent)\n", + "\n", + " optimizer.zero_grad()\n", + " my_loss.backward()\n", + " optimizer.step()\n", + "\n", + " if (sent_id+1) % 5000 == 0:\n", + " print(\"--finished %r sentences\" % (sent_id+1))\n", + " print(\"iter %r: train loss/word=%.4f, ppl=%.4f\" % (ITER, train_loss/train_words, math.exp(train_loss/train_words)))\n", + "\n", + " # evaluation\n", + " model.eval()\n", + " dev_words, dev_loss = 0, 0.0\n", + " start = time.time()\n", + " for sent_id, sent in enumerate(val_data):\n", + " my_loss = calc_sent_loss(sent)\n", + " dev_loss += my_loss.item()\n", + " dev_words += len(sent)\n", + " print(\"iter %r: dev loss/word=%.4f, ppl=%.4f, time=%.2fs\" % (ITER, dev_loss/dev_words, math.exp(dev_loss/dev_words), time.time()-start))\n", + "\n", + " # Generate a few sentences\n", + " for 
_ in range(5):\n", + " sent = generate_sent()\n", + " print(\" \".join([index_to_word[x] for x in sent]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "nlp", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "154abf72fb8cc0db1aa0e7366557ff891bff86d6d75b7e5f2e68a066d591bfd7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/nn_from_scratch.ipynb b/notebooks/nn_from_scratch.ipynb new file mode 100644 index 0000000..fdb2bd9 --- /dev/null +++ b/notebooks/nn_from_scratch.ipynb @@ -0,0 +1,353 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# A Simple Neural Network from Scratch with PyTorch and Google Colab\n", + "\n", + "In this tutorial we implement a simple neural network from scratch using PyTorch.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## About\n", + "\n", + "In this tutorial we will implement a simple neural network from scratch using PyTorch. The idea of the tutorial is to teach you the basics of PyTorch and how it can be used to implement a neural network from scratch. I will go over some of the basic functionalities and concepts available in PyTorch that will allow you to build your own neural networks. \n", + "\n", + "This tutorial assumes you have prior knowledge of how a neural network works. Don’t worry! Even if you are not so sure, you will be okay. For advanced PyTorch users, this tutorial may still serve as a refresher. 
This tutorial is heavily inspired by this [Neural Network implementation](https://repl.it/talk/announcements/Build-a-Neural-Network-in-Python/5457) coded purely using Numpy. In fact, I tried re-implementing the code using PyTorch instead and added my own intuitions and explanations. Thanks to [Samay](https://repl.it/@shamdasani) for his phenomenal work; I hope this inspires many others as it did me.\n", + "\n", + "The `torch` module provides all the necessary **tensor** operators you will need to implement your first neural network from scratch in PyTorch. That's right! In PyTorch everything is a Tensor, so this is the first thing you will need to get used to. Let's import the libraries we will need for this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn as nn" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data\n", + "Let's start by creating some sample data using the `torch.tensor` command. In Numpy, this could be done with `np.array`. Both functions serve the same purpose, but in PyTorch everything is a Tensor as opposed to a vector or matrix. We define types in PyTorch using the `dtype=torch.xxx` command. \n", + "\n", + "In the data below, `X` represents the number of hours studied and how much time students spent sleeping, whereas `y` represents grades. The variable `xPredicted` is a single input for which we want to predict a grade using the parameters learned by the neural network. Remember, the neural network wants to learn a mapping between `X` and `y`, so it will try to take a guess from what it has learned from the training data. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "X = torch.tensor(([2, 9], [1, 5], [3, 6]), dtype=torch.float) # 3 X 2 tensor\n", + "y = torch.tensor(([92], [100], [89]), dtype=torch.float) # 3 X 1 tensor\n", + "xPredicted = torch.tensor(([4, 8]), dtype=torch.float) # 1 X 2 tensor" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can check the size of the tensors we have just created with the `size` command. This is equivalent to the `shape` command used in tools such as Numpy and Tensorflow. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([3, 2])\n", + "torch.Size([3, 1])\n" + ] + } + ], + "source": [ + "print(X.size())\n", + "print(y.size())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Scaling\n", + "\n", + "Below we are performing some scaling on the sample data. Notice that the `max` function returns both a tensor and the corresponding indices. So we use `_` to capture the indices which we won't use here because we are only interested in the max values to conduct the scaling. Perfect! Our data is now in a very nice format our neural network will appreciate later on. " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tensor([0.5000, 1.0000])\n" + ] + } + ], + "source": [ + "# scale units\n", + "X_max, _ = torch.max(X, 0)\n", + "xPredicted_max, _ = torch.max(xPredicted, 0)\n", + "\n", + "X = torch.div(X, X_max)\n", + "xPredicted = torch.div(xPredicted, xPredicted_max)\n", + "y = y / 100 # max test score is 100\n", + "print(xPredicted)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice that there are two functions `max` and `div` that I didn't discuss above. 
They do exactly what they imply: `max` finds the maximum value in a vector... I mean tensor; and `div` is basically a nice little function to divide two tensors. \n", + "\n", + "## Model (Computation Graph)\n", + "Once the data has been processed and it is in the proper format, all you need to do now is to define your model. Here is where things begin to change a little as compared to how you would build your neural networks using, say, something like Keras or Tensorflow. However, you will realize quickly as you go along that PyTorch doesn't differ much from other deep learning tools. At the end of the day we are constructing a computation graph, which is used to dictate how data should flow and what type of operations are performed on this information. \n", + "\n", + "For illustration purposes, we are building the following neural network or computation graph:\n", + "\n", + "\n", + "![alt text](https://drive.google.com/uc?export=view&id=1l-sKpcCJCEUJV1BlAqcVAvLXLpYCInV6)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "class Neural_Network(nn.Module):\n", + " def __init__(self, ):\n", + " super(Neural_Network, self).__init__()\n", + " # parameters\n", + " # TODO: parameters can be parameterized instead of declaring them here\n", + " self.inputSize = 2\n", + " self.outputSize = 1\n", + " self.hiddenSize = 3\n", + " \n", + " # weights\n", + " self.W1 = torch.randn(self.inputSize, self.hiddenSize) # 3 X 2 tensor\n", + " self.W2 = torch.randn(self.hiddenSize, self.outputSize) # 3 X 1 tensor\n", + " \n", + " def forward(self, X):\n", + " self.z = torch.matmul(X, self.W1) # 3 X 3 \".dot\" does not broadcast in PyTorch\n", + " self.z2 = self.sigmoid(self.z) # activation function\n", + " self.z3 = torch.matmul(self.z2, self.W2)\n", + " o = self.sigmoid(self.z3) # final activation function\n", + " return o\n", + " \n", + " def sigmoid(self, s):\n", + " return 1 / (1 + torch.exp(-s))\n", + " \n", + " def 
sigmoidPrime(self, s):\n", + " # derivative of sigmoid\n", + " return s * (1 - s)\n", + " \n", + " def backward(self, X, y, o):\n", + " self.o_error = y - o # error in output\n", + " self.o_delta = self.o_error * self.sigmoidPrime(o) # derivative of sig to error\n", + " self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))\n", + " self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)\n", + " self.W1 += torch.matmul(torch.t(X), self.z2_delta)\n", + " self.W2 += torch.matmul(torch.t(self.z2), self.o_delta)\n", + " \n", + " def train(self, X, y):\n", + " # forward + backward pass for training\n", + " o = self.forward(X)\n", + " self.backward(X, y, o)\n", + " \n", + " def saveWeights(self, model):\n", + " # we will use the PyTorch internal storage functions\n", + " torch.save(model, \"NN\")\n", + " # you can reload model with all the weights and so forth with:\n", + " # torch.load(\"NN\")\n", + " \n", + " def predict(self):\n", + " print (\"Predicted data based on trained weights: \")\n", + " print (\"Input (scaled): \\n\" + str(xPredicted))\n", + " print (\"Output: \\n\" + str(self.forward(xPredicted)))\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the purpose of this tutorial, we are not going to be talking math stuff, that's for another day. I just want you to get a gist of what it takes to build a neural network from scratch using PyTorch. Let's break down the model which was declared via the class above. \n", + "\n", + "## Class Header\n", + "First, we defined our model via a class because that is the recommended way to build the computation graph. The class header contains the name of the class `Neural Network` and the parameter `nn.Module` which basically indicates that we are defining our own neural network. 
\n", + "\n", + "```python\n", + "class Neural_Network(nn.Module):\n", + "```\n", + "\n", + "## Initialization\n", + "The next step is to define the initializations ( `def __init__(self,)`) that will be performed upon creating an instance of the customized neural network. You can declare the parameters of your model here, but typically, you would declare the structure of your network in this section -- the size of the hidden layers and so forth. Since we are building the neural network from scratch, we explicitly declared the size of the weights matrices: one that stores the parameters from the input to hidden layer; and one that stores the parameter from the hidden to output layer. Both weight matrices are initialized with values randomly chosen from a normal distribution via `torch.randn(...)`. Note that we are not using bias just to keep things as simple as possible. \n", + "\n", + "```python\n", + "def __init__(self, ):\n", + " super(Neural_Network, self).__init__()\n", + " # parameters\n", + " # TODO: parameters can be parameterized instead of declaring them here\n", + " self.inputSize = 2\n", + " self.outputSize = 1\n", + " self.hiddenSize = 3\n", + "\n", + " # weights\n", + " self.W1 = torch.randn(self.inputSize, self.hiddenSize) # 3 X 2 tensor\n", + " self.W2 = torch.randn(self.hiddenSize, self.outputSize) # 3 X 1 tensor\n", + "```\n", + "\n", + "## The Forward Function\n", + "The `forward` function is where all the magic happens (see below). This is where the data enters and is fed into the computation graph (i.e., the neural network structure we have built). 
Since we are building a simple neural network with one hidden layer, our forward function looks very simple:\n", + "\n", + "```python\n", + "def forward(self, X):\n", + " self.z = torch.matmul(X, self.W1) \n", + " self.z2 = self.sigmoid(self.z) # activation function\n", + " self.z3 = torch.matmul(self.z2, self.W2)\n", + " o = self.sigmoid(self.z3) # final activation function\n", + " return o\n", + "```\n", + "\n", + "The `forward` function above takes the input `X` and then performs a matrix multiplication (`torch.matmul(...)`) with the first weight matrix `self.W1`. Then an activation function, `sigmoid`, is applied to the result. The resulting matrix of the activation is then multiplied with the second weight matrix `self.W2`. Then another activation is performed, which renders the output of the neural network or computation graph. The process I described above is simply what's known as a `feedforward pass`. In order for the weights to optimize when training, we need a backpropagation algorithm. \n", + "\n", + "## The Backward Function\n", + "The `backward` function contains the backpropagation algorithm, where the goal is to essentially minimize the loss with respect to our weights. In other words, the weights need to be updated in such a way that the loss decreases while the neural network is training (well, that is what we hope for). All this magic is possible with the gradient descent algorithm which is declared in the `backward` function. 
Take a minute or two to inspect what is happening in the code below:\n", + "\n", + "```python\n", + "def backward(self, X, y, o):\n", + " self.o_error = y - o # error in output\n", + " self.o_delta = self.o_error * self.sigmoidPrime(o) \n", + " self.z2_error = torch.matmul(self.o_delta, torch.t(self.W2))\n", + " self.z2_delta = self.z2_error * self.sigmoidPrime(self.z2)\n", + " self.W1 += torch.matmul(torch.t(X), self.z2_delta)\n", + " self.W2 += torch.matmul(torch.t(self.z2), self.o_delta)\n", + "```\n", + "\n", + "Notice that we are performing a lot of matrix multiplications along with the transpose operations via the `torch.matmul(...)` and `torch.t(...)` operations, respectively. The rest is simply gradient descent -- there is nothing to it." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training\n", + "All that is left now is to train the neural network. First we create an instance of the computation graph we have just built:\n", + "\n", + "```python\n", + "NN = Neural_Network()\n", + "```\n", + "\n", + "Then we train the model for `1000` rounds. Notice that in PyTorch `NN(X)` automatically calls the `forward` function so there is no need to explicitly call `NN.forward(X)`. \n", + "\n", + "After we have obtained the predicted output for every round of training, we compute the loss, with the following code:\n", + "\n", + "```python\n", + "torch.mean((y - NN(X))**2).detach().item()\n", + "```\n", + "\n", + "The next step is to start the training (forward + backward) via `NN.train(X, y)`. After we have trained the neural network, we can store the model and output the predicted value of the single instance we declared in the beginning, `xPredicted`. \n", + "\n", + "Let's train!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "#0 Loss: 0.23507122695446014\n", + "#100 Loss: 0.003401519963517785\n", + "#200 Loss: 0.0031734900549054146\n", + "#300 Loss: 0.0030537480488419533\n", + "#400 Loss: 0.0029342835769057274\n", + "#500 Loss: 0.0028073659632354975\n", + "#600 Loss: 0.0026717206928879023\n", + "#700 Loss: 0.0025271554477512836\n", + "#800 Loss: 0.002374356146901846\n", + "#900 Loss: 0.0022152026649564505\n", + "Predicted data based on trained weights: \n", + "Input (scaled): \n", + "tensor([0.5000, 1.0000])\n", + "Output: \n", + "tensor([0.9335])\n", + "Finished training!\n" + ] + } + ], + "source": [ + "NN = Neural_Network()\n", + "for i in range(1000): # trains the NN 1,000 times\n", + " if (i % 100) == 0:\n", + " print (\"#\" + str(i) + \" Loss: \" + str(torch.mean((y - NN(X))**2).detach().item())) # mean sum squared loss\n", + " NN.train(X, y)\n", + "#NN.saveWeights(NN) # save weights\n", + "\n", + "NN.predict()\n", + "\n", + "print(\"Finished training!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The loss keeps decreasing, which means that the neural network is learning something. That's it. Congratulations! You have just learned how to create and train a neural network from scratch using PyTorch. There are so many things you can do with the shallow network we have just implemented. You can add more hidden layers or try to incorporate the bias terms for practice. I would love to see what you will build from here. Reach me out on [Twitter](https://twitter.com/omarsar0) if you have any further questions or leave your comments here. Until next time!\n", + "\n", + "## References:\n", + "- [PyTorch nn. 
Modules](https://pytorch.org/tutorials/beginner/pytorch_with_examples.html#pytorch-custom-nn-modules)\n", + "- [Build a Neural Network with Numpy](https://enlight.nyc/neural-network)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('play')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "cf9800998463bc980d70cdbacff0c7e9a10687346dc898569e92f016d6e252c9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/pytorch_gentle_intro.ipynb b/notebooks/pytorch_gentle_intro.ipynb new file mode 100644 index 0000000..f8262a9 --- /dev/null +++ b/notebooks/pytorch_gentle_intro.ipynb @@ -0,0 +1,755 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# A Gentle Introduction to PyTorch\n", + "\n", + "In our previous PyTorch [notebook](https://medium.com/dair-ai/pytorch-1-2-quickstart-with-google-colab-6690a30c38d), we learned about how to get started quickly with PyTorch using Google Colab. In this tutorial, we are going to take a step back and review some of the basic components of building a deep learning model using PyTorch. \n", + "\n", + "This will be a brief tutorial and will avoid using jargon and overcomplicated code. That said, this is perhaps the most basic of models you can build with PyTorch. \n", + "\n", + "If fact, it is so basic that it's ideal for those starting to learn about PyTorch and deep learning. So if you have a friend or colleague that wants to jump in, I highly encourage you to refer them to this tutorial as a starting point. 
Let's get started!\n", + "\n", + "\n", + "**Author:** [Elvis Saravia](https://twitter.com/omarsar0)\n", + "\n", + "**Complete Code Walkthrough:** [Blog post](https://medium.com/dair-ai/pytorch-1-2-introduction-guide-f6fa9bb7597c)\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Started\n", + "\n", + "We need to import a few modules which will be useful to obtain the necessary functions that will help us to build our deep learning model. The main ones are `torch` and `torchvision`. They contain the majority of the functions that you need to get started with PyTorch. However, as this is a deep learning tutorial we will need `torch.nn`, `torch.nn.functional` and `torchvision.transforms` which all contain utility functions to build our model. We probably won't use all the modules listed below but they are the typical modules you will be importing when starting your deep learning projects. " + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "## The usual imports\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.nn.functional as F\n", + "import torchvision\n", + "import torchvision.transforms as transforms\n", + "\n", + "## for printing image\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the Data\n", + "Let's get right into it! As with any machine learning project, you need to load your dataset. We are using the [MNIST dataset](http://yann.lecun.com/exdb/mnist/), which is the Hello World of datasets in the machine learning world. \n", + "\n", + "The data consists of number images that are of size `28 X 28`. 
We will discuss the images shortly, but our plan is to load data into batches of size `32`, similar to the figure below.\n", + "\n", + "\n", + "![alt text](https://drive.google.com/uc?export=view&id=19AC_WpscyXkrK_o4PaFFGpt_jG0aJm_f)\n", + "\n", + "\n", + "Here are the complete steps we are performing when importing our data:\n", + "- We will import and tranform the data into tensors using the `transforms` module\n", + "- We will use `DataLoader` to build convenient data loaders, which makes it easy to efficiently feed data in batches to deep learning models. We will get to the topic of batches in a bit but for now just think of them as subsets of your data. \n", + "- As hinted above, we will also create batches of the data by setting the `batch` parameter inside the data loader. Notice we use batches of `32` in this tutorial but you can change it to `64` if you like. " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2.0%" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100.0%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "102.8%\n", + "8.6%" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz\n", + "Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz 
to ./data/MNIST/raw\n", + "\n", + "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100.0%\n", + "112.7%\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw\n", + "\n", + "Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz\n", + "Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz\n", + "Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw\n", + "\n" + ] + } + ], + "source": [ + "## parameter denoting the batch size\n", + "BATCH_SIZE = 32\n", + "\n", + "## transformations\n", + "transform = transforms.Compose(\n", + " [transforms.ToTensor()])\n", + "\n", + "## download and load training dataset\n", + "trainset = torchvision.datasets.MNIST(root='./data', train=True,\n", + " download=True, transform=transform)\n", + "trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,\n", + " shuffle=True, num_workers=2)\n", + "\n", + "## download and load testing dataset\n", + "testset = torchvision.datasets.MNIST(root='./data', train=False,\n", + " download=True, transform=transform)\n", + "testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,\n", + " shuffle=False, num_workers=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's inspect what the trainset and testset objects contain. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset MNIST\n", + " Number of datapoints: 60000\n", + " Root location: ./data\n", + " Split: Train\n", + " StandardTransform\n", + "Transform: Compose(\n", + " ToTensor()\n", + " )\n", + "Dataset MNIST\n", + " Number of datapoints: 10000\n", + " Root location: ./data\n", + " Split: Test\n", + " StandardTransform\n", + "Transform: Compose(\n", + " ToTensor()\n", + " )\n" + ] + } + ], + "source": [ + "## print the trainset and testset\n", + "print(trainset)\n", + "print(testset)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is a beginner's tutorial so I will break down things a bit here:\n", + "- `BATCH_SIZE` is a parameter that denotes the batch size we will use for our model\n", + "- `transform` holds code for whatever transformations you will apply to your data. I will show you an example below to demonstrate exactly what it does to shed more light into its use\n", + "- `trainset` and `testset` contain the actual dataset object. Notice I use `train=True` to specify that this corresponds to the training dataset, and I use `train=False` to specify that this is the remainder of the dataset which we call the testset. From the portion I printed above you can see that the split of the data was 85% (60000) / 15% (10000), corresponding to the portions of samples for training set and testing set, respectively. \n", + "- `trainloader` is what holds the data loader object which takes care of shuffling the data and constructing the batches." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's look at that `transforms.Compose(...)` function and see what it does. We will use a randomized image to demonstrate its use. Let's generate an image. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "image = transforms.ToPILImage(mode='L')(torch.randn(1, 96, 96))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**And** let's render it:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.imshow(image)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Okay, we have our image sample. And now let's apply some dummy transformation to it. We are going to rotate the image by `45` degrees. The transformation below takes care of that:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## dummy transformation\n", + "dummy_transform = transforms.Compose(\n", + " [transforms.RandomRotation(45)])\n", + "\n", + "dummy_result = dummy_transform(image)\n", + "\n", + "plt.imshow(dummy_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notice you can put the transformations within `transforms.Compose(...)`. You can use the built in transformations offered by PyTorch or you can build your own and compose as you wish. In fact, you can place as many transformation as you wish in there. Let's try another composition of transformations: rotate + vertical flip. " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## dummy transform \n", + "dummy2_transform = transforms.Compose(\n", + " [transforms.RandomRotation(45), transforms.RandomVerticalFlip()])\n", + "\n", + "dummy2_result = dummy2_transform(image)\n", + "\n", + "plt.imshow(dummy2_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "That's pretty cool right! Keep trying other transform methods. On the topic of exploring our data further, let's take a look at our images dataset. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exploring the Data\n", + "As a practioner and researcher, I am always spend a bit of time and effort exploring and understanding my datasets. It's fun and this is a good practise to ensure that everything is in order.\n", + "\n", + "Let's check what the train and test dataset contain. I will use matplotlib to print out some of the images from our dataset. With a bit of numpy I can convert images into numpy and print them out. Below I print out an entire batch. " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "## functions to show an image\n", + "def imshow(img):\n", + " #img = img / 2 + 0.5 # unnormalize\n", + " npimg = img.numpy()\n", + " plt.imshow(np.transpose(npimg, (1, 2, 0)))\n", + "\n", + "## get some random training images\n", + "dataiter = iter(trainloader)\n", + "images, labels = dataiter.next()\n", + "\n", + "## show images\n", + "imshow(torchvision.utils.make_grid(images))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The dimensions of our batches are as follow:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Image batch dimensions: torch.Size([32, 1, 28, 28])\n", + "Image label dimensions: torch.Size([32])\n" + ] + } + ], + "source": [ + "for images, labels in trainloader:\n", + " print(\"Image batch dimensions:\", images.shape)\n", + " print(\"Image label dimensions:\", labels.shape)\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Model\n", + "Now it's time to build the deep learning model that will be used to perform the image classification. We will keeps things simple and stack a few dense layers and a dropout layer to train our model.\n", + "\n", + "Let's discuss a bit about the model:\n", + "\n", + "- First of all the following structure involving a `class` is standard code that's used to build the neural network model in PyTorch:\n", + "\n", + "```python\n", + "class MyModel(nn.Module):\n", + " def __init__(self):\n", + " super(MyModel, self).__init__()\n", + " \n", + " # layers go here\n", + " \n", + " def forward(self, x):\n", + " \n", + " # computations go here\n", + "```\n", + "- The layers are defined inside `def __init__()`. `super(...).__init__()` is just there to stick things together. 
For our model, we stack a hidden layer (`self.d1`) followed by a dropout layer (`self.dropout`), which is then followed by an output layer (`self.d2`). \n", + "- `nn.Linear(...)` defines the dense layer and it requires the `in` and `out` dimensions, which correspond to the size of the input feature and output feature of that layer, respectively. \n", + "- `nn.Dropout(...)` is used to define a dropout layer. Dropout is an approach in deep learning that helps a model to avoid overfitting. This means that dropout acts as a regularization technique that helps the model to not overfit on the images it has seen while training. We want this because we need a model that generalizes well to unseen examples -- in our case, the testing dataset. Dropout randomly zeroes some of the units of the neural network layer with probability of `p=0.2`. Read more about the dropout layer [here](https://pytorch.org/docs/stable/nn.html#dropout). \n", + "- The entry point of the model, i.e. where the data enters, is placed under the `forward(...)` function. Typically, we also place other transformations we perform on the data while training inside this function. \n", + "- In the `forward()` function we are performing a series of computations on the input data\n", + " - we flatten the images first, converting them from 2D (`28 X 28`) to 1D (`1 X 784`).\n", + " - then we feed the batches of those 1D images into the first hidden layer\n", + " - the output of that hidden layer is then passed through a [non-linear activation function](https://en.wikipedia.org/wiki/Rectifier_(neural_networks)) called `ReLU`. 
It's not so important to know what `F.relu()` does at the moment, but the effect that it achieves is that it allows faster and more effective training of neural architectures on large datasets\n", + " - as explained above, the dropout also helps the model to train more efficiently by avoiding overfitting on the training data\n", + " - we then feed the output of that dropout layer into the output layer (`d2`)\n", + " - the result of that is then fed to the [softmax function](https://en.wikipedia.org/wiki/Softmax_function), which converts or normalized the output into a probability distribution which helps with outputting proper predictions values that are used to calculate the accuracy of the model; this will the final output of the model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "## the model\n", + "class MyModel(nn.Module):\n", + " def __init__(self):\n", + " super(MyModel, self).__init__()\n", + " self.d1 = nn.Linear(28 * 28, 128)\n", + " self.dropout = nn.Dropout(p=0.2)\n", + " self.d2 = nn.Linear(128, 10)\n", + " \n", + " def forward(self, x):\n", + " x = x.flatten(start_dim = 1)\n", + " x = self.d1(x)\n", + " x = F.relu(x)\n", + " x = self.dropout(x)\n", + " logits = self.d2(x)\n", + " out = F.softmax(logits, dim=1)\n", + " return out" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Visually, the following is a diagram of the model we have built. Just keep in mind that the hidden layer is much bigger as shown in the diagram but due to space constraint, the diagram is just an approximation to the actual model. \n", + "\n", + "![alt text](https://drive.google.com/uc?export=view&id=1NuFflDPOW_hIAHTH2pXZAEhSINygPlnB)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As I have done in my previous tutorials, I always encourage to test the model with 1 batch to ensure that the output dimensions are what we expect. 
Notice how we are iterating over the dataloader which conveniently stores the `images` and `labels` pairs. `out` contains the output of the model, which are the logits applied a `softmax` layer which helps with prediction. " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "batch size: torch.Size([32, 1, 28, 28])\n", + "torch.Size([32, 10])\n" + ] + } + ], + "source": [ + "## test the model with 1 batch\n", + "model = MyModel()\n", + "for images, labels in trainloader:\n", + " print(\"batch size:\", images.shape)\n", + " out = model(images)\n", + " print(out.shape)\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can clearly see that we get back the batches with 10 output values associate with it. These are used to compute the performance of the model. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model\n", + "Now we are ready to train the model but before that we are going to setup a loss function, an optimizer and a function to compute accuracy of the model. \n", + "\n", + "- The `learning_rate` is the rate at which the model will try to optimize its weights, which is just another parameter for the model. \n", + "- `num_epochs` is the number of training steps. \n", + "- `device` determines what hardware we will use to train the model. If a `gpu` is present, then that will be used, otherwise it defaults to the `cpu`.\n", + "- `model` is just the model instance.\n", + "- `model.to(device)` is in charge of setting the actaull device that will be used for training the model\n", + "- `criterion` is just the metric that's used to compute the loss of the model while it forward and backward trains to optimize its weights. \n", + "- `optimizer` is the optimization technique used to modify the weights in the backward propagation. 
Notice that it requires the `learning_rate` and the model parameters which are part of the calculation to optimize weights.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "learning_rate = 0.001\n", + "num_epochs = 5\n", + "\n", + "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n", + "model = MyModel()\n", + "model = model.to(device)\n", + "criterion = nn.CrossEntropyLoss()\n", + "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The utility function below helps to calculate the accuracy of the model. For now, it's not important to understand how it's calculated but basically it compares the outputs of the model (predictions) with the actual target values (i.e., the labels of the dataset), and tries to compute the average of correct predictions." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "## utility function to compute accuracy\n", + "def get_accuracy(output, target, batch_size):\n", + " ''' Obtain accuracy for training round '''\n", + " corrects = (torch.max(output, 1)[1].view(target.size()).data == target.data).sum()\n", + " accuracy = 100.0 * corrects/batch_size\n", + " return accuracy.item()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Model\n", + "Now it's time to train the model. 
The code portion that follows can be described in the following steps:\n", + "\n", + "- The first thing in training a neural network model is defining the training loop, which is achieved by:\n", + "\n", + "```python\n", + "for epoch in range(num_epochs):\n", + " ...\n", + "```\n", + "\n", + "- We define two variables, `train_running_loss` and `train_acc` that will help us to monitor the running accuracy and loss of the model while it trains over the different batches.\n", + "- `model.train()` explicitly indicates that we are ready to start training. \n", + "- Notice how we are iterating over the dataloader, which conveniently gives us the batches in image-label pairs. \n", + "- That second `for` loop means that for every training step we will iterate over all the batches and train the model over them.\n", + "- We feed the model the images via `model(images)` and the outputs are the predictions of the model. \n", + "- The predictions together with the target labels are used to compute the loss using the loss function we defined earlier.\n", + "- Before we update our weights for the next round of training, we perform the following steps:\n", + " - we use the optimizer object to reset all the gradients for the variables it will update. 
This is a safe step and it doesn't overwrite the gradients the model accumulates while training (those are stored in a buffer [link text](https://pytorch.org/tutorials/beginner/pytorch_with_examples.html#pytorch-optim) via the `loss.backward()` call)\n", + " - `loss.backward()` simply computes the gradient of the loss w.r.t. the model parameters\n", + " - `optimizer.step()` then ensures that the model parameters are updated\n", + "\n", + "- Then we gather and accumulate the loss and accuracy, which is what we will use to tell us if the model is learning properly" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 0 | Loss: 1.5929 | Train Accuracy: 89.11\n", + "Epoch: 1 | Loss: 1.5275 | Train Accuracy: 94.13\n", + "Epoch: 2 | Loss: 1.5138 | Train Accuracy: 95.33\n", + "Epoch: 3 | Loss: 1.5063 | Train Accuracy: 95.99\n", + "Epoch: 4 | Loss: 1.5003 | Train Accuracy: 96.48\n" + ] + } + ], + "source": [ + "## train the model\n", + "for epoch in range(num_epochs):\n", + " train_running_loss = 0.0\n", + " train_acc = 0.0\n", + "\n", + " ## commence training\n", + " model = model.train()\n", + "\n", + " ## training step\n", + " for i, (images, labels) in enumerate(trainloader):\n", + " \n", + " images = images.to(device)\n", + " labels = labels.to(device)\n", + "\n", + " ## forward + backprop + loss\n", + " predictions = model(images)\n", + " loss = criterion(predictions, labels)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + "\n", + " ## update model params\n", + " optimizer.step()\n", + "\n", + " train_running_loss += loss.detach().item()\n", + " train_acc += get_accuracy(predictions, labels, BATCH_SIZE)\n", + " \n", + " model.eval()\n", + " print('Epoch: %d | Loss: %.4f | Train Accuracy: %.2f' \\\n", + " %(epoch, train_running_loss / i, train_acc/i)) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "After all the 
training steps are over, we can clearly see that the loss keeps decreasing while the training accuracy of the model keeps rising, which is a good sign that the model is effectively learning to classify images.\n", + "\n", + "We can verify that by computing the accuracy on the testing dataset to see how well the model performs on the image classification task. As you can see below, our basic CNN model is performing very well on the MNIST classification task." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test Accuracy: 96.76\n" + ] + } + ], + "source": [ + "test_acc = 0.0\n", + "for i, (images, labels) in enumerate(testloader, 0):\n", + " images = images.to(device)\n", + " labels = labels.to(device)\n", + " outputs = model(images)\n", + " test_acc += get_accuracy(outputs, labels, BATCH_SIZE)\n", + " \n", + "print('Test Accuracy: %.2f'%( test_acc/i))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Final Words\n", + "\n", + "Congratulations! You have made it to the end of this tutorial. 
This is a really long tutorial that aims to give a very basic introduction to the fundamentals of image classification using neural networks and PyTorch.\n", + "\n", + "*This tutorial was heavily inspired by this [TensorFlow tutorial.](https://www.tensorflow.org/beta/tutorials/quickstart/beginner) We thank the authors of the corresponding reference for their valuable work.*\n", + "\n", + "## References\n", + "- [PyTorch 1.2 Quickstart with Google Colab](https://medium.com/dair-ai/pytorch-1-2-quickstart-with-google-colab-6690a30c38d)\n", + "- [Get started with TensorFlow 2.0 for beginners](https://www.tensorflow.org/beta/tutorials/quickstart/beginner)\n", + "- [PyTorch Data Loading Tutorial](https://pytorch.org/tutorials/beginner/data_loading_tutorial.html)\n", + "- [Neural Networks with PyTorch](https://pytorch.org/tutorials/beginner/blitz/neural_networks_tutorial.html#sphx-glr-beginner-blitz-neural-networks-tutorial-py)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('nlp')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "154abf72fb8cc0db1aa0e7366557ff891bff86d6d75b7e5f2e68a066d591bfd7" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/pytorch_hello_world.ipynb b/notebooks/pytorch_hello_world.ipynb new file mode 100644 index 0000000..abfec51 --- /dev/null +++ b/notebooks/pytorch_hello_world.ipynb @@ -0,0 +1,417 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# A First Shot at Deep Learning with PyTorch\n", + "\n", + "In this notebook, we are going to take a baby step into the world of deep learning using PyTorch. 
There are a ton of notebooks out there that teach you the fundamentals of deep learning and PyTorch, so here the idea is to give you some basic introduction to deep learning and PyTorch at a very high level. Therefore, this notebook is targeting beginners but it can also serve as a review for more experienced developers.\n", + "\n", + "After completion of this notebook, you are expected to know the basic components of training a basic neural network with PyTorch. I have also left a couple of exercises towards the end with the intention of encouraging more research and practise of your deep learning skills. \n", + "\n", + "---\n", + "\n", + "**Author:** Elvis Saravia ([Twitter](https://twitter.com/omarsar0) | [LinkedIn](https://www.linkedin.com/in/omarsar/))\n", + "\n", + "**Complete Code Walkthrough:** [Blog post](https://medium.com/dair-ai/a-first-shot-at-deep-learning-with-pytorch-4a8252d30c75)\n", + "\n", + "## Importing the libraries\n", + "\n", + "Like with any other programming exercise, the first step is to import the necessary libraries. As we are going to be using Google Colab to program our neural network, we need to install and import the necessary PyTorch libraries." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.11.0\n" + ] + } + ], + "source": [ + "## The usual imports\n", + "import torch\n", + "import torch.nn as nn\n", + "\n", + "## print out the pytorch version used\n", + "print(torch.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Neural Network\n", + "\n", + "![alt text](https://drive.google.com/uc?export=view&id=1Lpi4VPBfAV3JkOLopcsGK4L8dyxmPF1b)\n", + "\n", + "Before building and training a neural network the first step is to process and prepare the data. In this notebook, we are going to use synthetic data (i.e., fake data) so we won't be using any real world data. 
\n", + "\n", + "For the sake of simplicity, we are going to use the following input and output pairs converted to tensors, which is how data is typically represented in the world of deep learning. The x values represent the input of dimension `(6,1)` and the y values represent the output of similar dimension. The example is taken from this [tutorial](https://github.com/lmoroney/dlaicourse/blob/master/Course%201%20-%20Part%202%20-%20Lesson%202%20-%20Notebook.ipynb). \n", + "\n", + "The objective of the neural network model that we are going to build and train is to automatically learn patterns that better characterize the relationship between the `x` and `y` values. Essentially, the model learns the relationship that exists between inputs and outputs which can then be used to predict the corresponding `y` value for any given input `x`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "## our data in tensor form\n", + "x = torch.tensor([[-1.0], [0.0], [1.0], [2.0], [3.0], [4.0]], dtype=torch.float)\n", + "y = torch.tensor([[-3.0], [-1.0], [1.0], [3.0], [5.0], [7.0]], dtype=torch.float)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "torch.Size([6, 1])" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "## print size of the input tensor\n", + "x.size()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## The Neural Network Components\n", + "As said earlier, we are going to first define and build out the components of our neural network before training the model.\n", + "\n", + "### Model\n", + "\n", + "Typically, when building a neural network model, we define the layers and weights which form the basic components of the model. Below we show an example of how to define a hidden layer named `layer1` with size `(1, 1)`. 
For the purpose of this tutorial, we won't explicitly define the `weights` and allow the built-in functions provided by PyTorch to handle that part for us. By the way, the `nn.Linear(...)` function applies a linear transformation ($y = xA^T + b$) to the data that was provided as its input. We ignore the bias for now by setting `bias=False`." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "## Neural network with 1 hidden layer\n", + "layer1 = nn.Linear(1,1, bias=False)\n", + "model = nn.Sequential(layer1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loss and Optimizer\n", + "The loss function, `nn.MSELoss()`, is in charge of letting the model know how well it has learned the relationship between the input and output. The primary role of the optimizer (in this case `SGD`) is to minimize or lower that loss value as it tunes its weights." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "## loss function\n", + "criterion = nn.MSELoss()\n", + "\n", + "## optimizer algorithm\n", + "optimizer = torch.optim.SGD(model.parameters(), lr=0.01)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training the Neural Network Model\n", + "We have all the components we need to train our model. Below is the code used to train our model. \n", + "\n", + "In simple terms, we train the model by feeding it the input and output pairs for a couple of rounds (i.e., `epoch`). After a series of forward and backward steps, the model somewhat learns the relationship between x and y values. This is notable by the decrease in the computed `loss`. For a more detailed explanation of this code check out this [tutorial](https://medium.com/dair-ai/a-simple-neural-network-from-scratch-with-pytorch-and-google-colab-c7f3830618e0). 
" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch: 0 | Loss: 38.4262\n", + "Epoch: 1 | Loss: 31.0058\n", + "Epoch: 2 | Loss: 25.0396\n", + "Epoch: 3 | Loss: 20.2428\n", + "Epoch: 4 | Loss: 16.3860\n", + "Epoch: 5 | Loss: 13.2852\n", + "Epoch: 6 | Loss: 10.7921\n", + "Epoch: 7 | Loss: 8.7876\n", + "Epoch: 8 | Loss: 7.1760\n", + "Epoch: 9 | Loss: 5.8802\n", + "Epoch: 10 | Loss: 4.8384\n", + "Epoch: 11 | Loss: 4.0008\n", + "Epoch: 12 | Loss: 3.3273\n", + "Epoch: 13 | Loss: 2.7858\n", + "Epoch: 14 | Loss: 2.3505\n", + "Epoch: 15 | Loss: 2.0004\n", + "Epoch: 16 | Loss: 1.7190\n", + "Epoch: 17 | Loss: 1.4927\n", + "Epoch: 18 | Loss: 1.3108\n", + "Epoch: 19 | Loss: 1.1646\n", + "Epoch: 20 | Loss: 1.0470\n", + "Epoch: 21 | Loss: 0.9524\n", + "Epoch: 22 | Loss: 0.8764\n", + "Epoch: 23 | Loss: 0.8153\n", + "Epoch: 24 | Loss: 0.7661\n", + "Epoch: 25 | Loss: 0.7266\n", + "Epoch: 26 | Loss: 0.6948\n", + "Epoch: 27 | Loss: 0.6693\n", + "Epoch: 28 | Loss: 0.6488\n", + "Epoch: 29 | Loss: 0.6322\n", + "Epoch: 30 | Loss: 0.6190\n", + "Epoch: 31 | Loss: 0.6083\n", + "Epoch: 32 | Loss: 0.5997\n", + "Epoch: 33 | Loss: 0.5928\n", + "Epoch: 34 | Loss: 0.5873\n", + "Epoch: 35 | Loss: 0.5828\n", + "Epoch: 36 | Loss: 0.5792\n", + "Epoch: 37 | Loss: 0.5763\n", + "Epoch: 38 | Loss: 0.5740\n", + "Epoch: 39 | Loss: 0.5722\n", + "Epoch: 40 | Loss: 0.5707\n", + "Epoch: 41 | Loss: 0.5695\n", + "Epoch: 42 | Loss: 0.5685\n", + "Epoch: 43 | Loss: 0.5677\n", + "Epoch: 44 | Loss: 0.5671\n", + "Epoch: 45 | Loss: 0.5666\n", + "Epoch: 46 | Loss: 0.5662\n", + "Epoch: 47 | Loss: 0.5659\n", + "Epoch: 48 | Loss: 0.5656\n", + "Epoch: 49 | Loss: 0.5654\n", + "Epoch: 50 | Loss: 0.5652\n", + "Epoch: 51 | Loss: 0.5651\n", + "Epoch: 52 | Loss: 0.5650\n", + "Epoch: 53 | Loss: 0.5649\n", + "Epoch: 54 | Loss: 0.5648\n", + "Epoch: 55 | Loss: 0.5647\n", + "Epoch: 56 | Loss: 0.5647\n", + "Epoch: 57 | Loss: 
0.5647\n", + "Epoch: 58 | Loss: 0.5646\n", + "Epoch: 59 | Loss: 0.5646\n", + "Epoch: 60 | Loss: 0.5646\n", + "Epoch: 61 | Loss: 0.5646\n", + "Epoch: 62 | Loss: 0.5646\n", + "Epoch: 63 | Loss: 0.5646\n", + "Epoch: 64 | Loss: 0.5645\n", + "Epoch: 65 | Loss: 0.5645\n", + "Epoch: 66 | Loss: 0.5645\n", + "Epoch: 67 | Loss: 0.5645\n", + "Epoch: 68 | Loss: 0.5645\n", + "Epoch: 69 | Loss: 0.5645\n", + "Epoch: 70 | Loss: 0.5645\n", + "Epoch: 71 | Loss: 0.5645\n", + "Epoch: 72 | Loss: 0.5645\n", + "Epoch: 73 | Loss: 0.5645\n", + "Epoch: 74 | Loss: 0.5645\n", + "Epoch: 75 | Loss: 0.5645\n", + "Epoch: 76 | Loss: 0.5645\n", + "Epoch: 77 | Loss: 0.5645\n", + "Epoch: 78 | Loss: 0.5645\n", + "Epoch: 79 | Loss: 0.5645\n", + "Epoch: 80 | Loss: 0.5645\n", + "Epoch: 81 | Loss: 0.5645\n", + "Epoch: 82 | Loss: 0.5645\n", + "Epoch: 83 | Loss: 0.5645\n", + "Epoch: 84 | Loss: 0.5645\n", + "Epoch: 85 | Loss: 0.5645\n", + "Epoch: 86 | Loss: 0.5645\n", + "Epoch: 87 | Loss: 0.5645\n", + "Epoch: 88 | Loss: 0.5645\n", + "Epoch: 89 | Loss: 0.5645\n", + "Epoch: 90 | Loss: 0.5645\n", + "Epoch: 91 | Loss: 0.5645\n", + "Epoch: 92 | Loss: 0.5645\n", + "Epoch: 93 | Loss: 0.5645\n", + "Epoch: 94 | Loss: 0.5645\n", + "Epoch: 95 | Loss: 0.5645\n", + "Epoch: 96 | Loss: 0.5645\n", + "Epoch: 97 | Loss: 0.5645\n", + "Epoch: 98 | Loss: 0.5645\n", + "Epoch: 99 | Loss: 0.5645\n", + "Epoch: 100 | Loss: 0.5645\n", + "Epoch: 101 | Loss: 0.5645\n", + "Epoch: 102 | Loss: 0.5645\n", + "Epoch: 103 | Loss: 0.5645\n", + "Epoch: 104 | Loss: 0.5645\n", + "Epoch: 105 | Loss: 0.5645\n", + "Epoch: 106 | Loss: 0.5645\n", + "Epoch: 107 | Loss: 0.5645\n", + "Epoch: 108 | Loss: 0.5645\n", + "Epoch: 109 | Loss: 0.5645\n", + "Epoch: 110 | Loss: 0.5645\n", + "Epoch: 111 | Loss: 0.5645\n", + "Epoch: 112 | Loss: 0.5645\n", + "Epoch: 113 | Loss: 0.5645\n", + "Epoch: 114 | Loss: 0.5645\n", + "Epoch: 115 | Loss: 0.5645\n", + "Epoch: 116 | Loss: 0.5645\n", + "Epoch: 117 | Loss: 0.5645\n", + "Epoch: 118 | Loss: 0.5645\n", + "Epoch: 119 | 
Loss: 0.5645\n", + "Epoch: 120 | Loss: 0.5645\n", + "Epoch: 121 | Loss: 0.5645\n", + "Epoch: 122 | Loss: 0.5645\n", + "Epoch: 123 | Loss: 0.5645\n", + "Epoch: 124 | Loss: 0.5645\n", + "Epoch: 125 | Loss: 0.5645\n", + "Epoch: 126 | Loss: 0.5645\n", + "Epoch: 127 | Loss: 0.5645\n", + "Epoch: 128 | Loss: 0.5645\n", + "Epoch: 129 | Loss: 0.5645\n", + "Epoch: 130 | Loss: 0.5645\n", + "Epoch: 131 | Loss: 0.5645\n", + "Epoch: 132 | Loss: 0.5645\n", + "Epoch: 133 | Loss: 0.5645\n", + "Epoch: 134 | Loss: 0.5645\n", + "Epoch: 135 | Loss: 0.5645\n", + "Epoch: 136 | Loss: 0.5645\n", + "Epoch: 137 | Loss: 0.5645\n", + "Epoch: 138 | Loss: 0.5645\n", + "Epoch: 139 | Loss: 0.5645\n", + "Epoch: 140 | Loss: 0.5645\n", + "Epoch: 141 | Loss: 0.5645\n", + "Epoch: 142 | Loss: 0.5645\n", + "Epoch: 143 | Loss: 0.5645\n", + "Epoch: 144 | Loss: 0.5645\n", + "Epoch: 145 | Loss: 0.5645\n", + "Epoch: 146 | Loss: 0.5645\n", + "Epoch: 147 | Loss: 0.5645\n", + "Epoch: 148 | Loss: 0.5645\n", + "Epoch: 149 | Loss: 0.5645\n" + ] + } + ], + "source": [ + "## training\n", + "for ITER in range(150):\n", + " model = model.train()\n", + "\n", + " ## forward\n", + " output = model(x)\n", + " loss = criterion(output, y)\n", + " optimizer.zero_grad()\n", + "\n", + " ## backward + update model params \n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " model.eval()\n", + " print('Epoch: %d | Loss: %.4f' %(ITER, loss.detach().item()))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing the Model\n", + "After training the model we have the ability to test the model predictive capability by passing it an input. Below is a simple example of how you could achieve this with our model. The result we obtained aligns with the results obtained in this [notebook](https://github.com/lmoroney/dlaicourse/blob/master/Course%201%20-%20Part%202%20-%20Lesson%202%20-%20Notebook.ipynb), which inspired this entire tutorial. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "17.096769332885742\n" + ] + } + ], + "source": [ + "## test the model\n", + "sample = torch.tensor([10.0], dtype=torch.float)\n", + "predicted = model(sample)\n", + "print(predicted.detach().item())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Final Words\n", + "\n", + "Congratulations! In this tutorial you learned how to train a simple neural network using PyTorch. You also learned about the basic components that make up a neural network model such as the linear transformation layer, optimizer, and loss function. We then trained the model and tested its predictive capabilities. You are well on your way to become more knowledgeable about deep learning and PyTorch. I have provided a bunch of references below if you are interested in practising and learning more. \n", + "\n", + "*I would like to thank Laurence Moroney for his excellent [tutorial](https://github.com/lmoroney/dlaicourse/blob/master/Course%201%20-%20Part%202%20-%20Lesson%202%20-%20Notebook.ipynb) which I used as an inspiration for this tutorial.*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Exercises\n", + "- Add more examples in the input and output tensors. In addition, try to change the dimensions of the data, say by adding an extra value in each array. What needs to be changed to successfully train the network with the new data?\n", + "- The model converged really fast, which means it learned the relationship between x and y values after a couple of iterations. Do you think it makes sense to continue training? How would you automate the process of stopping the training after the model loss doesn't subtantially change?\n", + "- In our example, we used a single hidden layer. 
Try to take a look at the PyTorch documentation to figure out what you need to do to get a model with more layers. What happens if you add more hidden layers?\n", + "- We did not discuss the learning rate (`lr=0.01`) and the optimizer in great detail. Check out the [PyTorch documentation](https://pytorch.org/docs/stable/optim.html) to learn more about what other optimizers you can use." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## References\n", + "- [The Hello World of Deep Learning with Neural Networks](https://github.com/lmoroney/dlaicourse/blob/master/Course%201%20-%20Part%202%20-%20Lesson%202%20-%20Notebook.ipynb)\n", + "- [A Simple Neural Network from Scratch with PyTorch and Google Colab](https://medium.com/dair-ai/a-simple-neural-network-from-scratch-with-pytorch-and-google-colab-c7f3830618e0?source=collection_category---4------1-----------------------)\n", + "- [PyTorch Official Docs](https://pytorch.org/docs/stable/nn.html)\n", + "- [PyTorch 1.2 Quickstart with Google Colab](https://medium.com/dair-ai/pytorch-1-2-quickstart-with-google-colab-6690a30c38d)\n", + "- [A Gentle Introduction to PyTorch](https://medium.com/dair-ai/pytorch-1-2-introduction-guide-f6fa9bb7597c)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('nlp')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "7ade285f687a1ecab6f569c64721a8142b161535723b6a0ecd56d473b77660bf" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/spec-file.txt b/spec-file.txt new file mode 100644 index 0000000..5df10d3 --- /dev/null +++ b/spec-file.txt @@ -0,0 +1,101 @@ +# This file may be used to create an 
environment using: +# $ conda create --name --file +# platform: linux-64 +@EXPLICIT +https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda +https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-mkl.conda +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2022.07.19-h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/intel-openmp-2021.4.0-h06a4308_3561.conda +https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran4-7.5.0-ha8ba4b0_17.conda +https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda +https://conda.anaconda.org/pytorch/noarch/pytorch-mutex-1.0-cuda.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.5.0-ha8ba4b0_17.conda +https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/mkl-2021.4.0-h06a4308_640.conda +https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda +https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h7b6447c_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/cudatoolkit-11.3.1-h2bc3f7f_2.conda +https://repo.anaconda.com/pkgs/main/linux-64/giflib-5.2.1-h7b6447c_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/gmp-6.2.1-h295c915_3.conda +https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9e-h7f8727e_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/lame-3.100-h7b6447c_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.3-he6710b0_2.conda +https://repo.anaconda.com/pkgs/main/linux-64/libopus-1.3.1-h7b6447c_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libsodium-1.0.18-h7b6447c_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libtasn1-4.16.0-h27cfd23_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libunistring-0.9.10-h27cfd23_0.conda 
+https://repo.anaconda.com/pkgs/main/linux-64/libuv-1.40.0-h7b6447c_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libvpx-1.7.0-h439df22_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libwebp-base-1.2.2-h7f8727e_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.3-h295c915_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.3-h7f8727e_2.conda +https://repo.anaconda.com/pkgs/main/linux-64/openh264-2.1.1-h4ff587b_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1q-h7f8727e_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/x264-1!157.20191217-h7b6447c_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/xz-5.2.5-h7f8727e_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.12-h7f8727e_2.conda +https://repo.anaconda.com/pkgs/main/linux-64/libidn2-2.3.2-h7f8727e_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.37-hbc83047_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/nettle-3.7.3-hbbd107a_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/readline-8.1.2-h7f8727e_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/zeromq-4.3.4-h2531618_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.5.2-ha4553b6_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.11.0-h70c0345_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/gnutls-3.6.15-he1e5248_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.2.0-h2818925_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.38.3-hc218d9a_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/ffmpeg-4.2.2-h20bf706_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/libwebp-1.2.2-h55f646e_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/python-3.7.13-h12debd9_0.conda +https://repo.anaconda.com/pkgs/main/noarch/backcall-0.2.0-pyhd3eb1b0_0.tar.bz2 
+https://repo.anaconda.com/pkgs/main/linux-64/certifi-2022.6.15-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/noarch/charset-normalizer-2.0.4-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/debugpy-1.5.1-py37h295c915_0.conda +https://repo.anaconda.com/pkgs/main/noarch/decorator-5.1.1-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/entrypoints-0.4-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/noarch/idna-3.3-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/nest-asyncio-1.5.5-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/noarch/parso-0.8.3-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/noarch/pickleshare-0.7.5-pyhd3eb1b0_1003.conda +https://repo.anaconda.com/pkgs/main/linux-64/pillow-9.0.1-py37h22f2fdc_0.conda +https://repo.anaconda.com/pkgs/main/noarch/ptyprocess-0.7.0-pyhd3eb1b0_2.conda +https://repo.anaconda.com/pkgs/main/noarch/pycparser-2.21-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/noarch/pygments-2.11.2-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/pysocks-1.7.1-py37_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/pyzmq-22.3.0-py37h295c915_2.conda +https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.1-py37h27cfd23_0.conda +https://repo.anaconda.com/pkgs/main/noarch/traitlets-5.1.1-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/noarch/typing_extensions-4.1.1-pyh06a4308_0.conda +https://repo.anaconda.com/pkgs/main/noarch/wcwidth-0.2.5-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/noarch/wheel-0.37.1-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/cffi-1.15.0-py37hd667e15_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/jedi-0.18.1-py37h06a4308_1.conda +https://repo.anaconda.com/pkgs/main/linux-64/jupyter_core-4.10.0-py37h06a4308_0.conda 
+https://repo.anaconda.com/pkgs/main/noarch/matplotlib-inline-0.1.2-pyhd3eb1b0_2.conda +https://repo.anaconda.com/pkgs/main/linux-64/mkl-service-2.4.0-py37h7f8727e_0.conda +https://repo.anaconda.com/pkgs/main/noarch/pexpect-4.8.0-pyhd3eb1b0_3.conda +https://repo.anaconda.com/pkgs/main/noarch/prompt-toolkit-3.0.20-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda +https://conda.anaconda.org/pytorch/linux-64/pytorch-1.11.0-py3.7_cuda11.3_cudnn8.2.0_0.tar.bz2 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-61.2.0-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/brotlipy-0.7.0-py37h27cfd23_1003.conda +https://repo.anaconda.com/pkgs/main/linux-64/cryptography-37.0.1-py37h9ce1e76_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/ipython-7.31.1-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/jupyter_client-7.2.2-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.21.5-py37ha15fc14_3.conda +https://repo.anaconda.com/pkgs/main/linux-64/pip-21.2.2-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/ipykernel-6.9.1-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/noarch/pyopenssl-22.0.0-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/urllib3-1.26.9-py37h06a4308_0.conda +https://repo.anaconda.com/pkgs/main/noarch/requests-2.27.1-pyhd3eb1b0_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/mkl_fft-1.3.1-py37hd3c417c_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/mkl_random-1.2.2-py37h51133e4_0.conda +https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.21.5-py37h6c91a56_3.conda +https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.7.3-py37hc147768_0.conda +https://conda.anaconda.org/pytorch/linux-64/torchaudio-0.11.0-py37_cu113.tar.bz2 +https://conda.anaconda.org/pytorch/linux-64/torchvision-0.12.0-py37_cu113.tar.bz2