diff --git a/chapter_preliminaries/calculus.ipynb b/chapter_preliminaries/calculus.ipynb new file mode 100644 index 000000000000..a17191689d5d --- /dev/null +++ b/chapter_preliminaries/calculus.ipynb @@ -0,0 +1,1599 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "dd9d27db", + "metadata": { + "id": "dd9d27db" + }, + "source": [ + "The following additional libraries are needed to run this\n", + "notebook. Note that running on Colab is experimental, please report a Github\n", + "issue if you have any problem." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fb87274e", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 1000 + }, + "id": "fb87274e", + "outputId": "17456e69-706f-4fc5-8f4e-a13d42baef9c" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting d2l==1.0.3\n", + " Downloading d2l-1.0.3-py3-none-any.whl.metadata (556 bytes)\n", + "Collecting jupyter==1.0.0 (from d2l==1.0.3)\n", + " Downloading jupyter-1.0.0-py2.py3-none-any.whl.metadata (995 bytes)\n", + "Collecting numpy==1.23.5 (from d2l==1.0.3)\n", + " Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)\n", + "Collecting matplotlib==3.7.2 (from d2l==1.0.3)\n", + " Downloading matplotlib-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n", + "Collecting matplotlib-inline==0.1.6 (from d2l==1.0.3)\n", + " Downloading matplotlib_inline-0.1.6-py3-none-any.whl.metadata (2.8 kB)\n", + "Collecting requests==2.31.0 (from d2l==1.0.3)\n", + " Downloading requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)\n", + "Collecting pandas==2.0.3 (from d2l==1.0.3)\n", + " Downloading pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n", + "Collecting scipy==1.10.1 (from d2l==1.0.3)\n", + " Downloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)\n", + 
"\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.9/58.9 kB\u001b[0m \u001b[31m993.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: notebook in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (6.5.5)\n", + "Collecting qtconsole (from jupyter==1.0.0->d2l==1.0.3)\n", + " Downloading qtconsole-5.6.1-py3-none-any.whl.metadata (5.0 kB)\n", + "Requirement already satisfied: jupyter-console in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (6.1.0)\n", + "Requirement already satisfied: nbconvert in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (6.5.4)\n", + "Requirement already satisfied: ipykernel in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (5.5.6)\n", + "Requirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (7.7.1)\n", + "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (1.3.0)\n", + "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (0.12.1)\n", + "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (4.54.1)\n", + "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (1.4.7)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (24.1)\n", + "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (10.4.0)\n", + "Collecting pyparsing<3.1,>=2.3.1 (from matplotlib==3.7.2->d2l==1.0.3)\n", + " Downloading pyparsing-3.0.9-py3-none-any.whl.metadata (4.2 kB)\n", + "Requirement 
already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (2.8.2)\n", + "Requirement already satisfied: traitlets in /usr/local/lib/python3.10/dist-packages (from matplotlib-inline==0.1.6->d2l==1.0.3) (5.7.1)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas==2.0.3->d2l==1.0.3) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas==2.0.3->d2l==1.0.3) (2024.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests==2.31.0->d2l==1.0.3) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests==2.31.0->d2l==1.0.3) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests==2.31.0->d2l==1.0.3) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests==2.31.0->d2l==1.0.3) (2024.8.30)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib==3.7.2->d2l==1.0.3) (1.16.0)\n", + "Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter==1.0.0->d2l==1.0.3) (0.2.0)\n", + "Requirement already satisfied: ipython>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter==1.0.0->d2l==1.0.3) (7.34.0)\n", + "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter==1.0.0->d2l==1.0.3) (6.1.12)\n", + "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter==1.0.0->d2l==1.0.3) (6.3.3)\n", + "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from 
ipywidgets->jupyter==1.0.0->d2l==1.0.3) (3.6.10)\n", + "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->jupyter==1.0.0->d2l==1.0.3) (3.0.13)\n", + "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-console->jupyter==1.0.0->d2l==1.0.3) (3.0.48)\n", + "Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from jupyter-console->jupyter==1.0.0->d2l==1.0.3) (2.18.0)\n", + "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (4.9.4)\n", + "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (4.12.3)\n", + "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (6.1.0)\n", + "Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.7.1)\n", + "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.4)\n", + "Requirement already satisfied: jinja2>=3.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (3.1.4)\n", + "Requirement already satisfied: jupyter-core>=4.7 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (5.7.2)\n", + "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.3.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (3.0.2)\n", + "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from 
nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.8.4)\n", + "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.10.0)\n", + "Requirement already satisfied: nbformat>=5.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (5.10.4)\n", + "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (1.5.1)\n", + "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (1.4.0)\n", + "Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (24.0.1)\n", + "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (23.1.0)\n", + "Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (1.6.0)\n", + "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (1.8.3)\n", + "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (0.18.1)\n", + "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (0.21.0)\n", + "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (1.1.0)\n", + "Collecting qtpy>=2.4.0 (from qtconsole->jupyter==1.0.0->d2l==1.0.3)\n", + " Downloading QtPy-2.4.1-py3-none-any.whl.metadata (12 kB)\n", + "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) 
(75.1.0)\n", + "Collecting jedi>=0.16 (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3)\n", + " Downloading jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)\n", + "Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (4.4.2)\n", + "Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (0.7.5)\n", + "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (0.2.0)\n", + "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (4.9.0)\n", + "Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core>=4.7->nbconvert->jupyter==1.0.0->d2l==1.0.3) (4.3.6)\n", + "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (0.2.4)\n", + "Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (2.20.0)\n", + "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (4.23.0)\n", + "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->jupyter-console->jupyter==1.0.0->d2l==1.0.3) (0.2.13)\n", + "Requirement already satisfied: ptyprocess in /usr/local/lib/python3.10/dist-packages (from terminado>=0.8.3->notebook->jupyter==1.0.0->d2l==1.0.3) (0.7.0)\n", + "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from 
argon2-cffi->notebook->jupyter==1.0.0->d2l==1.0.3) (21.2.0)\n", + "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert->jupyter==1.0.0->d2l==1.0.3) (2.6)\n", + "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.5.1)\n", + "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (0.8.4)\n", + "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (24.2.0)\n", + "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (2024.10.1)\n", + "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.35.1)\n", + "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.20.0)\n", + "Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (1.24.0)\n", + "Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook->jupyter==1.0.0->d2l==1.0.3) (1.17.1)\n", + "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook->jupyter==1.0.0->d2l==1.0.3) (2.22)\n", + "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from 
jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (3.7.1)\n", + "Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (1.8.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (1.3.1)\n", + "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (1.2.2)\n", + "Downloading d2l-1.0.3-py3-none-any.whl (111 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.7/111.7 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)\n", + "Downloading matplotlib-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.6/11.6 MB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading matplotlib_inline-0.1.6-py3-none-any.whl (9.4 kB)\n", + "Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m90.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading requests-2.31.0-py3-none-any.whl (62 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.6/62.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.4/34.4 MB\u001b[0m \u001b[31m21.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pyparsing-3.0.9-py3-none-any.whl (98 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.3/98.3 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading qtconsole-5.6.1-py3-none-any.whl (125 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.0/125.0 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading QtPy-2.4.1-py3-none-any.whl (93 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.5/93.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m57.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: requests, qtpy, pyparsing, numpy, matplotlib-inline, jedi, scipy, pandas, matplotlib, qtconsole, jupyter, d2l\n", + " Attempting uninstall: requests\n", + " Found existing installation: requests 2.32.3\n", + " Uninstalling requests-2.32.3:\n", + " Successfully uninstalled requests-2.32.3\n", + " Attempting uninstall: pyparsing\n", + " Found existing installation: pyparsing 3.2.0\n", + " Uninstalling pyparsing-3.2.0:\n", + " Successfully uninstalled pyparsing-3.2.0\n", + " 
Attempting uninstall: numpy\n", + " Found existing installation: numpy 1.26.4\n", + " Uninstalling numpy-1.26.4:\n", + " Successfully uninstalled numpy-1.26.4\n", + " Attempting uninstall: matplotlib-inline\n", + " Found existing installation: matplotlib-inline 0.1.7\n", + " Uninstalling matplotlib-inline-0.1.7:\n", + " Successfully uninstalled matplotlib-inline-0.1.7\n", + " Attempting uninstall: scipy\n", + " Found existing installation: scipy 1.13.1\n", + " Uninstalling scipy-1.13.1:\n", + " Successfully uninstalled scipy-1.13.1\n", + " Attempting uninstall: pandas\n", + " Found existing installation: pandas 2.2.2\n", + " Uninstalling pandas-2.2.2:\n", + " Successfully uninstalled pandas-2.2.2\n", + " Attempting uninstall: matplotlib\n", + " Found existing installation: matplotlib 3.7.1\n", + " Uninstalling matplotlib-3.7.1:\n", + " Successfully uninstalled matplotlib-3.7.1\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "albucore 0.0.16 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.\n", + "albumentations 1.4.15 requires numpy>=1.24.4, but you have numpy 1.23.5 which is incompatible.\n", + "bigframes 1.24.0 requires numpy>=1.24.0, but you have numpy 1.23.5 which is incompatible.\n", + "chex 0.1.87 requires numpy>=1.24.1, but you have numpy 1.23.5 which is incompatible.\n", + "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.0.3 which is incompatible.\n", + "google-colab 1.0.0 requires requests==2.32.3, but you have requests 2.31.0 which is incompatible.\n", + "jax 0.4.33 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.\n", + "jaxlib 0.4.33 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.\n", + "mizani 0.11.4 requires pandas>=2.1.0, but you have pandas 2.0.3 which is incompatible.\n", + "plotnine 0.13.6 requires pandas<3.0.0,>=2.1.0, but you have pandas 2.0.3 which is incompatible.\n", + "xarray 2024.9.0 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.\n", + "xarray 2024.9.0 requires pandas>=2.1, but you have pandas 2.0.3 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed d2l-1.0.3 jedi-0.19.1 jupyter-1.0.0 matplotlib-3.7.2 matplotlib-inline-0.1.6 numpy-1.23.5 pandas-2.0.3 pyparsing-3.0.9 qtconsole-5.6.1 qtpy-2.4.1 requests-2.31.0 scipy-1.10.1\n" + ] + }, + { + "output_type": "display_data", + "data": { + "application/vnd.colab-display-data+json": { + "pip_warning": { + "packages": [ + "matplotlib", + "matplotlib_inline", + "mpl_toolkits", + "numpy" + ] + }, + "id": "b7f81e4490e24cdbac3306a3dc38438b" + } + }, + "metadata": {} + } + ], + "source": [ + "!pip install d2l==1.0.3\n" + ] + }, + { + "cell_type": "markdown", + "id": "fb1c337f", + "metadata": { + "origin_pos": 1, + "id": "fb1c337f" + }, + "source": [ + "# Calculus\n", + ":label:`sec_calculus`\n", + "\n", + "For a long 
time, how to calculate\n", + "the area of a circle remained a mystery.\n", + "Then, in Ancient Greece, the mathematician Archimedes\n", + "came up with the clever idea\n", + "to inscribe a series of polygons\n", + "with increasing numbers of vertices\n", + "on the inside of a circle\n", + "(:numref:`fig_circle_area`).\n", + "For a polygon with $n$ vertices,\n", + "we obtain $n$ triangles.\n", + "The height of each triangle approaches the radius $r$\n", + "as we partition the circle more finely.\n", + "At the same time, its base approaches $2 \\pi r/n$,\n", + "since the ratio between arc and secant approaches 1\n", + "for a large number of vertices.\n", + "Thus, the area of the polygon approaches\n", + "$n \\cdot r \\cdot \\frac{1}{2} (2 \\pi r/n) = \\pi r^2$.\n", + "\n", + "![Finding the area of a circle as a limit procedure.](https://github.com/d2l-ai/d2l-pytorch-colab/blob/master/img/polygon-circle.svg?raw=1)\n", + ":label:`fig_circle_area`\n", + "\n", + "This limiting procedure is at the root of both\n", + "*differential calculus* and *integral calculus*.\n", + "The former can tell us how to increase\n", + "or decrease a function's value by\n", + "manipulating its arguments.\n", + "This comes in handy for the *optimization problems*\n", + "that we face in deep learning,\n", + "where we repeatedly update our parameters\n", + "in order to decrease the loss function.\n", + "Optimization addresses how to fit our models to training data,\n", + "and calculus is its key prerequisite.\n", + "However, do not forget that our ultimate goal\n", + "is to perform well on *previously unseen* data.\n", + "That problem is called *generalization*\n", + "and will be a key focus of other chapters.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5162883d", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T19:31:00.158561Z", + "iopub.status.busy": "2023-08-18T19:31:00.158199Z", + "iopub.status.idle": "2023-08-18T19:31:03.258372Z", + 
"shell.execute_reply": "2023-08-18T19:31:03.256925Z" + }, + "origin_pos": 3, + "tab": [ + "pytorch" + ], + "id": "5162883d" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "import numpy as np\n", + "from matplotlib_inline import backend_inline\n", + "from d2l import torch as d2l" + ] + }, + { + "cell_type": "markdown", + "id": "8ad81b23", + "metadata": { + "origin_pos": 6, + "id": "8ad81b23" + }, + "source": [ + "## Derivatives and Differentiation\n", + "\n", + "Put simply, a *derivative* is the rate of change\n", + "in a function with respect to changes in its arguments.\n", + "Derivatives can tell us how rapidly a loss function\n", + "would increase or decrease were we\n", + "to *increase* or *decrease* each parameter\n", + "by an infinitesimally small amount.\n", + "Formally, for functions $f: \\mathbb{R} \\rightarrow \\mathbb{R}$,\n", + "that map from scalars to scalars,\n", + "[**the *derivative* of $f$ at a point $x$ is defined as**]\n", + "\n", + "(**$$f'(x) = \\lim_{h \\rightarrow 0} \\frac{f(x+h) - f(x)}{h}.$$**)\n", + ":eqlabel:`eq_derivative`\n", + "\n", + "This term on the right hand side is called a *limit*\n", + "and it tells us what happens\n", + "to the value of an expression\n", + "as a specified variable\n", + "approaches a particular value.\n", + "This limit tells us what\n", + "the ratio between a perturbation $h$\n", + "and the change in the function value\n", + "$f(x + h) - f(x)$ converges to\n", + "as we shrink its size to zero.\n", + "\n", + "When $f'(x)$ exists, $f$ is said\n", + "to be *differentiable* at $x$;\n", + "and when $f'(x)$ exists for all $x$\n", + "on a set, e.g., the interval $[a,b]$,\n", + "we say that $f$ is differentiable on this set.\n", + "Not all functions are differentiable,\n", + "including many that we wish to optimize,\n", + "such as accuracy and the area under the\n", + "receiver operating characteristic curve (AUC).\n", + "However, because computing the derivative of the loss\n", + "is a crucial step in 
nearly all\n", + "algorithms for training deep neural networks,\n", + "we often optimize a differentiable *surrogate* instead.\n", + "\n", + "\n", + "We can interpret the derivative\n", + "$f'(x)$\n", + "as the *instantaneous* rate of change\n", + "of $f(x)$ with respect to $x$.\n", + "Let's develop some intuition with an example.\n", + "(**Define $u = f(x) = 3x^2-4x$.**)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e351acdb", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T19:31:03.263539Z", + "iopub.status.busy": "2023-08-18T19:31:03.262795Z", + "iopub.status.idle": "2023-08-18T19:31:03.267363Z", + "shell.execute_reply": "2023-08-18T19:31:03.266349Z" + }, + "origin_pos": 8, + "tab": [ + "pytorch" + ], + "id": "e351acdb" + }, + "outputs": [], + "source": [ + "def f(x):\n", + " return 3 * x ** 2 - 4 * x" + ] + }, + { + "cell_type": "markdown", + "id": "50148144", + "metadata": { + "origin_pos": 11, + "id": "50148144" + }, + "source": [ + "[**Setting $x=1$, we see that $\\frac{f(x+h) - f(x)}{h}$**] (**approaches $2$\n", + "as $h$ approaches $0$.**)\n", + "While this experiment lacks\n", + "the rigor of a mathematical proof,\n", + "we can quickly see that indeed $f'(1) = 2$.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b5e7cf2", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T19:31:03.271432Z", + "iopub.status.busy": "2023-08-18T19:31:03.270665Z", + "iopub.status.idle": "2023-08-18T19:31:03.276568Z", + "shell.execute_reply": "2023-08-18T19:31:03.275548Z" + }, + "origin_pos": 12, + "tab": [ + "pytorch" + ], + "id": "7b5e7cf2", + "outputId": "50b7f187-3430-4a25-bd46-22283b596474" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "h=0.10000, numerical limit=2.30000\n", + "h=0.01000, numerical limit=2.03000\n", + "h=0.00100, numerical limit=2.00300\n", + "h=0.00010, numerical limit=2.00030\n", + "h=0.00001, numerical limit=2.00003\n" 
+ ] + } + ], + "source": [ + "for h in 10.0**np.arange(-1, -6, -1):\n", + " print(f'h={h:.5f}, numerical limit={(f(1+h)-f(1))/h:.5f}')" + ] + }, + { + "cell_type": "markdown", + "id": "0be7fece", + "metadata": { + "origin_pos": 13, + "id": "0be7fece" + }, + "source": [ + "There are several equivalent notational conventions for derivatives.\n", + "Given $y = f(x)$, the following expressions are equivalent:\n", + "\n", + "$$f'(x) = y' = \\frac{dy}{dx} = \\frac{df}{dx} = \\frac{d}{dx} f(x) = Df(x) = D_x f(x),$$\n", + "\n", + "where the symbols $\\frac{d}{dx}$ and $D$ are *differentiation operators*.\n", + "Below, we present the derivatives of some common functions:\n", + "\n", + "$$\\begin{aligned} \\frac{d}{dx} C & = 0 && \\textrm{for any constant $C$} \\\\ \\frac{d}{dx} x^n & = n x^{n-1} && \\textrm{for } n \\neq 0 \\\\ \\frac{d}{dx} e^x & = e^x \\\\ \\frac{d}{dx} \\ln x & = x^{-1}. \\end{aligned}$$\n", + "\n", + "Functions composed from differentiable functions\n", + "are often themselves differentiable.\n", + "The following rules come in handy\n", + "for working with compositions\n", + "of any differentiable functions\n", + "$f$ and $g$, and constant $C$.\n", + "\n", + "$$\\begin{aligned} \\frac{d}{dx} [C f(x)] & = C \\frac{d}{dx} f(x) && \\textrm{Constant multiple rule} \\\\ \\frac{d}{dx} [f(x) + g(x)] & = \\frac{d}{dx} f(x) + \\frac{d}{dx} g(x) && \\textrm{Sum rule} \\\\ \\frac{d}{dx} [f(x) g(x)] & = f(x) \\frac{d}{dx} g(x) + g(x) \\frac{d}{dx} f(x) && \\textrm{Product rule} \\\\ \\frac{d}{dx} \\frac{f(x)}{g(x)} & = \\frac{g(x) \\frac{d}{dx} f(x) - f(x) \\frac{d}{dx} g(x)}{g^2(x)} && \\textrm{Quotient rule} \\end{aligned}$$\n", + "\n", + "Using this, we can apply the rules\n", + "to find the derivative of $3 x^2 - 4x$ via\n", + "\n", + "$$\\frac{d}{dx} [3 x^2 - 4x] = 3 \\frac{d}{dx} x^2 - 4 \\frac{d}{dx} x = 6x - 4.$$\n", + "\n", + "Plugging in $x = 1$ shows that, indeed,\n", + "the derivative equals $2$ at this location.\n", + "Note that derivatives tell us\n", 
+ "the *slope* of a function\n", + "at a particular location. \n", + "\n", + "## Visualization Utilities\n", + "\n", + "[**We can visualize the slopes of functions using the `matplotlib` library**].\n", + "We need to define a few functions.\n", + "As its name indicates, `use_svg_display`\n", + "tells `matplotlib` to output graphics\n", + "in SVG format for crisper images.\n", + "The comment `#@save` is a special modifier\n", + "that allows us to save any function,\n", + "class, or other code block to the `d2l` package\n", + "so that we can invoke it later\n", + "without repeating the code,\n", + "e.g., via `d2l.use_svg_display()`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70d92d2a", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T19:31:03.280403Z", + "iopub.status.busy": "2023-08-18T19:31:03.280065Z", + "iopub.status.idle": "2023-08-18T19:31:03.284973Z", + "shell.execute_reply": "2023-08-18T19:31:03.283950Z" + }, + "origin_pos": 14, + "tab": [ + "pytorch" + ], + "id": "70d92d2a" + }, + "outputs": [], + "source": [ + "def use_svg_display(): #@save\n", + " \"\"\"Use the svg format to display a plot in Jupyter.\"\"\"\n", + " backend_inline.set_matplotlib_formats('svg')" + ] + }, + { + "cell_type": "markdown", + "id": "ef7c490f", + "metadata": { + "origin_pos": 15, + "id": "ef7c490f" + }, + "source": [ + "Conveniently, we can set figure sizes with `set_figsize`.\n", + "Since the import statement `from matplotlib import pyplot as plt`\n", + "was marked via `#@save` in the `d2l` package, we can call `d2l.plt`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c868cdf6", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T19:31:03.289140Z", + "iopub.status.busy": "2023-08-18T19:31:03.288531Z", + "iopub.status.idle": "2023-08-18T19:31:03.293764Z", + "shell.execute_reply": "2023-08-18T19:31:03.292757Z" + }, + "origin_pos": 16, + "tab": [ + "pytorch" + ], + "id": "c868cdf6" + }, 
+ "outputs": [], + "source": [ + "def set_figsize(figsize=(3.5, 2.5)): #@save\n", + " \"\"\"Set the figure size for matplotlib.\"\"\"\n", + " use_svg_display()\n", + " d2l.plt.rcParams['figure.figsize'] = figsize" + ] + }, + { + "cell_type": "markdown", + "id": "f2bd084c", + "metadata": { + "origin_pos": 17, + "id": "f2bd084c" + }, + "source": [ + "The `set_axes` function can associate axes\n", + "with properties, including labels, ranges,\n", + "and scales.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8860f929", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T19:31:03.297796Z", + "iopub.status.busy": "2023-08-18T19:31:03.297092Z", + "iopub.status.idle": "2023-08-18T19:31:03.303068Z", + "shell.execute_reply": "2023-08-18T19:31:03.302068Z" + }, + "origin_pos": 18, + "tab": [ + "pytorch" + ], + "id": "8860f929" + }, + "outputs": [], + "source": [ + "#@save\n", + "def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):\n", + " \"\"\"Set the axes for matplotlib.\"\"\"\n", + " axes.set_xlabel(xlabel), axes.set_ylabel(ylabel)\n", + " axes.set_xscale(xscale), axes.set_yscale(yscale)\n", + " axes.set_xlim(xlim), axes.set_ylim(ylim)\n", + " if legend:\n", + " axes.legend(legend)\n", + " axes.grid()" + ] + }, + { + "cell_type": "markdown", + "id": "d8825398", + "metadata": { + "origin_pos": 19, + "id": "d8825398" + }, + "source": [ + "With these three functions, we can define a `plot` function\n", + "to overlay multiple curves.\n", + "Much of the code here is just ensuring\n", + "that the sizes and shapes of inputs match.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d56dd86", + "metadata": { + "execution": { + "iopub.execute_input": "2023-08-18T19:31:03.307130Z", + "iopub.status.busy": "2023-08-18T19:31:03.306443Z", + "iopub.status.idle": "2023-08-18T19:31:03.316351Z", + "shell.execute_reply": "2023-08-18T19:31:03.315391Z" + }, + "origin_pos": 20, + "tab": [ + "pytorch" + ], + 
#@save
def plot(X, Y=None, xlabel=None, ylabel=None, legend=[], xlim=None,
         ylim=None, xscale='linear', yscale='linear',
         fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):
    """Plot data points.

    Overlays one or more curves on a single axes object. Most of the body
    normalizes ``X`` and ``Y`` into two equally long collections of series.

    Args:
        X: The x-values when ``Y`` is given, otherwise the values to plot.
            Either one series (an object with ``ndim == 1`` or a flat list)
            or a collection of series.
        Y: The y-values, as one series or a collection of series. When
            ``None``, ``X`` is plotted against its implicit indices.
        xlabel: Forwarded to ``set_axes``.
        ylabel: Forwarded to ``set_axes``.
        legend: Forwarded to ``set_axes``; this function never mutates it.
        xlim: Forwarded to ``set_axes``.
        ylim: Forwarded to ``set_axes``.
        xscale: Forwarded to ``set_axes``.
        yscale: Forwarded to ``set_axes``.
        fmts: Format strings handed to ``axes.plot``, one per curve in
            order. They are reused cyclically when there are more curves
            than formats, so no curve is silently dropped.
        figsize: Forwarded to ``set_figsize``.
        axes: Object to draw on; when ``None``, ``d2l.plt.gca()`` is used.
            It is cleared with ``cla()`` before drawing.
    """
    from itertools import cycle  # function-scope import: cell stays standalone

    def has_one_axis(X):  # True if X (tensor or list) has 1 axis
        if hasattr(X, "ndim") and X.ndim == 1:
            return True
        # An empty list is treated as a single empty series; probing X[0]
        # on it would raise IndexError.
        return isinstance(X, list) and (
            len(X) == 0 or not hasattr(X[0], "__len__"))

    if has_one_axis(X):
        X = [X]
    if Y is None:
        # Only y-values were supplied: pair each series with an empty x so
        # the drawing loop falls back to implicit indices.
        X, Y = [[]] * len(X), X
    elif has_one_axis(Y):
        Y = [Y]
    if len(X) != len(Y):
        # Typical case: one shared x series repeated for every y series.
        X = X * len(Y)

    set_figsize(figsize)
    if axes is None:
        axes = d2l.plt.gca()
    axes.cla()
    # cycle() keeps yielding formats, so zip() stops at the last curve
    # instead of at the last format.
    for x, y, fmt in zip(X, Y, cycle(fmts)):
        if len(x):
            axes.plot(x, y, fmt)
        else:
            axes.plot(y, fmt)
    set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
+ " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "\n" + ], + "text/plain": [ + "
# Sample x on [0, 3) in steps of 0.1, then overlay f(x) with the tangent
# line y = 2x - 3 that touches it at x = 1.
x = np.arange(0, 3, 0.1)
plot(
    x,
    [f(x), 2 * x - 3],
    xlabel='x',
    ylabel='f(x)',
    legend=['f(x)', 'Tangent line (x=1)'],
)
a scalar.\n", + "The gradient of the function $f$\n", + "with respect to $\\mathbf{x}$\n", + "is a vector of $n$ partial derivatives:\n", + "\n", + "$$\\nabla_{\\mathbf{x}} f(\\mathbf{x}) = \\left[\\partial_{x_1} f(\\mathbf{x}), \\partial_{x_2} f(\\mathbf{x}), \\ldots\n", + "\\partial_{x_n} f(\\mathbf{x})\\right]^\\top.$$\n", + "\n", + "When there is no ambiguity,\n", + "$\\nabla_{\\mathbf{x}} f(\\mathbf{x})$\n", + "is typically replaced\n", + "by $\\nabla f(\\mathbf{x})$.\n", + "The following rules come in handy\n", + "for differentiating multivariate functions:\n", + "\n", + "* For all $\\mathbf{A} \\in \\mathbb{R}^{m \\times n}$ we have $\\nabla_{\\mathbf{x}} \\mathbf{A} \\mathbf{x} = \\mathbf{A}^\\top$ and $\\nabla_{\\mathbf{x}} \\mathbf{x}^\\top \\mathbf{A} = \\mathbf{A}$.\n", + "* For square matrices $\\mathbf{A} \\in \\mathbb{R}^{n \\times n}$ we have that $\\nabla_{\\mathbf{x}} \\mathbf{x}^\\top \\mathbf{A} \\mathbf{x} = (\\mathbf{A} + \\mathbf{A}^\\top)\\mathbf{x}$ and in particular\n", + "$\\nabla_{\\mathbf{x}} \\|\\mathbf{x} \\|^2 = \\nabla_{\\mathbf{x}} \\mathbf{x}^\\top \\mathbf{x} = 2\\mathbf{x}$.\n", + "\n", + "Similarly, for any matrix $\\mathbf{X}$,\n", + "we have $\\nabla_{\\mathbf{X}} \\|\\mathbf{X} \\|_\\textrm{F}^2 = 2\\mathbf{X}$.\n", + "\n", + "\n", + "\n", + "## Chain Rule\n", + "\n", + "In deep learning, the gradients of concern\n", + "are often difficult to calculate\n", + "because we are working with\n", + "deeply nested functions\n", + "(of functions (of functions...)).\n", + "Fortunately, the *chain rule* takes care of this.\n", + "Returning to functions of a single variable,\n", + "suppose that $y = f(g(x))$\n", + "and that the underlying functions\n", + "$y=f(u)$ and $u=g(x)$\n", + "are both differentiable.\n", + "The chain rule states that\n", + "\n", + "\n", + "$$\\frac{dy}{dx} = \\frac{dy}{du} \\frac{du}{dx}.$$\n", + "\n", + "\n", + "\n", + "Turning back to multivariate functions,\n", + "suppose that $y = f(\\mathbf{u})$ has 
variables\n", + "$u_1, u_2, \\ldots, u_m$,\n", + "where each $u_i = g_i(\\mathbf{x})$\n", + "has variables $x_1, x_2, \\ldots, x_n$,\n", + "i.e., $\\mathbf{u} = g(\\mathbf{x})$.\n", + "Then the chain rule states that\n", + "\n", + "$$\\frac{\\partial y}{\\partial x_{i}} = \\frac{\\partial y}{\\partial u_{1}} \\frac{\\partial u_{1}}{\\partial x_{i}} + \\frac{\\partial y}{\\partial u_{2}} \\frac{\\partial u_{2}}{\\partial x_{i}} + \\ldots + \\frac{\\partial y}{\\partial u_{m}} \\frac{\\partial u_{m}}{\\partial x_{i}} \\ \\textrm{ and so } \\ \\nabla_{\\mathbf{x}} y = \\mathbf{A} \\nabla_{\\mathbf{u}} y,$$\n", + "\n", + "where $\\mathbf{A} \\in \\mathbb{R}^{n \\times m}$ is a *matrix*\n", + "that contains the derivative of vector $\\mathbf{u}$\n", + "with respect to vector $\\mathbf{x}$.\n", + "Thus, evaluating the gradient requires\n", + "computing a vector--matrix product.\n", + "This is one of the key reasons why linear algebra\n", + "is such an integral building block\n", + "in building deep learning systems.\n", + "\n", + "\n", + "\n", + "## Discussion\n", + "\n", + "While we have just scratched the surface of a deep topic,\n", + "a number of concepts already come into focus:\n", + "first, the composition rules for differentiation\n", + "can be applied routinely, enabling\n", + "us to compute gradients *automatically*.\n", + "This task requires no creativity and thus\n", + "we can focus our cognitive powers elsewhere.\n", + "Second, computing the derivatives of vector-valued functions\n", + "requires us to multiply matrices as we trace\n", + "the dependency graph of variables from output to input.\n", + "In particular, this graph is traversed in a *forward* direction\n", + "when we evaluate a function\n", + "and in a *backwards* direction\n", + "when we compute gradients.\n", + "Later chapters will formally introduce backpropagation,\n", + "a computational procedure for applying the chain rule.\n", + "\n", + "From the viewpoint of optimization, gradients allow 
us\n", + "to determine how to move the parameters of a model\n", + "in order to lower the loss,\n", + "and each step of the optimization algorithms used\n", + "throughout this book will require calculating the gradient.\n", + "\n", + "## Exercises\n", + "\n", + "1. So far we took the rules for derivatives for granted.\n", + " Using the definition and limits prove the properties\n", + " for (i) $f(x) = c$, (ii) $f(x) = x^n$, (iii) $f(x) = e^x$ and (iv) $f(x) = \\log x$.\n", + "1. In the same vein, prove the product, sum, and quotient rule from first principles.\n", + "1. Prove that the constant multiple rule follows as a special case of the product rule.\n", + "1. Calculate the derivative of $f(x) = x^x$.\n", + "1. What does it mean that $f'(x) = 0$ for some $x$?\n", + " Give an example of a function $f$\n", + " and a location $x$ for which this might hold.\n", + "1. Plot the function $y = f(x) = x^3 - \\frac{1}{x}$\n", + " and plot its tangent line at $x = 1$.\n", + "1. Find the gradient of the function\n", + " $f(\\mathbf{x}) = 3x_1^2 + 5e^{x_2}$.\n", + "1. What is the gradient of the function\n", + " $f(\\mathbf{x}) = \\|\\mathbf{x}\\|_2$? What happens for $\\mathbf{x} = \\mathbf{0}$?\n", + "1. Can you write out the chain rule for the case\n", + " where $u = f(x, y, z)$ and $x = x(a, b)$, $y = y(a, b)$, and $z = z(a, b)$?\n", + "1. 
Given a function $f(x)$ that is invertible,\n", + " compute the derivative of its inverse $f^{-1}(x)$.\n", + " Here we have that $f^{-1}(f(x)) = x$ and conversely $f(f^{-1}(y)) = y$.\n", + " Hint: use these properties in your derivation.\n" + ] + }, + { + "cell_type": "markdown", + "id": "e6a8d8e9", + "metadata": { + "origin_pos": 25, + "tab": [ + "pytorch" + ], + "id": "e6a8d8e9" + }, + "source": [ + "[Discussions](https://discuss.d2l.ai/t/33)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + }, + "required_libs": [], + "colab": { + "provenance": [] + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file