diff --git a/chapter_preliminaries/calculus.ipynb b/chapter_preliminaries/calculus.ipynb
new file mode 100644
index 000000000000..a17191689d5d
--- /dev/null
+++ b/chapter_preliminaries/calculus.ipynb
@@ -0,0 +1,1599 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "dd9d27db",
+ "metadata": {
+ "id": "dd9d27db"
+ },
+ "source": [
+ "The following additional libraries are needed to run this\n",
+ "notebook. Note that running on Colab is experimental; please report a GitHub\n",
+ "issue if you encounter any problems."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "fb87274e",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 1000
+ },
+ "id": "fb87274e",
+ "outputId": "17456e69-706f-4fc5-8f4e-a13d42baef9c"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Collecting d2l==1.0.3\n",
+ " Downloading d2l-1.0.3-py3-none-any.whl.metadata (556 bytes)\n",
+ "Collecting jupyter==1.0.0 (from d2l==1.0.3)\n",
+ " Downloading jupyter-1.0.0-py2.py3-none-any.whl.metadata (995 bytes)\n",
+ "Collecting numpy==1.23.5 (from d2l==1.0.3)\n",
+ " Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)\n",
+ "Collecting matplotlib==3.7.2 (from d2l==1.0.3)\n",
+ " Downloading matplotlib-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)\n",
+ "Collecting matplotlib-inline==0.1.6 (from d2l==1.0.3)\n",
+ " Downloading matplotlib_inline-0.1.6-py3-none-any.whl.metadata (2.8 kB)\n",
+ "Collecting requests==2.31.0 (from d2l==1.0.3)\n",
+ " Downloading requests-2.31.0-py3-none-any.whl.metadata (4.6 kB)\n",
+ "Collecting pandas==2.0.3 (from d2l==1.0.3)\n",
+ " Downloading pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (18 kB)\n",
+ "Collecting scipy==1.10.1 (from d2l==1.0.3)\n",
+ " Downloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (58 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.9/58.9 kB\u001b[0m \u001b[31m993.0 kB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hRequirement already satisfied: notebook in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (6.5.5)\n",
+ "Collecting qtconsole (from jupyter==1.0.0->d2l==1.0.3)\n",
+ " Downloading qtconsole-5.6.1-py3-none-any.whl.metadata (5.0 kB)\n",
+ "Requirement already satisfied: jupyter-console in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (6.1.0)\n",
+ "Requirement already satisfied: nbconvert in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (6.5.4)\n",
+ "Requirement already satisfied: ipykernel in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (5.5.6)\n",
+ "Requirement already satisfied: ipywidgets in /usr/local/lib/python3.10/dist-packages (from jupyter==1.0.0->d2l==1.0.3) (7.7.1)\n",
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (1.3.0)\n",
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (0.12.1)\n",
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (4.54.1)\n",
+ "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (1.4.7)\n",
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (24.1)\n",
+ "Requirement already satisfied: pillow>=6.2.0 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (10.4.0)\n",
+ "Collecting pyparsing<3.1,>=2.3.1 (from matplotlib==3.7.2->d2l==1.0.3)\n",
+ " Downloading pyparsing-3.0.9-py3-none-any.whl.metadata (4.2 kB)\n",
+ "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.10/dist-packages (from matplotlib==3.7.2->d2l==1.0.3) (2.8.2)\n",
+ "Requirement already satisfied: traitlets in /usr/local/lib/python3.10/dist-packages (from matplotlib-inline==0.1.6->d2l==1.0.3) (5.7.1)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas==2.0.3->d2l==1.0.3) (2024.2)\n",
+ "Requirement already satisfied: tzdata>=2022.1 in /usr/local/lib/python3.10/dist-packages (from pandas==2.0.3->d2l==1.0.3) (2024.2)\n",
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests==2.31.0->d2l==1.0.3) (3.4.0)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests==2.31.0->d2l==1.0.3) (3.10)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests==2.31.0->d2l==1.0.3) (2.2.3)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests==2.31.0->d2l==1.0.3) (2024.8.30)\n",
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.7->matplotlib==3.7.2->d2l==1.0.3) (1.16.0)\n",
+ "Requirement already satisfied: ipython-genutils in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter==1.0.0->d2l==1.0.3) (0.2.0)\n",
+ "Requirement already satisfied: ipython>=5.0.0 in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter==1.0.0->d2l==1.0.3) (7.34.0)\n",
+ "Requirement already satisfied: jupyter-client in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter==1.0.0->d2l==1.0.3) (6.1.12)\n",
+ "Requirement already satisfied: tornado>=4.2 in /usr/local/lib/python3.10/dist-packages (from ipykernel->jupyter==1.0.0->d2l==1.0.3) (6.3.3)\n",
+ "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->jupyter==1.0.0->d2l==1.0.3) (3.6.10)\n",
+ "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from ipywidgets->jupyter==1.0.0->d2l==1.0.3) (3.0.13)\n",
+ "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-console->jupyter==1.0.0->d2l==1.0.3) (3.0.48)\n",
+ "Requirement already satisfied: pygments in /usr/local/lib/python3.10/dist-packages (from jupyter-console->jupyter==1.0.0->d2l==1.0.3) (2.18.0)\n",
+ "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (4.9.4)\n",
+ "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (4.12.3)\n",
+ "Requirement already satisfied: bleach in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (6.1.0)\n",
+ "Requirement already satisfied: defusedxml in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.7.1)\n",
+ "Requirement already satisfied: entrypoints>=0.2.2 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.4)\n",
+ "Requirement already satisfied: jinja2>=3.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (3.1.4)\n",
+ "Requirement already satisfied: jupyter-core>=4.7 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (5.7.2)\n",
+ "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.3.0)\n",
+ "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (3.0.2)\n",
+ "Requirement already satisfied: mistune<2,>=0.8.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.8.4)\n",
+ "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.10.0)\n",
+ "Requirement already satisfied: nbformat>=5.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (5.10.4)\n",
+ "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (1.5.1)\n",
+ "Requirement already satisfied: tinycss2 in /usr/local/lib/python3.10/dist-packages (from nbconvert->jupyter==1.0.0->d2l==1.0.3) (1.4.0)\n",
+ "Requirement already satisfied: pyzmq<25,>=17 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (24.0.1)\n",
+ "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (23.1.0)\n",
+ "Requirement already satisfied: nest-asyncio>=1.5 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (1.6.0)\n",
+ "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (1.8.3)\n",
+ "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (0.18.1)\n",
+ "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (0.21.0)\n",
+ "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.10/dist-packages (from notebook->jupyter==1.0.0->d2l==1.0.3) (1.1.0)\n",
+ "Collecting qtpy>=2.4.0 (from qtconsole->jupyter==1.0.0->d2l==1.0.3)\n",
+ " Downloading QtPy-2.4.1-py3-none-any.whl.metadata (12 kB)\n",
+ "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (75.1.0)\n",
+ "Collecting jedi>=0.16 (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3)\n",
+ " Downloading jedi-0.19.1-py2.py3-none-any.whl.metadata (22 kB)\n",
+ "Requirement already satisfied: decorator in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (4.4.2)\n",
+ "Requirement already satisfied: pickleshare in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (0.7.5)\n",
+ "Requirement already satisfied: backcall in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (0.2.0)\n",
+ "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.10/dist-packages (from ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (4.9.0)\n",
+ "Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.10/dist-packages (from jupyter-core>=4.7->nbconvert->jupyter==1.0.0->d2l==1.0.3) (4.3.6)\n",
+ "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (0.2.4)\n",
+ "Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.10/dist-packages (from nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (2.20.0)\n",
+ "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.10/dist-packages (from nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (4.23.0)\n",
+ "Requirement already satisfied: wcwidth in /usr/local/lib/python3.10/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->jupyter-console->jupyter==1.0.0->d2l==1.0.3) (0.2.13)\n",
+ "Requirement already satisfied: ptyprocess in /usr/local/lib/python3.10/dist-packages (from terminado>=0.8.3->notebook->jupyter==1.0.0->d2l==1.0.3) (0.7.0)\n",
+ "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.10/dist-packages (from argon2-cffi->notebook->jupyter==1.0.0->d2l==1.0.3) (21.2.0)\n",
+ "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4->nbconvert->jupyter==1.0.0->d2l==1.0.3) (2.6)\n",
+ "Requirement already satisfied: webencodings in /usr/local/lib/python3.10/dist-packages (from bleach->nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.5.1)\n",
+ "Requirement already satisfied: parso<0.9.0,>=0.8.3 in /usr/local/lib/python3.10/dist-packages (from jedi>=0.16->ipython>=5.0.0->ipykernel->jupyter==1.0.0->d2l==1.0.3) (0.8.4)\n",
+ "Requirement already satisfied: attrs>=22.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (24.2.0)\n",
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (2024.10.1)\n",
+ "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.35.1)\n",
+ "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.10/dist-packages (from jsonschema>=2.6->nbformat>=5.1->nbconvert->jupyter==1.0.0->d2l==1.0.3) (0.20.0)\n",
+ "Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.10/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (1.24.0)\n",
+ "Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.10/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook->jupyter==1.0.0->d2l==1.0.3) (1.17.1)\n",
+ "Requirement already satisfied: pycparser in /usr/local/lib/python3.10/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook->jupyter==1.0.0->d2l==1.0.3) (2.22)\n",
+ "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (3.7.1)\n",
+ "Requirement already satisfied: websocket-client in /usr/local/lib/python3.10/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (1.8.0)\n",
+ "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (1.3.1)\n",
+ "Requirement already satisfied: exceptiongroup in /usr/local/lib/python3.10/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook->jupyter==1.0.0->d2l==1.0.3) (1.2.2)\n",
+ "Downloading d2l-1.0.3-py3-none-any.whl (111 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.7/111.7 kB\u001b[0m \u001b[31m3.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading jupyter-1.0.0-py2.py3-none-any.whl (2.7 kB)\n",
+ "Downloading matplotlib-3.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.6 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m11.6/11.6 MB\u001b[0m \u001b[31m22.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading matplotlib_inline-0.1.6-py3-none-any.whl (9.4 kB)\n",
+ "Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m17.1/17.1 MB\u001b[0m \u001b[31m69.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading pandas-2.0.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.3/12.3 MB\u001b[0m \u001b[31m90.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading requests-2.31.0-py3-none-any.whl (62 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m62.6/62.6 kB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading scipy-1.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.4 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.4/34.4 MB\u001b[0m \u001b[31m21.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading pyparsing-3.0.9-py3-none-any.whl (98 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.3/98.3 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading qtconsole-5.6.1-py3-none-any.whl (125 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m125.0/125.0 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading QtPy-2.4.1-py3-none-any.whl (93 kB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m93.5/93.5 kB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hDownloading jedi-0.19.1-py2.py3-none-any.whl (1.6 MB)\n",
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m57.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
+ "\u001b[?25hInstalling collected packages: requests, qtpy, pyparsing, numpy, matplotlib-inline, jedi, scipy, pandas, matplotlib, qtconsole, jupyter, d2l\n",
+ " Attempting uninstall: requests\n",
+ " Found existing installation: requests 2.32.3\n",
+ " Uninstalling requests-2.32.3:\n",
+ " Successfully uninstalled requests-2.32.3\n",
+ " Attempting uninstall: pyparsing\n",
+ " Found existing installation: pyparsing 3.2.0\n",
+ " Uninstalling pyparsing-3.2.0:\n",
+ " Successfully uninstalled pyparsing-3.2.0\n",
+ " Attempting uninstall: numpy\n",
+ " Found existing installation: numpy 1.26.4\n",
+ " Uninstalling numpy-1.26.4:\n",
+ " Successfully uninstalled numpy-1.26.4\n",
+ " Attempting uninstall: matplotlib-inline\n",
+ " Found existing installation: matplotlib-inline 0.1.7\n",
+ " Uninstalling matplotlib-inline-0.1.7:\n",
+ " Successfully uninstalled matplotlib-inline-0.1.7\n",
+ " Attempting uninstall: scipy\n",
+ " Found existing installation: scipy 1.13.1\n",
+ " Uninstalling scipy-1.13.1:\n",
+ " Successfully uninstalled scipy-1.13.1\n",
+ " Attempting uninstall: pandas\n",
+ " Found existing installation: pandas 2.2.2\n",
+ " Uninstalling pandas-2.2.2:\n",
+ " Successfully uninstalled pandas-2.2.2\n",
+ " Attempting uninstall: matplotlib\n",
+ " Found existing installation: matplotlib 3.7.1\n",
+ " Uninstalling matplotlib-3.7.1:\n",
+ " Successfully uninstalled matplotlib-3.7.1\n",
+ "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
+ "albucore 0.0.16 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.\n",
+ "albumentations 1.4.15 requires numpy>=1.24.4, but you have numpy 1.23.5 which is incompatible.\n",
+ "bigframes 1.24.0 requires numpy>=1.24.0, but you have numpy 1.23.5 which is incompatible.\n",
+ "chex 0.1.87 requires numpy>=1.24.1, but you have numpy 1.23.5 which is incompatible.\n",
+ "google-colab 1.0.0 requires pandas==2.2.2, but you have pandas 2.0.3 which is incompatible.\n",
+ "google-colab 1.0.0 requires requests==2.32.3, but you have requests 2.31.0 which is incompatible.\n",
+ "jax 0.4.33 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.\n",
+ "jaxlib 0.4.33 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.\n",
+ "mizani 0.11.4 requires pandas>=2.1.0, but you have pandas 2.0.3 which is incompatible.\n",
+ "plotnine 0.13.6 requires pandas<3.0.0,>=2.1.0, but you have pandas 2.0.3 which is incompatible.\n",
+ "xarray 2024.9.0 requires numpy>=1.24, but you have numpy 1.23.5 which is incompatible.\n",
+ "xarray 2024.9.0 requires pandas>=2.1, but you have pandas 2.0.3 which is incompatible.\u001b[0m\u001b[31m\n",
+ "\u001b[0mSuccessfully installed d2l-1.0.3 jedi-0.19.1 jupyter-1.0.0 matplotlib-3.7.2 matplotlib-inline-0.1.6 numpy-1.23.5 pandas-2.0.3 pyparsing-3.0.9 qtconsole-5.6.1 qtpy-2.4.1 requests-2.31.0 scipy-1.10.1\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "application/vnd.colab-display-data+json": {
+ "pip_warning": {
+ "packages": [
+ "matplotlib",
+ "matplotlib_inline",
+ "mpl_toolkits",
+ "numpy"
+ ]
+ },
+ "id": "b7f81e4490e24cdbac3306a3dc38438b"
+ }
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "!pip install d2l==1.0.3\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fb1c337f",
+ "metadata": {
+ "origin_pos": 1,
+ "id": "fb1c337f"
+ },
+ "source": [
+ "# Calculus\n",
+ ":label:`sec_calculus`\n",
+ "\n",
+ "For a long time, how to calculate\n",
+ "the area of a circle remained a mystery.\n",
+ "Then, in Ancient Greece, the mathematician Archimedes\n",
+ "came up with the clever idea\n",
+ "to inscribe a series of polygons\n",
+ "with increasing numbers of vertices\n",
+ "on the inside of a circle\n",
+ "(:numref:`fig_circle_area`).\n",
+ "For a polygon with $n$ vertices,\n",
+ "we obtain $n$ triangles.\n",
+ "The height of each triangle approaches the radius $r$\n",
+ "as we partition the circle more finely.\n",
+ "At the same time, its base approaches $2 \\pi r/n$,\n",
+ "since the ratio between arc and chord approaches 1\n",
+ "for a large number of vertices.\n",
+ "Thus, the area of the polygon approaches\n",
+ "$n \\cdot r \\cdot \\frac{1}{2} (2 \\pi r/n) = \\pi r^2$.\n",
+ "\n",
+ "\n",
+ ":label:`fig_circle_area`\n",
+ "\n",
+ "This limiting procedure is at the root of both\n",
+ "*differential calculus* and *integral calculus*.\n",
+ "The former can tell us how to increase\n",
+ "or decrease a function's value by\n",
+ "manipulating its arguments.\n",
+ "This comes in handy for the *optimization problems*\n",
+ "that we face in deep learning,\n",
+ "where we repeatedly update our parameters\n",
+ "in order to decrease the loss function.\n",
+ "Optimization addresses how to fit our models to training data,\n",
+ "and calculus is its key prerequisite.\n",
+ "However, do not forget that our ultimate goal\n",
+ "is to perform well on *previously unseen* data.\n",
+ "That problem is called *generalization*\n",
+ "and will be a key focus of other chapters.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5162883d",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-08-18T19:31:00.158561Z",
+ "iopub.status.busy": "2023-08-18T19:31:00.158199Z",
+ "iopub.status.idle": "2023-08-18T19:31:03.258372Z",
+ "shell.execute_reply": "2023-08-18T19:31:03.256925Z"
+ },
+ "origin_pos": 3,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "5162883d"
+ },
+ "outputs": [],
+ "source": [
+ "%matplotlib inline\n",
+ "import numpy as np\n",
+ "from matplotlib_inline import backend_inline\n",
+ "from d2l import torch as d2l"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8ad81b23",
+ "metadata": {
+ "origin_pos": 6,
+ "id": "8ad81b23"
+ },
+ "source": [
+ "## Derivatives and Differentiation\n",
+ "\n",
+ "Put simply, a *derivative* is the rate of change\n",
+ "in a function with respect to changes in its arguments.\n",
+ "Derivatives can tell us how rapidly a loss function\n",
+ "would increase or decrease were we\n",
+ "to *increase* or *decrease* each parameter\n",
+ "by an infinitesimally small amount.\n",
+ "Formally, for functions $f: \\mathbb{R} \\rightarrow \\mathbb{R}$,\n",
+ "that map from scalars to scalars,\n",
+ "[**the *derivative* of $f$ at a point $x$ is defined as**]\n",
+ "\n",
+ "(**$$f'(x) = \\lim_{h \\rightarrow 0} \\frac{f(x+h) - f(x)}{h}.$$**)\n",
+ ":eqlabel:`eq_derivative`\n",
+ "\n",
+ "This term on the right-hand side is called a *limit*\n",
+ "and it tells us what happens\n",
+ "to the value of an expression\n",
+ "as a specified variable\n",
+ "approaches a particular value.\n",
+ "This limit tells us what\n",
+ "the ratio of the change in the function value\n",
+ "$f(x + h) - f(x)$ to the perturbation $h$\n",
+ "converges to\n",
+ "as we shrink $h$ to zero.\n",
+ "\n",
+ "When $f'(x)$ exists, $f$ is said\n",
+ "to be *differentiable* at $x$;\n",
+ "and when $f'(x)$ exists for all $x$\n",
+ "on a set, e.g., the interval $[a,b]$,\n",
+ "we say that $f$ is differentiable on this set.\n",
+ "Not all functions are differentiable,\n",
+ "including many that we wish to optimize,\n",
+ "such as accuracy and the area under the\n",
+ "receiver operating characteristic curve (AUC).\n",
+ "However, because computing the derivative of the loss\n",
+ "is a crucial step in nearly all\n",
+ "algorithms for training deep neural networks,\n",
+ "we often optimize a differentiable *surrogate* instead.\n",
+ "\n",
+ "\n",
+ "We can interpret the derivative\n",
+ "$f'(x)$\n",
+ "as the *instantaneous* rate of change\n",
+ "of $f(x)$ with respect to $x$.\n",
+ "Let's develop some intuition with an example.\n",
+ "(**Define $u = f(x) = 3x^2-4x$.**)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e351acdb",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-08-18T19:31:03.263539Z",
+ "iopub.status.busy": "2023-08-18T19:31:03.262795Z",
+ "iopub.status.idle": "2023-08-18T19:31:03.267363Z",
+ "shell.execute_reply": "2023-08-18T19:31:03.266349Z"
+ },
+ "origin_pos": 8,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "e351acdb"
+ },
+ "outputs": [],
+ "source": [
+ "def f(x):\n",
+ " return 3 * x ** 2 - 4 * x"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "50148144",
+ "metadata": {
+ "origin_pos": 11,
+ "id": "50148144"
+ },
+ "source": [
+ "[**Setting $x=1$, we see that $\\frac{f(x+h) - f(x)}{h}$**] (**approaches $2$\n",
+ "as $h$ approaches $0$.**)\n",
+ "While this experiment lacks\n",
+ "the rigor of a mathematical proof,\n",
+ "we can quickly see that indeed $f'(1) = 2$.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7b5e7cf2",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-08-18T19:31:03.271432Z",
+ "iopub.status.busy": "2023-08-18T19:31:03.270665Z",
+ "iopub.status.idle": "2023-08-18T19:31:03.276568Z",
+ "shell.execute_reply": "2023-08-18T19:31:03.275548Z"
+ },
+ "origin_pos": 12,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "7b5e7cf2",
+ "outputId": "50b7f187-3430-4a25-bd46-22283b596474"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "h=0.10000, numerical limit=2.30000\n",
+ "h=0.01000, numerical limit=2.03000\n",
+ "h=0.00100, numerical limit=2.00300\n",
+ "h=0.00010, numerical limit=2.00030\n",
+ "h=0.00001, numerical limit=2.00003\n"
+ ]
+ }
+ ],
+ "source": [
+ "for h in 10.0**np.arange(-1, -6, -1):\n",
+ " print(f'h={h:.5f}, numerical limit={(f(1+h)-f(1))/h:.5f}')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0be7fece",
+ "metadata": {
+ "origin_pos": 13,
+ "id": "0be7fece"
+ },
+ "source": [
+ "There are several equivalent notational conventions for derivatives.\n",
+ "Given $y = f(x)$, the following expressions are equivalent:\n",
+ "\n",
+ "$$f'(x) = y' = \\frac{dy}{dx} = \\frac{df}{dx} = \\frac{d}{dx} f(x) = Df(x) = D_x f(x),$$\n",
+ "\n",
+ "where the symbols $\\frac{d}{dx}$ and $D$ are *differentiation operators*.\n",
+ "Below, we present the derivatives of some common functions:\n",
+ "\n",
+ "$$\\begin{aligned} \\frac{d}{dx} C & = 0 && \\textrm{for any constant $C$} \\\\ \\frac{d}{dx} x^n & = n x^{n-1} && \\textrm{for } n \\neq 0 \\\\ \\frac{d}{dx} e^x & = e^x \\\\ \\frac{d}{dx} \\ln x & = x^{-1}. \\end{aligned}$$\n",
+ "\n",
+ "Functions composed from differentiable functions\n",
+ "are often themselves differentiable.\n",
+ "The following rules come in handy\n",
+ "for working with compositions\n",
+ "of any differentiable functions\n",
+ "$f$ and $g$, and constant $C$.\n",
+ "\n",
+ "$$\\begin{aligned} \\frac{d}{dx} [C f(x)] & = C \\frac{d}{dx} f(x) && \\textrm{Constant multiple rule} \\\\ \\frac{d}{dx} [f(x) + g(x)] & = \\frac{d}{dx} f(x) + \\frac{d}{dx} g(x) && \\textrm{Sum rule} \\\\ \\frac{d}{dx} [f(x) g(x)] & = f(x) \\frac{d}{dx} g(x) + g(x) \\frac{d}{dx} f(x) && \\textrm{Product rule} \\\\ \\frac{d}{dx} \\frac{f(x)}{g(x)} & = \\frac{g(x) \\frac{d}{dx} f(x) - f(x) \\frac{d}{dx} g(x)}{g^2(x)} && \\textrm{Quotient rule} \\end{aligned}$$\n",
+ "\n",
+ "Using this, we can apply the rules\n",
+ "to find the derivative of $3 x^2 - 4x$ via\n",
+ "\n",
+ "$$\\frac{d}{dx} [3 x^2 - 4x] = 3 \\frac{d}{dx} x^2 - 4 \\frac{d}{dx} x = 6x - 4.$$\n",
+ "\n",
+ "Plugging in $x = 1$ shows that, indeed,\n",
+ "the derivative equals $2$ at this location.\n",
+ "Note that derivatives tell us\n",
+ "the *slope* of a function\n",
+ "at a particular location. \n",
+ "\n",
+ "## Visualization Utilities\n",
+ "\n",
+ "[**We can visualize the slopes of functions using the `matplotlib` library**].\n",
+ "We need to define a few functions.\n",
+ "As its name indicates, `use_svg_display`\n",
+ "tells `matplotlib` to output graphics\n",
+ "in SVG format for crisper images.\n",
+ "The comment `#@save` is a special modifier\n",
+ "that allows us to save any function,\n",
+ "class, or other code block to the `d2l` package\n",
+ "so that we can invoke it later\n",
+ "without repeating the code,\n",
+ "e.g., via `d2l.use_svg_display()`.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "70d92d2a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-08-18T19:31:03.280403Z",
+ "iopub.status.busy": "2023-08-18T19:31:03.280065Z",
+ "iopub.status.idle": "2023-08-18T19:31:03.284973Z",
+ "shell.execute_reply": "2023-08-18T19:31:03.283950Z"
+ },
+ "origin_pos": 14,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "70d92d2a"
+ },
+ "outputs": [],
+ "source": [
+ "def use_svg_display(): #@save\n",
+ " \"\"\"Use the svg format to display a plot in Jupyter.\"\"\"\n",
+ " backend_inline.set_matplotlib_formats('svg')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ef7c490f",
+ "metadata": {
+ "origin_pos": 15,
+ "id": "ef7c490f"
+ },
+ "source": [
+ "Conveniently, we can set figure sizes with `set_figsize`.\n",
+ "Since the import statement `from matplotlib import pyplot as plt`\n",
+ "was marked via `#@save` in the `d2l` package, we can call `d2l.plt`.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c868cdf6",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-08-18T19:31:03.289140Z",
+ "iopub.status.busy": "2023-08-18T19:31:03.288531Z",
+ "iopub.status.idle": "2023-08-18T19:31:03.293764Z",
+ "shell.execute_reply": "2023-08-18T19:31:03.292757Z"
+ },
+ "origin_pos": 16,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "c868cdf6"
+ },
+ "outputs": [],
+ "source": [
+ "def set_figsize(figsize=(3.5, 2.5)): #@save\n",
+ " \"\"\"Set the figure size for matplotlib.\"\"\"\n",
+ " use_svg_display()\n",
+ " d2l.plt.rcParams['figure.figsize'] = figsize"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f2bd084c",
+ "metadata": {
+ "origin_pos": 17,
+ "id": "f2bd084c"
+ },
+ "source": [
+ "The `set_axes` function can associate axes\n",
+ "with properties, including labels, ranges,\n",
+ "and scales.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8860f929",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-08-18T19:31:03.297796Z",
+ "iopub.status.busy": "2023-08-18T19:31:03.297092Z",
+ "iopub.status.idle": "2023-08-18T19:31:03.303068Z",
+ "shell.execute_reply": "2023-08-18T19:31:03.302068Z"
+ },
+ "origin_pos": 18,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "8860f929"
+ },
+ "outputs": [],
+ "source": [
+ "#@save\n",
+ "def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):\n",
+ " \"\"\"Set the axes for matplotlib.\"\"\"\n",
+ " axes.set_xlabel(xlabel), axes.set_ylabel(ylabel)\n",
+ " axes.set_xscale(xscale), axes.set_yscale(yscale)\n",
+ " axes.set_xlim(xlim), axes.set_ylim(ylim)\n",
+ " if legend:\n",
+ " axes.legend(legend)\n",
+ " axes.grid()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d8825398",
+ "metadata": {
+ "origin_pos": 19,
+ "id": "d8825398"
+ },
+ "source": [
+ "With these three functions, we can define a `plot` function\n",
+ "to overlay multiple curves.\n",
+ "Much of the code here is just ensuring\n",
+ "that the sizes and shapes of inputs match.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0d56dd86",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-08-18T19:31:03.307130Z",
+ "iopub.status.busy": "2023-08-18T19:31:03.306443Z",
+ "iopub.status.idle": "2023-08-18T19:31:03.316351Z",
+ "shell.execute_reply": "2023-08-18T19:31:03.315391Z"
+ },
+ "origin_pos": 20,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "0d56dd86"
+ },
+ "outputs": [],
+ "source": [
+ "#@save\n",
+ "def plot(X, Y=None, xlabel=None, ylabel=None, legend=[], xlim=None,\n",
+ " ylim=None, xscale='linear', yscale='linear',\n",
+ " fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):\n",
+ " \"\"\"Plot data points.\"\"\"\n",
+ "\n",
+ " def has_one_axis(X): # True if X (tensor or list) has 1 axis\n",
+ " return (hasattr(X, \"ndim\") and X.ndim == 1 or isinstance(X, list)\n",
+ " and not hasattr(X[0], \"__len__\"))\n",
+ "\n",
+ " if has_one_axis(X): X = [X]\n",
+ " if Y is None:\n",
+ " X, Y = [[]] * len(X), X\n",
+ " elif has_one_axis(Y):\n",
+ " Y = [Y]\n",
+ " if len(X) != len(Y):\n",
+ " X = X * len(Y)\n",
+ "\n",
+ " set_figsize(figsize)\n",
+ " if axes is None:\n",
+ " axes = d2l.plt.gca()\n",
+ " axes.cla()\n",
+ " for x, y, fmt in zip(X, Y, fmts):\n",
+ " axes.plot(x,y,fmt) if len(x) else axes.plot(y,fmt)\n",
+ " set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "68b9ef55",
+ "metadata": {
+ "origin_pos": 21,
+ "id": "68b9ef55"
+ },
+ "source": [
+ "Now we can [**plot the function $u = f(x)$ and its tangent line $y = 2x - 3$ at $x=1$**],\n",
+ "where the coefficient $2$ is the slope of the tangent line.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1a22ce3a",
+ "metadata": {
+ "execution": {
+ "iopub.execute_input": "2023-08-18T19:31:03.320345Z",
+ "iopub.status.busy": "2023-08-18T19:31:03.319698Z",
+ "iopub.status.idle": "2023-08-18T19:31:03.596083Z",
+ "shell.execute_reply": "2023-08-18T19:31:03.594940Z"
+ },
+ "origin_pos": 22,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "1a22ce3a",
+ "outputId": "556ed2a5-c2c8-4b3b-ef6c-72422c0483c2"
+ },
+ "outputs": [
+ {
+ "data": {
+ "image/svg+xml": [
+ "\n",
+ "\n",
+ "\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "x = np.arange(0, 3, 0.1)\n",
+ "plot(x, [f(x), 2 * x - 3], 'x', 'f(x)', legend=['f(x)', 'Tangent line (x=1)'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ebd783d4",
+ "metadata": {
+ "origin_pos": 23,
+ "id": "ebd783d4"
+ },
+ "source": [
+ "## Partial Derivatives and Gradients\n",
+ ":label:`subsec_calculus-grad`\n",
+ "\n",
+ "Thus far, we have been differentiating\n",
+ "functions of just one variable.\n",
+ "In deep learning, we also need to work\n",
+ "with functions of *many* variables.\n",
+ "We briefly introduce notions of the derivative\n",
+ "that apply to such *multivariate* functions.\n",
+ "\n",
+ "\n",
+ "Let $y = f(x_1, x_2, \\ldots, x_n)$ be a function with $n$ variables.\n",
+ "The *partial derivative* of $y$\n",
+ "with respect to its $i^\\textrm{th}$ parameter $x_i$ is\n",
+ "\n",
+ "$$ \\frac{\\partial y}{\\partial x_i} = \\lim_{h \\rightarrow 0} \\frac{f(x_1, \\ldots, x_{i-1}, x_i+h, x_{i+1}, \\ldots, x_n) - f(x_1, \\ldots, x_i, \\ldots, x_n)}{h}.$$\n",
+ "\n",
+ "\n",
+ "To calculate $\\frac{\\partial y}{\\partial x_i}$,\n",
+ "we can treat $x_1, \\ldots, x_{i-1}, x_{i+1}, \\ldots, x_n$ as constants\n",
+ "and calculate the derivative of $y$ with respect to $x_i$.\n",
+ "The following notational conventions for partial derivatives\n",
+ "are all common and all mean the same thing:\n",
+ "\n",
+ "$$\\frac{\\partial y}{\\partial x_i} = \\frac{\\partial f}{\\partial x_i} = \\partial_{x_i} f = \\partial_i f = f_{x_i} = f_i = D_i f = D_{x_i} f.$$\n",
+ "\n",
+ "We can concatenate partial derivatives\n",
+ "of a multivariate function\n",
+ "with respect to all its variables\n",
+ "to obtain a vector that is called\n",
+ "the *gradient* of the function.\n",
+ "Suppose that the input of function\n",
+ "$f: \\mathbb{R}^n \\rightarrow \\mathbb{R}$\n",
+ "is an $n$-dimensional vector\n",
+ "$\\mathbf{x} = [x_1, x_2, \\ldots, x_n]^\\top$\n",
+ "and the output is a scalar.\n",
+ "The gradient of the function $f$\n",
+ "with respect to $\\mathbf{x}$\n",
+ "is a vector of $n$ partial derivatives:\n",
+ "\n",
+ "$$\\nabla_{\\mathbf{x}} f(\\mathbf{x}) = \\left[\\partial_{x_1} f(\\mathbf{x}), \\partial_{x_2} f(\\mathbf{x}), \\ldots,\n",
+ "\\partial_{x_n} f(\\mathbf{x})\\right]^\\top.$$\n",
+ "\n",
+ "When there is no ambiguity,\n",
+ "$\\nabla_{\\mathbf{x}} f(\\mathbf{x})$\n",
+ "is typically replaced\n",
+ "by $\\nabla f(\\mathbf{x})$.\n",
+ "The following rules come in handy\n",
+ "for differentiating multivariate functions:\n",
+ "\n",
+ "* For all $\\mathbf{A} \\in \\mathbb{R}^{m \\times n}$ we have $\\nabla_{\\mathbf{x}} \\mathbf{A} \\mathbf{x} = \\mathbf{A}^\\top$ (with $\\mathbf{x} \\in \\mathbb{R}^n$) and $\\nabla_{\\mathbf{x}} \\mathbf{x}^\\top \\mathbf{A} = \\mathbf{A}$ (with $\\mathbf{x} \\in \\mathbb{R}^m$).\n",
+ "* For square matrices $\\mathbf{A} \\in \\mathbb{R}^{n \\times n}$ we have that $\\nabla_{\\mathbf{x}} \\mathbf{x}^\\top \\mathbf{A} \\mathbf{x} = (\\mathbf{A} + \\mathbf{A}^\\top)\\mathbf{x}$ and in particular\n",
+ "$\\nabla_{\\mathbf{x}} \\|\\mathbf{x} \\|^2 = \\nabla_{\\mathbf{x}} \\mathbf{x}^\\top \\mathbf{x} = 2\\mathbf{x}$.\n",
+ "\n",
+ "Similarly, for any matrix $\\mathbf{X}$,\n",
+ "we have $\\nabla_{\\mathbf{X}} \\|\\mathbf{X} \\|_\\textrm{F}^2 = 2\\mathbf{X}$.\n",
+ "\n",
+ "\n",
+ "\n",
+ "## Chain Rule\n",
+ "\n",
+ "In deep learning, the gradients of concern\n",
+ "are often difficult to calculate\n",
+ "because we are working with\n",
+ "deeply nested functions\n",
+ "(of functions (of functions...)).\n",
+ "Fortunately, the *chain rule* takes care of this.\n",
+ "Returning to functions of a single variable,\n",
+ "suppose that $y = f(g(x))$\n",
+ "and that the underlying functions\n",
+ "$y=f(u)$ and $u=g(x)$\n",
+ "are both differentiable.\n",
+ "The chain rule states that\n",
+ "\n",
+ "\n",
+ "$$\\frac{dy}{dx} = \\frac{dy}{du} \\frac{du}{dx}.$$\n",
+ "\n",
+ "\n",
+ "\n",
+ "Turning back to multivariate functions,\n",
+ "suppose that $y = f(\\mathbf{u})$ has variables\n",
+ "$u_1, u_2, \\ldots, u_m$,\n",
+ "where each $u_i = g_i(\\mathbf{x})$\n",
+ "has variables $x_1, x_2, \\ldots, x_n$,\n",
+ "i.e., $\\mathbf{u} = g(\\mathbf{x})$.\n",
+ "Then the chain rule states that\n",
+ "\n",
+ "$$\\frac{\\partial y}{\\partial x_{i}} = \\frac{\\partial y}{\\partial u_{1}} \\frac{\\partial u_{1}}{\\partial x_{i}} + \\frac{\\partial y}{\\partial u_{2}} \\frac{\\partial u_{2}}{\\partial x_{i}} + \\ldots + \\frac{\\partial y}{\\partial u_{m}} \\frac{\\partial u_{m}}{\\partial x_{i}} \\ \\textrm{ and so } \\ \\nabla_{\\mathbf{x}} y = \\mathbf{A} \\nabla_{\\mathbf{u}} y,$$\n",
+ "\n",
+ "where $\\mathbf{A} \\in \\mathbb{R}^{n \\times m}$ is a *matrix*\n",
+ "that contains the derivative of vector $\\mathbf{u}$\n",
+ "with respect to vector $\\mathbf{x}$.\n",
+ "Thus, evaluating the gradient requires\n",
+ "computing a vector–matrix product.\n",
+ "This is one of the key reasons why linear algebra\n",
+ "is such an integral building block\n",
+ "in building deep learning systems.\n",
+ "\n",
+ "\n",
+ "\n",
+ "## Discussion\n",
+ "\n",
+ "While we have just scratched the surface of a deep topic,\n",
+ "a number of concepts already come into focus:\n",
+ "first, the composition rules for differentiation\n",
+ "can be applied routinely, enabling\n",
+ "us to compute gradients *automatically*.\n",
+ "This task requires no creativity and thus\n",
+ "we can focus our cognitive powers elsewhere.\n",
+ "Second, computing the derivatives of vector-valued functions\n",
+ "requires us to multiply matrices as we trace\n",
+ "the dependency graph of variables from output to input.\n",
+ "In particular, this graph is traversed in a *forward* direction\n",
+ "when we evaluate a function\n",
+ "and in a *backward* direction\n",
+ "when we compute gradients.\n",
+ "Later chapters will formally introduce backpropagation,\n",
+ "a computational procedure for applying the chain rule.\n",
+ "\n",
+ "From the viewpoint of optimization, gradients allow us\n",
+ "to determine how to move the parameters of a model\n",
+ "in order to lower the loss,\n",
+ "and each step of the optimization algorithms used\n",
+ "throughout this book will require calculating the gradient.\n",
+ "\n",
+ "## Exercises\n",
+ "\n",
+ "1. So far we took the rules for derivatives for granted.\n",
+ " Using the definition and limits prove the properties\n",
+ " for (i) $f(x) = c$, (ii) $f(x) = x^n$, (iii) $f(x) = e^x$ and (iv) $f(x) = \\log x$.\n",
+ "1. In the same vein, prove the product, sum, and quotient rules from first principles.\n",
+ "1. Prove that the constant multiple rule follows as a special case of the product rule.\n",
+ "1. Calculate the derivative of $f(x) = x^x$.\n",
+ "1. What does it mean that $f'(x) = 0$ for some $x$?\n",
+ " Give an example of a function $f$\n",
+ " and a location $x$ for which this might hold.\n",
+ "1. Plot the function $y = f(x) = x^3 - \\frac{1}{x}$\n",
+ " and plot its tangent line at $x = 1$.\n",
+ "1. Find the gradient of the function\n",
+ " $f(\\mathbf{x}) = 3x_1^2 + 5e^{x_2}$.\n",
+ "1. What is the gradient of the function\n",
+ " $f(\\mathbf{x}) = \\|\\mathbf{x}\\|_2$? What happens for $\\mathbf{x} = \\mathbf{0}$?\n",
+ "1. Can you write out the chain rule for the case\n",
+ " where $u = f(x, y, z)$ and $x = x(a, b)$, $y = y(a, b)$, and $z = z(a, b)$?\n",
+ "1. Given a function $f(x)$ that is invertible,\n",
+ " compute the derivative of its inverse $f^{-1}(x)$.\n",
+ " Here we have that $f^{-1}(f(x)) = x$ and conversely $f(f^{-1}(y)) = y$.\n",
+ " Hint: use these properties in your derivation.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e6a8d8e9",
+ "metadata": {
+ "origin_pos": 25,
+ "tab": [
+ "pytorch"
+ ],
+ "id": "e6a8d8e9"
+ },
+ "source": [
+ "[Discussions](https://discuss.d2l.ai/t/33)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "name": "python3"
+ },
+ "language_info": {
+ "name": "python"
+ },
+ "required_libs": [],
+ "colab": {
+ "provenance": []
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file