diff --git a/.github/workflows/bot-label-lgtm.yaml b/.github/workflows/bot-label-lgtm.yaml deleted file mode 100644 index 27f50375cdc..00000000000 --- a/.github/workflows/bot-label-lgtm.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# This workflow adds the community approval label ("lgtm") to pull requests. It -# does *not* indicate maintainer approval. This a way to visually highlight that -# someone in the world thinks the pull request is ready for further review. This -# event is triggered by a pull request approval, or simply a comment that -# contains the text "lgtm". -# Webhook events: Issue comments, Pull request reviews -name: Community approval -on: - repository_dispatch: - # From: issue_comment, pull_request_review - types: [created, edited, submitted] - -jobs: - lgtm-comment: - # Check the comment. contains() is case-insensitive. - if: >- - ${{ github.actor == 'tfdocsbot' && - contains(github.event.client_payload.comment.body, 'LGTM') }} - runs-on: ubuntu-latest - steps: - - name: Add label - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ISSUE_URL: ${{ github.event.client_payload.comment.issue_url }} - run: | - curl -X POST \ - -H "Accept: application/vnd.github.v3+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - "${ISSUE_URL}/labels" \ - --data '{"labels":["lgtm"]}' - - review-approval: - # Check the pull request review. - if: >- - ${{ github.actor == 'tfdocsbot' && - contains(github.event.client_payload.review.state, 'approved') }} - runs-on: ubuntu-latest - steps: - - name: Add label - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ISSUE_URL: ${{ github.event.client_payload.pull_request.issue_url }} - run: | - curl -X POST \ - -H "Accept: application/vnd.github.v3+json" \ - -H "Authorization: token $GITHUB_TOKEN" \ - "${ISSUE_URL}/labels" \ - --data '{"labels":["lgtm"]}' diff --git a/.github/workflows/bot-nightly.yaml b/.github/workflows/bot-nightly.yaml deleted file mode 100644 index a0595c74a0b..00000000000 --- a/.github/workflows/bot-nightly.yaml +++ /dev/null @@ -1,65 +0,0 @@ -# Nightly jobs run by a bot collaborator. -name: Nightly jobs -on: - repository_dispatch: - types: [nightly] - -jobs: - snapshot-source: - name: Update Keras guides - if : ${{ github.actor == 'tfdocsbot' }} - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - with: - repository: keras-team/keras-io - path: keras-io - - uses: actions/checkout@v2 - with: - # tensorflow/docs branch to save generated notebooks. - ref: snapshot-keras - path: docs - - name: Set up repo - run: | - # Set commit author. - git config --global user.name "$GITHUB_ACTOR" - git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: '3.8' - - name: Install requirements - run: | - python3 -m pip install -U pip - python3 -m pip install -U -r keras-io/requirements.txt - python3 -m pip install -U git+https://github.com/tensorflow/docs - - name: Generate Keras notebooks - run: | - # Autogen requires formated code - echo "[${GITHUB_WORKFLOW}] Format Python files ..." - python -m black keras-io/guides/ - mkdir -p keras-io/tf # Make sure output dir exists. - cd keras-io/scripts/ - echo "[${GITHUB_WORKFLOW}] Generate Keras guides ..." - python3 autogen.py generate_tf_guides - echo "[${GITHUB_WORKFLOW}] Format notebooks ..." 
- python3 -m tensorflow_docs.tools.nbfmt ../tf/ - - name: Sync docs repo - env: - KERAS_GUIDES_DIR: site/en/guide/keras/ - run: | - rsync --archive --del --checksum ./keras-io/tf/ "./docs/${KERAS_GUIDES_DIR}" - cd docs - if [[ -z $(git status -s | grep "$KERAS_GUIDES_DIR") ]]; then - echo "[${GITHUB_WORKFLOW}] No Keras guides updated, exiting." - exit 0 - fi - # Match timestamp format to other snapshot messages. - fmt_rfc7231="%a, %d %b %Y %H:%M:%S %Z" - TIMESTAMP_STR=$(TZ=GMT date +"$fmt_rfc7231") - - git add "./${KERAS_GUIDES_DIR}" - git commit -m "Keras guides snapshot: ${TIMESTAMP_STR}" - # Push to current branch. - echo "[${GITHUB_WORKFLOW}] Push changes to repo ..." - git push origin diff --git a/.github/workflows/bot-pr-fix.yaml b/.github/workflows/bot-pr-fix.yaml deleted file mode 100644 index a8ead3aa7ae..00000000000 --- a/.github/workflows/bot-pr-fix.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Automatically add commits to fix pull requests. This workflow must initiate -# from an authenticated bot repo collaborator. Check for opt-out label. -# Webhook events: Pull requests -name: Auto-fix pull request -on: - repository_dispatch: - types: [opened, synchronize] - -jobs: - nbfmt: - # Check for opt-out label. - if: >- - ${{ github.actor == 'tfdocsbot' && - !contains(github.event.client_payload.pull_request.labels.*.name, 'nbfmt-disable') }} - runs-on: ubuntu-latest - steps: - - name: Set up Python - uses: actions/setup-python@v2 - - name: Install tensorflow-docs - run: python3 -m pip install -U git+https://github.com/tensorflow/docs - - name: Fetch pull request branch - uses: actions/checkout@v2 - with: - # Head repo is the user's fork. Ref is the branch name. - repository: ${{ github.event.client_payload.pull_request.head.repo.full_name }} - ref: ${{ github.event.client_payload.pull_request.head.ref }} - - name: Fetch base master branch - run: git fetch -u "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" master:master - - name: Format notebooks - run: | - # Only want notebooks modified in this pull request. - readarray -t changed_files < <(git diff --name-only master | grep '\.ipynb$' || true) - if [[ ${#changed_files[@]} == 0 ]]; then - echo "No notebooks modified in this pull request." - exit 0 - fi - python3 -m tensorflow_docs.tools.nbfmt "${changed_files[@]}" - - if [[ -z $(git ls-files --modified) ]]; then - echo "Notebooks already formatted." - exit 0 - fi - # Set author and commit. - git config --global user.name "$GITHUB_ACTOR" - git config --global user.email "$GITHUB_ACTOR@users.noreply.github.com" - git commit -am "nbfmt" - # Push to the pull request branch submitted by head. 
- git push diff --git a/.github/workflows/bot-pr-new.yaml b/.github/workflows/bot-pr-new.yaml index 7f2c6164832..13724cc14f0 100644 --- a/.github/workflows/bot-pr-new.yaml +++ b/.github/workflows/bot-pr-new.yaml @@ -6,8 +6,15 @@ on: repository_dispatch: types: [opened, reopened] +permissions: + contents: read # to fetch code (actions/checkout) + jobs: comment-welcome: + permissions: + contents: read # to fetch code (actions/checkout) + pull-requests: write # to comment on pull-request + if: ${{ github.actor == 'tfdocsbot' }} runs-on: ubuntu-latest steps: @@ -15,7 +22,7 @@ jobs: uses: actions/checkout@v2 with: repository: ${{ github.event.client_payload.pull_request.head.repo.full_name }} - ref: ${{ github.event.client_payload.pull_request.head.ref }} + ref: ${{ github.event.client_payload.pull_request.head.sha }} - name: Fetch base master branch run: git fetch -u "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" master:master - name: Create message diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 00000000000..0ca76b0677e --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,49 @@ +# This workflow warns and then closes issues and PRs that have had no activity for a specified amount of time. +# +# You can adjust the behavior by modifying this file. +# For more information, see: +# https://github.com/actions/stale +name: Mark stale issues and pull requests + +on: + schedule: + # Scheduled to run at 1.30 UTC everyday + - cron: '30 1 * * *' + workflow_dispatch: + +jobs: + stale: + + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + + steps: + - uses: actions/stale@v9 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-issue-stale: 14 + days-before-issue-close: 14 + stale-issue-label: "status:stale" + close-issue-reason: not_planned + any-of-labels: "awaiting-contributor-response,cla:no" + stale-issue-message: > + Marking this issue as stale since it has been open for 14 days with no activity. + This issue will be closed if no further activity occurs. + close-issue-message: > + This issue was closed because it has been inactive for 28 days. + Please post a new issue if you need further assistance. Thanks! + days-before-pr-stale: 14 + days-before-pr-close: 14 + stale-pr-label: "status:stale" + stale-pr-message: > + Marking this pull request as stale since it has been open for 14 days with no activity. + This PR will be closed if no further activity occurs. + close-pr-message: > + This pull request was closed because it has been inactive for 28 days. + Please open a new pull request if you need further assistance. Thanks! + # Label that can be assigned to issues to exclude them from being marked as stale + exempt-issue-labels: 'override-stale' + # Label that can be assigned to PRs to exclude them from being marked as stale + exempt-pr-labels: "override-stale" diff --git a/CODEOWNERS b/CODEOWNERS index 965b3a081ca..d4d2932d8bc 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -2,10 +2,13 @@ # Last matching pattern takes precedence. # Default owners for everything in repo. 
-* @MarkDaoust @8bitmp3 +* @tensorflow/docs-team -# Docs -/site/en/guide/keras/ @fchollet @MarkDaoust @8bitmp3 +# Install +/site/en/install/ @haifeng-jin @MarkDaoust @8bitmp3 # Community /site/en/community/ @ewilderj @theadactyl @joanafilipa + +# Hub +/site/en/hub @gustheman \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 1559b721f51..6f301eab782 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -6,9 +6,7 @@ This guide shows how to make contributions to [tensorflow.org](https://www.tenso See the [TensorFlow docs contributor guide](https://www.tensorflow.org/community/contribute/docs) -for guidance. For questions, the -[docs@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs) -mailing list is available. +for guidance. For questions, check out [TensorFlow Forum](https://discuss.tensorflow.org/). Questions about TensorFlow usage are better addressed on [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow) or the diff --git a/LICENSE b/LICENSE index 4862420c023..08026f1ac8e 100644 --- a/LICENSE +++ b/LICENSE @@ -201,3 +201,28 @@ Copyright 2018 The TensorFlow Authors. All rights reserved. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. + + +--------------------------- + +Where indicated, some files are also distributed under the MIT License: + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/README.md b/README.md index 7b94ce5f90f..66b6d3fb065 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ To file a docs issue, use the issue tracker in the [tensorflow/tensorflow](https://github.com/tensorflow/tensorflow/issues/new?template=20-documentation-issue.md) repo. And join the TensorFlow documentation contributors on the -[docs@tensorflow.org mailing list](https://groups.google.com/a/tensorflow.org/forum/#!forum/docs). +[TensorFlow Forum](https://discuss.tensorflow.org/). 
## Community translations diff --git a/setup.py b/setup.py index 8c24bee6204..404479668b3 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ # ============================================================================== """tensorflow_docs is a package for generating python api-reference docs.""" +import datetime import subprocess import sys @@ -21,7 +22,22 @@ from setuptools import setup project_name = 'tensorflow-docs' -version = '0.0.0.dev0' + + +def get_version() -> str: + ts = int( + subprocess.check_output(['git', 'log', '-1', '--format=%ct', 'tools']) + .decode('utf-8') + .strip() + ) + dt = datetime.datetime.utcfromtimestamp(ts) + sec = 60 * 60 * dt.hour + 60 * dt.minute + dt.second + + # calver.org + return f'{dt.year}.{dt.month}.{dt.day}.{sec}' + + +version = get_version() DOCLINES = __doc__.split('\n') @@ -29,15 +45,12 @@ 'astor', 'absl-py', 'jinja2', - 'protobuf>=3.14', + 'nbformat', + 'protobuf>=3.12', 'pyyaml', ] -# Dataclasses is in-built from py >=3.7. This version is a backport for py 3.6. -if (sys.version_info.major, sys.version_info.minor) == (3, 6): - REQUIRED_PKGS.append('dataclasses') - -VIS_REQURE = [ +VIS_REQUIRE = [ 'numpy', 'PILLOW', 'webp', @@ -46,6 +59,7 @@ # https://setuptools.readthedocs.io/en/latest/setuptools.html#new-and-changed-setup-keywords setup( name=project_name, + python_requires='>=3.9', version=version, description=DOCLINES[0], long_description='\n'.join(DOCLINES[2:]), @@ -58,7 +72,7 @@ package_dir={'': 'tools'}, scripts=[], install_requires=REQUIRED_PKGS, - extras_require={'vis': VIS_REQURE}, + extras_require={'vis': VIS_REQUIRE}, classifiers=[ 'Development Status :: 4 - Beta', 'Intended Audience :: Developers', diff --git a/site/en/README.md b/site/en/README.md index 21bdd58fb9d..28dc0cce7d4 100644 --- a/site/en/README.md +++ b/site/en/README.md @@ -39,7 +39,7 @@ tensorflow.org project | GitHub docs location [/ranking](https://www.tensorflow.org/ranking) | https://github.com/tensorflow/ranking/tree/master/docs [/recommenders](https://www.tensorflow.org/recommenders) | https://github.com/tensorflow/recommenders/tree/main/docs [/responsible_ai/fairness_indicators](https://www.tensorflow.org/responsible_ai/fairness_indicators/guide) | https://github.com/tensorflow/fairness-indicators/tree/master/g3doc -[/responsible_ai/model_card_toolkit](https://www.tensorflow.org/responsible_ai/model_card_toolkit/guide) | https://github.com/tensorflow/model-card-toolkit/tree/master/model_card_toolkit/documentation +[/responsible_ai/model_card_toolkit](https://www.tensorflow.org/responsible_ai/model_card_toolkit/guide) | https://github.com/tensorflow/model-card-toolkit/tree/main/model_card_toolkit/documentation [/responsible_ai/model_remediation](https://www.tensorflow.org/responsible_ai/model_remediation) | https://github.com/tensorflow/model-remediation/tree/master/docs [/responsible_ai/privacy](https://www.tensorflow.org/responsible_ai/privacy/guide) | https://github.com/tensorflow/privacy/tree/master/g3doc [/tensorboard](https://www.tensorflow.org/tensorboard) | https://github.com/tensorflow/tensorboard/tree/master/docs diff --git a/site/en/community/contribute/code.md b/site/en/community/contribute/code.md index 790e1c72e71..2f71f12d7fe 100644 --- a/site/en/community/contribute/code.md +++ b/site/en/community/contribute/code.md @@ -2,7 +2,7 @@ Whether you are adding a loss function, improving test coverage, or writing an RFC for a major design change, this portion of the contributor guide will help -you get started. 
Thank you for work and interest in improving TensorFlow. +you get started. Thank you for your work and interest in improving TensorFlow. ## Before you get started @@ -34,7 +34,7 @@ workflow, and for the core devs to become acquainted with the contributor. If you are interested in recruiting a team to help tackle a large-scale problem or a new feature, please email the -[developers@ group](https://groups.google.com/a/tensorflow.org/forum/#!forum/developers) +[developers@ group](https://groups.google.com/a/tensorflow.org/g/developers) and review our current list of RFCs. ## Code review diff --git a/site/en/community/contribute/docs.md b/site/en/community/contribute/docs.md index 29b2b5c9550..34b1619ca5d 100644 --- a/site/en/community/contribute/docs.md +++ b/site/en/community/contribute/docs.md @@ -32,7 +32,7 @@ To participate in the TensorFlow docs community: For details, use the [TensorFlow API docs contributor guide](docs_ref.md). This shows you how to find the -[source file](https://www.tensorflow.org/code/tensorflow/python/) +[source file](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/) and edit the symbol's docstring. Many API reference pages on tensorflow.org include a link to the source file @@ -53,9 +53,9 @@ main tensorflow/tensorflow repo. The reference documentation is generated from code comments and docstrings in the source code for -Python, -C++, and -Java. +Python, +C++, and +Java. Previous versions of the TensorFlow documentation are available as [rX.x branches](https://github.com/tensorflow/docs/branches) in the TensorFlow diff --git a/site/en/community/contribute/docs_ref.md b/site/en/community/contribute/docs_ref.md index 12d43792ebf..41fce4dde40 100644 --- a/site/en/community/contribute/docs_ref.md +++ b/site/en/community/contribute/docs_ref.md @@ -8,7 +8,7 @@ TensorFlow uses [DocTest](https://docs.python.org/3/library/doctest.html) to test code snippets in Python docstrings. The snippet must be executable Python code. To enable testing, prepend the line with `>>>` (three left-angle brackets). For example, here's a excerpt from the `tf.concat` function in the -[array_ops.py](https://www.tensorflow.org/code/tensorflow/python/ops/array_ops.py) +[array_ops.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/array_ops.py) source file: ``` @@ -100,7 +100,7 @@ TensorFlow uses a few customizations to the builtin doctest logic: ``` def NewLayer(): - “””This layer does cool stuff. + """This layer does cool stuff. Example usage: @@ -108,7 +108,7 @@ TensorFlow uses a few customizations to the builtin doctest logic: >>> new_layer = NewLayer(x) >>> new_layer - “”” + """ ``` * *Floating point values*: The TensorFlow doctest extracts float values from @@ -178,7 +178,7 @@ There are two ways to test the code in the docstring locally: * If you are only changing the docstring of a class/function/method, then you can test it by passing that file's path to - [tf_doctest.py](https://www.tensorflow.org/code/tensorflow/tools/docs/tf_doctest.py). + [tf_doctest.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/docs/tf_doctest.py). For example:
diff --git a/site/en/community/contribute/docs_style.md b/site/en/community/contribute/docs_style.md
index eba78afa896..10f18e52699 100644
--- a/site/en/community/contribute/docs_style.md
+++ b/site/en/community/contribute/docs_style.md
@@ -63,10 +63,10 @@ repository like this:
 * \[Basics\]\(../../guide/basics.ipynb\) produces
 [Basics](../../guide/basics.ipynb).
 
-This is the prefered approach because this way the links on
+This is the preferred approach because this way the links on
 [tensorflow.org](https://www.tensorflow.org),
-[GitHub](https://github.com/tensorflow/docs){:.external} and
-[Colab](https://github.com/tensorflow/docs/tree/master/site/en/guide/bazics.ipynb){:.external}
+[GitHub](https://github.com/tensorflow/docs) and
+[Colab](https://github.com/tensorflow/docs/tree/master/site/en/guide/basics.ipynb)
 all work. Also, the reader stays in the same site when they click a link.
 
 Note: You should include the file extension—such as `.ipynb` or `.md`—for
@@ -83,10 +83,10 @@ To link to source code, use a link starting with
 by the file name starting at the GitHub root.
 
 When linking off of [tensorflow.org](https://www.tensorflow.org), include a
-`{:.external}` on the Markdown link so that the "external link" symbol is shown.
+plain Markdown link; the `{:.external}` annotation is no longer needed.
 
-* `[GitHub](https://github.com/tensorflow/docs){:.external}` produces
-  [GitHub](https://github.com/tensorflow/docs){:.external}
+* `[GitHub](https://github.com/tensorflow/docs)` produces
+  [GitHub](https://github.com/tensorflow/docs)
 
 Do not include URI query parameters in the link:
 
diff --git a/site/en/community/mailing-lists.md b/site/en/community/mailing-lists.md
index 54c5d57ab91..35bfb218ba1 100644
--- a/site/en/community/mailing-lists.md
+++ b/site/en/community/mailing-lists.md
@@ -5,7 +5,7 @@ note that if you're looking for help using TensorFlow,
 [TensorFlow Forum](https://discuss.tensorflow.org/),
 [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow), and
 [GitHub issues](https://github.com/tensorflow/tensorflow/issues) are the best
-initial places to look.
+initial places to look. To receive a roundup of updates from the TensorFlow team each quarter, subscribe to the [TensorFlow newsletter](https://services.google.com/fb/forms/tensorflow/).
 
 ## General TensorFlow lists and forums
 
diff --git a/site/en/community/sig_playbook.md b/site/en/community/sig_playbook.md
index 75e277d3d96..6ec7a554a5b 100644
--- a/site/en/community/sig_playbook.md
+++ b/site/en/community/sig_playbook.md
@@ -55,7 +55,7 @@ must demonstrate:
     application area)
 *   Two or more contributors willing to act as group leads, existence of other
     contributors, and evidence of demand for the group
-*   Resources it will initially require (usually, mailing list and regular VC
+*   Resources it will initially require (usually, mailing list and regular video conference 
     call.) 
 
 Approval for the group will be given by a decision of the TF Community Team,
diff --git a/site/en/guide/_index.yaml b/site/en/guide/_index.yaml
index f9d873dbf2a..e39dd37ead5 100644
--- a/site/en/guide/_index.yaml
+++ b/site/en/guide/_index.yaml
@@ -95,10 +95,11 @@ landing_page:
     items:
     - list:
       - description: >
-          
-          A suite of visualization tools to understand, debug, and optimize
-          TensorFlow programs.
-        path: /tensorboard
+          
+          A library to train, run and interpret decision forest models (e.g., Random Forests,
+          Gradient Boosted Trees) in TensorFlow.
+        path: /decision_forests
         icon:
           icon_name: chevron_right
           foreground: theme
@@ -113,10 +114,10 @@ landing_page:
           foreground: theme
           background: grey
       - description: >
-          
-          The TensorFlow Model Optimization Toolkit is a suite of tools for
-          optimizing ML models for deployment and execution.
-        path: /model_optimization
+          
+          A TFX serving system for ML models, designed for high-performance in
+          production environments.
+        path: /tfx/guide/serving
         icon:
           icon_name: chevron_right
           foreground: theme
@@ -147,7 +148,24 @@ landing_page:
           icon_name: chevron_right
           foreground: theme
           background: grey
+      - description: >
+          
+          Extra functionality for TensorFlow, maintained by SIG Addons.
+        path: https://github.com/tensorflow/addons
+        icon:
+          icon_name: chevron_right
+          foreground: theme
+          background: grey
     - list:
+      - description: >
+          
+          A suite of visualization tools to understand, debug, and optimize
+          TensorFlow programs.
+        path: /tensorboard
+        icon:
+          icon_name: chevron_right
+          foreground: theme
+          background: grey
       - description: >
           
           A collection of datasets ready to use with TensorFlow.
@@ -157,10 +175,10 @@ landing_page:
           foreground: theme
           background: grey
       - description: >
-          
-          A TFX serving system for ML models, designed for high-performance in
-          production environments.
-        path: /tfx/guide/serving
+          
+          The TensorFlow Model Optimization Toolkit is a suite of tools for
+          optimizing ML models for deployment and execution.
+        path: /model_optimization
         icon:
           icon_name: chevron_right
           foreground: theme
@@ -192,14 +210,6 @@ landing_page:
           icon_name: chevron_right
           foreground: theme
           background: grey
-      - description: >
-          
-          Extra functionality for TensorFlow, maintained by SIG Addons.
-        path: https://github.com/tensorflow/addons
-        icon:
-          icon_name: chevron_right
-          foreground: theme
-          background: grey
       - description: >
           
           Dataset, streaming, and file system extensions, maintained by SIG IO.
diff --git a/site/en/guide/_toc.yaml b/site/en/guide/_toc.yaml
index 85ea358bc28..92e5d6a80c3 100644
--- a/site/en/guide/_toc.yaml
+++ b/site/en/guide/_toc.yaml
@@ -19,8 +19,28 @@ toc:
   path: /guide/basic_training_loops
 
 - heading: "Keras"
+- title: "Overview"
+  path: /guide/keras
 - include: /guide/keras/_toc.yaml
 
+- heading: "Build with Core"
+  status: new
+- title: "Overview"
+  path: /guide/core/index
+- title: "Quickstart for Core"
+  path: /guide/core/quickstart_core
+- title: "Logistic regression"
+  path: /guide/core/logistic_regression_core
+- title: "Multilayer perceptrons"
+  path: /guide/core/mlp_core
+- title: "Matrix approximation"
+  path: /guide/core/matrix_core
+- title: "Custom optimizers"
+  path: /guide/core/optimizers_core
+- title: "DTensor with Core APIs"
+  path: /guide/core/distribution
+  status: experimental
+
 - heading: "TensorFlow in depth"
 - title: "Tensor slicing"
   path: /guide/tensor_slicing
@@ -30,12 +50,17 @@ toc:
   path: /guide/ragged_tensor
 - title: "Sparse tensor"
   path: /guide/sparse_tensor
-
 - title: "Random number generation"
   path: /guide/random_numbers
 - title: "NumPy API"
   status: experimental
   path: /guide/tf_numpy
+- title: "NumPy API Type Promotion"
+  status: nightly
+  path: /guide/tf_numpy_type_promotion
+- title: "DTensor concepts"
+  path: /guide/dtensor_overview
+  status: experimental
 - title: "Thinking in TensorFlow 2"
   path: /guide/effective_tf2
 
@@ -54,11 +79,14 @@ toc:
 - title: "Analyze pipeline performance"
   path: /guide/data_performance_analysis
 
-- heading: "Save a model"
+- heading: "Import and export"
 - title: "Checkpoint"
   path: /guide/checkpoint
 - title: "SavedModel"
   path: /guide/saved_model
+- title: "Import a JAX model using JAX2TF"
+  status: new
+  path: /guide/jax2tf
 
 - heading: "Accelerators"
 - title: "Distributed training"
@@ -80,7 +108,19 @@ toc:
 - title: "Mixed precision"
   path: /guide/mixed_precision
 
+- heading: "Model Garden"
+  status: new
+- title: "Overview"
+  path: /tfmodels
+- title: "Training with Orbit"
+  path: /tfmodels/orbit
+- title: "TFModels - NLP"
+  path: /tfmodels/nlp
+  status: external
+- include: /tfmodels/vision/_toc.yaml
+
 - heading: "Estimators"
+  status: deprecated
 - title: "Estimator overview"
   path: /guide/estimator
 
diff --git a/site/en/guide/advanced_autodiff.ipynb b/site/en/guide/advanced_autodiff.ipynb
index 7da53d8ee48..e04b9db4d77 100644
--- a/site/en/guide/advanced_autodiff.ipynb
+++ b/site/en/guide/advanced_autodiff.ipynb
@@ -1101,7 +1101,7 @@
         "id": "M_x7ih5sarvG"
       },
       "source": [
-        "In this case, `batch_jacobian` still runs and returns _something_ with the expected shape, but it's contents have an unclear meaning:"
+        "In this case, `batch_jacobian` still runs and returns _something_ with the expected shape, but its contents have an unclear meaning:"
       ]
     },
     {
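For context on the `batch_jacobian` caveat touched above, here is a small sketch (not the notebook's own example; the shapes are arbitrary) of how a correctly shaped result can still drop information once outputs mix batch elements:

```
import tensorflow as tf

x = tf.random.normal([4, 3])
with tf.GradientTape() as tape:
  tape.watch(x)
  # Each output row depends on every input row through the batch mean,
  # so the per-example Jacobian assumption no longer holds.
  y = x - tf.reduce_mean(x, axis=0)

jb = tape.batch_jacobian(y, x)
print(jb.shape)  # (4, 3, 3): the expected shape, but cross-example terms are dropped
```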
diff --git a/site/en/guide/autodiff.ipynb b/site/en/guide/autodiff.ipynb
index 5571079fed5..237a224569b 100644
--- a/site/en/guide/autodiff.ipynb
+++ b/site/en/guide/autodiff.ipynb
@@ -746,9 +746,9 @@
         "id": "egypBxISAHhx"
       },
       "source": [
-        "## Getting a gradient of `None`\n",
+        "## Cases where `gradient` returns `None`\n",
         "\n",
-        "When a target is not connected to a source you will get a gradient of `None`.\n"
+        "When a target is not connected to a source, `gradient` will return `None`.\n"
       ]
     },
     {
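The retitled section above covers `gradient` returning `None` for disconnected targets; a minimal sketch of that behavior (the variables are illustrative, not from the notebook):

```
import tensorflow as tf

x = tf.Variable(2.0)
y = tf.Variable(3.0)

with tf.GradientTape() as tape:
  z = y * y  # z never touches x

# dz/dx is None because z is not connected to x; dz/dy is 2*y = 6.0.
print(tape.gradient(z, [x, y]))  # [None, <tf.Tensor: 6.0>]
```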
diff --git a/site/en/guide/basic_training_loops.ipynb b/site/en/guide/basic_training_loops.ipynb
index 74284442cde..a1558b1903e 100644
--- a/site/en/guide/basic_training_loops.ipynb
+++ b/site/en/guide/basic_training_loops.ipynb
@@ -233,7 +233,7 @@
         "id": "rdpN_3ssG9D5"
       },
       "source": [
-        "The initial variables are set here in a fixed way, but Keras comes with any of a number of [initalizers](https://www.tensorflow.org/api_docs/python/tf/keras/initializers) you could use, with or without the rest of Keras."
+        "The initial variables are set here in a fixed way, but Keras comes with any of a number of [initializers](https://www.tensorflow.org/api_docs/python/tf/keras/initializers) you could use, with or without the rest of Keras."
       ]
     },
     {
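The corrected sentence above points at `tf.keras.initializers`; a short sketch of passing one to a standalone layer (the layer size and seed are arbitrary):

```
import tensorflow as tf

# Any initializer from tf.keras.initializers can be plugged into a layer.
dense = tf.keras.layers.Dense(
    units=4,
    kernel_initializer=tf.keras.initializers.GlorotNormal(seed=22),
    bias_initializer=tf.keras.initializers.Zeros())

print(dense(tf.ones([1, 3])))  # building the layer applies the initializers
```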
diff --git a/site/en/guide/basics.ipynb b/site/en/guide/basics.ipynb
index e9499d3f25a..5457f162a0e 100644
--- a/site/en/guide/basics.ipynb
+++ b/site/en/guide/basics.ipynb
@@ -197,6 +197,37 @@
         "tf.reduce_sum(x)"
       ]
     },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "TNHnIjOVLJfA"
+      },
+      "source": [
+        "Note: Typically, anywhere a TensorFlow function expects a `Tensor` as input, the function will also accept anything that can be converted to a `Tensor` using `tf.convert_to_tensor`. See below for an example."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "i_XKgjDsL4GE"
+      },
+      "outputs": [],
+      "source": [
+        "tf.convert_to_tensor([1,2,3])"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "wTBt-JUqLJDJ"
+      },
+      "outputs": [],
+      "source": [
+        "tf.reduce_sum([1,2,3])"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -637,7 +668,7 @@
         "y = f(x) + tf.random.normal(shape=[201])\n",
         "\n",
         "plt.plot(x.numpy(), y.numpy(), '.', label='Data')\n",
-        "plt.plot(x, f(x),  label='Ground truth')\n",
+        "plt.plot(x, f(x), label='Ground truth')\n",
         "plt.legend();"
       ]
     },
@@ -647,7 +678,7 @@
         "id": "De5LldboSWcW"
       },
       "source": [
-        "Create a model:"
+        "Create a quadratic model with randomly initialized weights and a bias:"
       ]
     },
     {
@@ -658,21 +689,29 @@
       },
       "outputs": [],
       "source": [
-        "class Model(tf.keras.Model):\n",
-        "  def __init__(self, units):\n",
-        "    super().__init__()\n",
-        "    self.dense1 = tf.keras.layers.Dense(units=units,\n",
-        "                                        activation=tf.nn.relu,\n",
-        "                                        kernel_initializer=tf.random.normal,\n",
-        "                                        bias_initializer=tf.random.normal)\n",
-        "    self.dense2 = tf.keras.layers.Dense(1)\n",
+        "class Model(tf.Module):\n",
         "\n",
-        "  def call(self, x, training=True):\n",
-        "    # For Keras layers/models, implement `call` instead of `__call__`.\n",
-        "    x = x[:, tf.newaxis]\n",
-        "    x = self.dense1(x)\n",
-        "    x = self.dense2(x)\n",
-        "    return tf.squeeze(x, axis=1)"
+        "  def __init__(self):\n",
+        "    # Randomly generate weight and bias terms\n",
+        "    rand_init = tf.random.uniform(shape=[3], minval=0., maxval=5., seed=22)\n",
+        "    # Initialize model parameters\n",
+        "    self.w_q = tf.Variable(rand_init[0])\n",
+        "    self.w_l = tf.Variable(rand_init[1])\n",
+        "    self.b = tf.Variable(rand_init[2])\n",
+        "  \n",
+        "  @tf.function\n",
+        "  def __call__(self, x):\n",
+        "    # Quadratic Model : quadratic_weight * x^2 + linear_weight * x + bias\n",
+        "    return self.w_q * (x**2) + self.w_l * x + self.b"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "36o7VjaesScg"
+      },
+      "source": [
+        "First, observe your model's performance before training:"
       ]
     },
     {
@@ -683,7 +722,7 @@
       },
       "outputs": [],
       "source": [
-        "model = Model(64)"
+        "quad_model = Model()"
       ]
     },
     {
@@ -694,20 +733,71 @@
       },
       "outputs": [],
       "source": [
-        "plt.plot(x.numpy(), y.numpy(), '.', label='data')\n",
-        "plt.plot(x, f(x),  label='Ground truth')\n",
-        "plt.plot(x, model(x), label='Untrained predictions')\n",
-        "plt.title('Before training')\n",
-        "plt.legend();"
+        "def plot_preds(x, y, f, model, title):\n",
+        "  plt.figure()\n",
+        "  plt.plot(x, y, '.', label='Data')\n",
+        "  plt.plot(x, f(x), label='Ground truth')\n",
+        "  plt.plot(x, model(x), label='Predictions')\n",
+        "  plt.title(title)\n",
+        "  plt.legend()"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Y0JtXQat-nlk"
+      },
+      "outputs": [],
+      "source": [
+        "plot_preds(x, y, f, quad_model, 'Before training')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hLzwD0-ascGf"
+      },
+      "source": [
+        "Now, define a loss for your model:\n",
+        "\n",
+        "Given that this model is intended to predict continuous values, the mean squared error (MSE) is a good choice for the loss function. Given a vector of predictions, $\\hat{y}$, and a vector of true targets, $y$, the MSE is defined as the mean of the squared differences between the predicted values and the ground truth.\n",
+        "\n",
+        "$MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{y}_i -y_i)^2$"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "eCtJ1uuCseZd"
+      },
+      "outputs": [],
+      "source": [
+        "def mse_loss(y_pred, y):\n",
+        "  return tf.reduce_mean(tf.square(y_pred - y))"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {
-        "id": "ZebWva4vTBlC"
+        "id": "7EWyDu3zot2w"
+      },
+      "source": [
+        "Write a basic training loop for the model. The loop will make use of the MSE loss function and its gradients with respect to the input in order to iteratively update the model's parameters. Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "8kX_-zily2Ia"
       },
+      "outputs": [],
       "source": [
-        "Write a basic training loop:"
+        "batch_size = 32\n",
+        "dataset = tf.data.Dataset.from_tensor_slices((x, y))\n",
+        "dataset = dataset.shuffle(buffer_size=x.shape[0]).batch(batch_size)"
       ]
     },
     {
@@ -718,20 +808,41 @@
       },
       "outputs": [],
       "source": [
-        "variables = model.variables\n",
-        "\n",
-        "optimizer = tf.optimizers.SGD(learning_rate=0.01)\n",
+        "# Set training parameters\n",
+        "epochs = 100\n",
+        "learning_rate = 0.01\n",
+        "losses = []\n",
         "\n",
-        "for step in range(1000):\n",
-        "  with tf.GradientTape() as tape:\n",
-        "    prediction = model(x)\n",
-        "    error = (y-prediction)**2\n",
-        "    mean_error = tf.reduce_mean(error)\n",
-        "  gradient = tape.gradient(mean_error, variables)\n",
-        "  optimizer.apply_gradients(zip(gradient, variables))\n",
+        "# Format training loop\n",
+        "for epoch in range(epochs):\n",
+        "  for x_batch, y_batch in dataset:\n",
+        "    with tf.GradientTape() as tape:\n",
+        "      batch_loss = mse_loss(quad_model(x_batch), y_batch)\n",
+        "    # Update parameters with respect to the gradient calculations\n",
+        "    grads = tape.gradient(batch_loss, quad_model.variables)\n",
+        "    for g,v in zip(grads, quad_model.variables):\n",
+        "        v.assign_sub(learning_rate*g)\n",
+        "  # Keep track of model loss per epoch\n",
+        "  loss = mse_loss(quad_model(x), y)\n",
+        "  losses.append(loss)\n",
+        "  if epoch % 10 == 0:\n",
+        "    print(f'Mean squared error for step {epoch}: {loss.numpy():0.3f}')\n",
         "\n",
-        "  if step % 100 == 0:\n",
-        "    print(f'Mean squared error: {mean_error.numpy():0.3f}')"
+        "# Plot model results\n",
+        "print(\"\\n\")\n",
+        "plt.plot(range(epochs), losses)\n",
+        "plt.xlabel(\"Epoch\")\n",
+        "plt.ylabel(\"Mean Squared Error (MSE)\")\n",
+        "plt.title('MSE loss vs training iterations');"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dW5B2TTRsvxE"
+      },
+      "source": [
+        "Now, observe your model's performance after training:"
       ]
     },
     {
@@ -742,11 +853,7 @@
       },
       "outputs": [],
       "source": [
-        "plt.plot(x.numpy(),y.numpy(), '.', label=\"data\")\n",
-        "plt.plot(x, f(x),  label='Ground truth')\n",
-        "plt.plot(x, model(x), label='Trained predictions')\n",
-        "plt.title('After training')\n",
-        "plt.legend();"
+        "plot_preds(x, y, f, quad_model, 'After training')"
       ]
     },
     {
@@ -755,7 +862,16 @@
         "id": "hbtmFJIXb6qm"
       },
       "source": [
-        "That's working, but remember that implementations of common training utilities are available in the `tf.keras` module. So consider using those before writing your own. To start with, the `Model.compile` and  `Model.fit` methods implement a training loop for you:"
+        "That's working, but remember that implementations of common training utilities are available in the `tf.keras` module. So, consider using those before writing your own. To start with, the `Model.compile` and `Model.fit` methods implement a training loop for you:"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "cjx23MiztFmT"
+      },
+      "source": [
+        "Begin by creating a Sequential Model in Keras using `tf.keras.Sequential`. One of the simplest Keras layers is the dense layer, which can be instantiated with `tf.keras.layers.Dense`. The dense layer is able to learn multidimensional linear relationships of the form $\\mathrm{Y} = \\mathrm{W}\\mathrm{X} +  \\vec{b}$. In order to learn a nonlinear equation of the form, $w_1x^2 + w_2x + b$, the dense layer's input should be a data matrix with $x^2$ and $x$ as features. The lambda layer, `tf.keras.layers.Lambda`, can be used to perform this stacking transformation."
       ]
     },
     {
@@ -766,7 +882,9 @@
       },
       "outputs": [],
       "source": [
-        "new_model = Model(64)"
+        "new_model = tf.keras.Sequential([\n",
+        "    tf.keras.layers.Lambda(lambda x: tf.stack([x, x**2], axis=1)),\n",
+        "    tf.keras.layers.Dense(units=1, kernel_initializer=tf.random.normal)])"
       ]
     },
     {
@@ -779,14 +897,23 @@
       "source": [
         "new_model.compile(\n",
         "    loss=tf.keras.losses.MSE,\n",
-        "    optimizer=tf.optimizers.SGD(learning_rate=0.01))\n",
+        "    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01))\n",
         "\n",
         "history = new_model.fit(x, y,\n",
         "                        epochs=100,\n",
         "                        batch_size=32,\n",
         "                        verbose=0)\n",
         "\n",
-        "model.save('./my_model')"
+        "new_model.save('./my_new_model.keras')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "u3q5d1SzvzTq"
+      },
+      "source": [
+        "Observe your Keras model's performance after training:"
       ]
     },
     {
@@ -804,6 +931,17 @@
         "plt.title('Keras training progress');"
       ]
     },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "bB44a9YsvnfK"
+      },
+      "outputs": [],
+      "source": [
+        "plot_preds(x, y, f, new_model, 'After Training: Keras')"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
diff --git a/site/en/guide/core/distribution.ipynb b/site/en/guide/core/distribution.ipynb
new file mode 100644
index 00000000000..c7f13b2f4db
--- /dev/null
+++ b/site/en/guide/core/distribution.ipynb
@@ -0,0 +1,700 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "FhGuhbZ6M5tl"
+      },
+      "source": [
+        "##### Copyright 2022 The TensorFlow Authors."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "AwOEIRJC6Une"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n",
+        "# you may not use this file except in compliance with the License.\n",
+        "# You may obtain a copy of the License at\n",
+        "#\n",
+        "# https://www.apache.org/licenses/LICENSE-2.0\n",
+        "#\n",
+        "# Unless required by applicable law or agreed to in writing, software\n",
+        "# distributed under the License is distributed on an \"AS IS\" BASIS,\n",
+        "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n",
+        "# See the License for the specific language governing permissions and\n",
+        "# limitations under the License."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "EIdT9iu_Z4Rb"
+      },
+      "source": [
+        "# Distributed training with Core APIs and DTensor"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bBIlTPscrIT9"
+      },
+      "source": [
+        "\n",
+        "  \n",
+        "  \n",
+        "  \n",
+        "  \n",
+        "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "## Introduction\n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) and [DTensor](https://www.tensorflow.org/guide/dtensor_overview) to demonstrate a data parallel distributed training example. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases. Refer to the [DTensor Overview](https://www.tensorflow.org/guide/dtensor_overview) guide and [Distributed Training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial) tutorial to learn more about DTensor.\n", + "\n", + "This example uses the same model and optimizer shown in the [multilayer perceptrons](https://www.tensorflow.org/guide/core/mlp_core) tutorial. See this tutorial first to get comfortable with writing an end-to-end machine learning workflow with the Core APIs.\n", + "\n", + "Note: DTensor is still an experimental TensorFlow API which means that its features are available for testing, and it is intended for use in test environments only." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d_OFkG0dyWCp" + }, + "source": [ + "## Overview of data parallel training with DTensor\n", + "\n", + "Before building an MLP that supports distribution, take a moment to explore the fundamentals of DTensor for data parallel training.\n", + "\n", + "DTensor allows you to run distributed training across devices to improve efficiency, reliability and scalability. DTensor distributes the program and tensors according to the sharding directives through a procedure called Single program, multiple data (SPMD) expansion. A variable of a `DTensor` aware layer is created as `dtensor.DVariable`, and the constructors of `DTensor` aware layer objects take additional `Layout` inputs in addition to the usual layer parameters.\n", + "\n", + "The main ideas for data parallel training are as follows:\n", + " - Model variables are replicated on N devices each.\n", + " - A global batch is split into N per-replica batches.\n", + " - Each per-replica batch is trained on the replica device.\n", + " - The gradient is reduced before weight up data is collectively performed on all replicas.\n", + " - Data parallel training provides nearly linear speed with respect to the number of devices" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor is part of TensorFlow 2.9.0 release." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "latuqlI_Yvoo" + }, + "outputs": [], + "source": [ + "#!pip install --quiet --upgrade --pre tensorflow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "from tensorflow.experimental import dtensor\n", + "print(tf.__version__)\n", + "# Set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vDH9-sy4sfPf" + }, + "source": [ + "Configure 8 virtual CPUs for this experiment. DTensor can also be used with GPU or TPU devices. Given that this notebook uses virtual devices, the speedup gained from distributed training is not noticeable. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H2iM-6J4s2D6" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(8)\n", + "\n", + "DEVICES = [f'CPU:{i}' for i in range(8)]\n", + "devices = tf.config.list_logical_devices('CPU')\n", + "device_names = [d.name for d in devices]\n", + "device_names" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_72b0LCNbjx" + }, + "source": [ + "## The MNIST Dataset\n", + "\n", + "The dataset is available from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/mnist). Split the data into training and testing sets. Only use 5000 examples for training and testing to save time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8h4fV_JCfPIX" + }, + "outputs": [], + "source": [ + "train_data, test_data = tfds.load(\"mnist\", split=['train[:5000]', 'test[:5000]'], batch_size=128, as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "twkJ35YB6tSi" + }, + "source": [ + "### Preprocessing the data\n", + "\n", + "Preprocess the data by reshaping it to be 2-dimensional and by rescaling it to fit into the unit interval, [0,1]." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6Cmjhg0xCqbz" + }, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " # Reshaping the data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " # Rescaling the data\n", + " x = x/255\n", + " return x, y\n", + "\n", + "train_data, test_data = train_data.map(preprocess), test_data.map(preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Build the MLP \n", + "\n", + "Build an MLP model with DTensor aware layers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OHW6Yvg2yS6H" + }, + "source": [ + "### The dense layer\n", + "\n", + "Start by creating a dense layer module that supports DTensor. The `dtensor.call_with_layout` function can be used to call a function that takes in a DTensor input and produces a DTensor output. 
This is useful for initializing a DTensor variable, `dtensor.DVariable`, with a TensorFlow supported function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IM0yJos25FG5" + }, + "outputs": [], + "source": [ + "class DenseLayer(tf.Module):\n", + "\n", + " def __init__(self, in_dim, out_dim, weight_layout, activation=tf.identity):\n", + " super().__init__()\n", + " # Initialize dimensions and the activation function\n", + " self.in_dim, self.out_dim = in_dim, out_dim\n", + " self.activation = activation\n", + "\n", + " # Initialize the DTensor weights using the Xavier scheme\n", + " uniform_initializer = tf.function(tf.random.stateless_uniform)\n", + " xavier_lim = tf.sqrt(6.)/tf.sqrt(tf.cast(self.in_dim + self.out_dim, tf.float32))\n", + " self.w = dtensor.DVariable(\n", + " dtensor.call_with_layout(\n", + " uniform_initializer, weight_layout,\n", + " shape=(self.in_dim, self.out_dim), seed=(22, 23),\n", + " minval=-xavier_lim, maxval=xavier_lim))\n", + " \n", + " # Initialize the bias with the zeros\n", + " bias_layout = weight_layout.delete([0])\n", + " self.b = dtensor.DVariable(\n", + " dtensor.call_with_layout(tf.zeros, bias_layout, shape=[out_dim]))\n", + "\n", + " def __call__(self, x):\n", + " # Compute the forward pass\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " return self.activation(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X-7MzpjgyHg6" + }, + "source": [ + "### The MLP sequential model\n", + "\n", + "Now create an MLP module that executes the dense layers sequentially." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6XisRWiCyHAb" + }, + "outputs": [], + "source": [ + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, layers):\n", + " self.layers = layers\n", + " \n", + " def __call__(self, x, preds=False): \n", + " # Execute the model's layers sequentially\n", + " for layer in self.layers:\n", + " x = layer(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r5HZJ0kv-V3v" + }, + "source": [ + "Performing \"data-parallel\" training with DTensor is equivalent to `tf.distribute.MirroredStrategy`. To do this each device will run the same model on a shard of the data batch. So you'll need the following:\n", + "\n", + "* A `dtensor.Mesh` with a single `\"batch\"` dimension\n", + "* A `dtensor.Layout` for all the weights that replicates them across the mesh (using `dtensor.UNSHARDED` for each axis)\n", + "* A `dtensor.Layout` for the data that splits the batch dimension across the mesh\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "Create a DTensor mesh that consists of a single batch dimension, where each device becomes a replica that receives a shard from the global batch. 
Use this mesh to instantiate an MLP mode with the following architecture:\n", + "\n", + "Forward Pass: ReLU(784 x 700) x ReLU(700 x 500) x Softmax(500 x 10)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VmlACuki3oPi" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=DEVICES)\n", + "weight_layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n", + "\n", + "input_size = 784\n", + "hidden_layer_1_size = 700\n", + "hidden_layer_2_size = 500\n", + "hidden_layer_2_size = 10\n", + "\n", + "mlp_model = MLP([\n", + " DenseLayer(in_dim=input_size, out_dim=hidden_layer_1_size, \n", + " weight_layout=weight_layout,\n", + " activation=tf.nn.relu),\n", + " DenseLayer(in_dim=hidden_layer_1_size , out_dim=hidden_layer_2_size,\n", + " weight_layout=weight_layout,\n", + " activation=tf.nn.relu),\n", + " DenseLayer(in_dim=hidden_layer_2_size, out_dim=hidden_layer_2_size, \n", + " weight_layout=weight_layout)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tyBATDoRmDkg" + }, + "source": [ + "### Training metrics\n", + "\n", + "Use the cross-entropy loss function and accuracy metric for training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rskOYA7FVCwg" + }, + "outputs": [], + "source": [ + "def cross_entropy_loss(y_pred, y):\n", + " # Compute cross entropy loss with a sparse operation\n", + " sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + " return tf.reduce_mean(sparse_ce)\n", + "\n", + "def accuracy(y_pred, y):\n", + " # Compute accuracy after extracting class predictions\n", + " class_preds = tf.argmax(y_pred, axis=1)\n", + " is_equal = tf.equal(y, class_preds)\n", + " return tf.reduce_mean(tf.cast(is_equal, tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JSiNRhTOnKZr" + }, + "source": [ + "### Optimizer\n", + "\n", + "Using an optimizer can result in significantly faster convergence compared to standard gradient descent. The Adam optimizer is implemented below and has been configured to be compatible with DTensor. In order to use Keras optimizers with DTensor, refer to the experimental`tf.keras.dtensor.experimental.optimizers` module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-9kIAI_lfXDS" + }, + "outputs": [], + "source": [ + "class Adam(tf.Module):\n", + "\n", + " def __init__(self, model_vars, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize optimizer parameters and variable slots\n", + " self.model_vars = model_vars\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " # Initialize optimizer variable slots\n", + " for var in model_vars:\n", + " v = dtensor.DVariable(dtensor.call_with_layout(tf.zeros, var.layout, shape=var.shape))\n", + " s = dtensor.DVariable(dtensor.call_with_layout(tf.zeros, var.layout, shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + "\n", + " def apply_gradients(self, grads):\n", + " # Update the model variables given their gradients\n", + " for i, (d_var, var) in enumerate(zip(grads, self.model_vars)):\n", + " self.v_dvar[i].assign(self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var)\n", + " self.s_dvar[i].assign(self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var))\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " self.t += 1.\n", + " return " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w54b7GtLfn1j" + }, + "source": [ + "### Data packing\n", + "\n", + "Start by writing a helper function for transferring data to the device. This function should use `dtensor.pack` to send (and only send) the shard of the global batch that is intended for a replica to the device backing the replica. For simplicity, assume a single-client application.\n", + "\n", + "Next, write a function that uses this helper function to pack the training data batches into DTensors sharded along the batch (first) axis. This ensures that DTensor evenly distributes the training data to the 'batch' mesh dimension. Note that in DTensor, the batch size always refers to the global batch size; therefore, the batch size should be chosen such that it can be divided evenly by the size of the batch mesh dimension. Additional DTensor APIs to simplify `tf.data` integration are planned, so please stay tuned." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3Rx82djZ6ITm" + }, + "outputs": [], + "source": [ + "def repack_local_tensor(x, layout):\n", + " # Repacks a local Tensor-like to a DTensor with layout\n", + " # This function assumes a single-client application\n", + " x = tf.convert_to_tensor(x)\n", + " sharded_dims = []\n", + "\n", + " # For every sharded dimension, use tf.split to split the along the dimension.\n", + " # The result is a nested list of split-tensors in queue[0].\n", + " queue = [x]\n", + " for axis, dim in enumerate(layout.sharding_specs):\n", + " if dim == dtensor.UNSHARDED:\n", + " continue\n", + " num_splits = layout.shape[axis]\n", + " queue = tf.nest.map_structure(lambda x: tf.split(x, num_splits, axis=axis), queue)\n", + " sharded_dims.append(dim)\n", + "\n", + " # Now you can build the list of component tensors by looking up the location in\n", + " # the nested list of split-tensors created in queue[0].\n", + " components = []\n", + " for locations in layout.mesh.local_device_locations():\n", + " t = queue[0]\n", + " for dim in sharded_dims:\n", + " split_index = locations[dim] # Only valid on single-client mesh.\n", + " t = t[split_index]\n", + " components.append(t)\n", + "\n", + " return dtensor.pack(components, layout)\n", + "\n", + "def repack_batch(x, y, mesh):\n", + " # Pack training data batches into DTensors along the batch axis\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osEK3rqpYfKd" + }, + "source": [ + "### Training\n", + "\n", + "Write a traceable function that executes a single training step given a batch of data. This function does not require any special DTensor annotations. Also write a function that executes a test step and returns the appropriate performance metrics." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZICEsDGuSbDD" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def train_step(model, x_batch, y_batch, loss, metric, optimizer):\n", + " # Execute a single training step\n", + " with tf.GradientTape() as tape:\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " # Compute gradients and update the model's parameters\n", + " grads = tape.gradient(batch_loss, model.trainable_variables)\n", + " optimizer.apply_gradients(grads)\n", + " # Return batch loss and accuracy\n", + " batch_acc = metric(y_pred, y_batch)\n", + " return batch_loss, batch_acc\n", + "\n", + "@tf.function\n", + "def test_step(model, x_batch, y_batch, loss, metric):\n", + " # Execute a single testing step\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = metric(y_pred, y_batch)\n", + " return batch_loss, batch_acc" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RjIDVTwwX-Mr" + }, + "source": [ + "Now, train the MLP model for 3 epochs with a batch size of 128." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oC85kuZgmh3q" + }, + "outputs": [], + "source": [ + "# Initialize the training loop parameters and structures\n", + "epochs = 3\n", + "batch_size = 128\n", + "train_losses, test_losses = [], []\n", + "train_accs, test_accs = [], []\n", + "optimizer = Adam(mlp_model.trainable_variables)\n", + "\n", + "# Format training loop\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_test, batch_accs_test = [], []\n", + "\n", + " # Iterate through training data\n", + " for x_batch, y_batch in train_data:\n", + " x_batch, y_batch = repack_batch(x_batch, y_batch, mesh)\n", + " batch_loss, batch_acc = train_step(mlp_model, x_batch, y_batch, cross_entropy_loss, accuracy, optimizer)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate through testing data\n", + " for x_batch, y_batch in test_data:\n", + " x_batch, y_batch = repack_batch(x_batch, y_batch, mesh)\n", + " batch_loss, batch_acc = test_step(mlp_model, x_batch, y_batch, cross_entropy_loss, accuracy)\n", + " # Keep track of batch-level testing\n", + " batch_losses_test.append(batch_loss)\n", + " batch_accs_test.append(batch_acc)\n", + "\n", + "# Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " test_loss, test_acc = tf.reduce_mean(batch_losses_test), tf.reduce_mean(batch_accs_test)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " test_losses.append(test_loss)\n", + " test_accs.append(test_acc)\n", + " print(f\"Epoch: {epoch}\")\n", + " print(f\"Training loss: {train_loss.numpy():.3f}, Training accuracy: {train_acc.numpy():.3f}\")\n", + " print(f\"Testing loss: {test_loss.numpy():.3f}, Testing accuracy: {test_acc.numpy():.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j_RVmt43G12R" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Start by writing a plotting function to visualize the model's loss and accuracy during training. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VXTCYVtNDjAM" + }, + "outputs": [], + "source": [ + "def plot_metrics(train_metric, test_metric, metric_type):\n", + " # Visualize metrics vs training Epochs\n", + " plt.figure()\n", + " plt.plot(range(len(train_metric)), train_metric, label = f\"Training {metric_type}\")\n", + " plt.plot(range(len(test_metric)), test_metric, label = f\"Testing {metric_type}\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(metric_type)\n", + " plt.legend()\n", + " plt.title(f\"{metric_type} vs Training Epochs\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "407qok7q2JIO" + }, + "outputs": [], + "source": [ + "plot_metrics(train_losses, test_losses, \"Cross entropy loss\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8H_TgxV92NfX" + }, + "outputs": [], + "source": [ + "plot_metrics(train_accs, test_accs, \"Accuracy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DHO_u-3w4YRF" + }, + "source": [ + "## Saving your model\n", + "\n", + "The integration of `tf.saved_model` and DTensor is still under development. As of TensorFlow 2.9.0, tf.saved_model only accepts DTensor models with fully replicated variables. 
As a workaround, you can convert a DTensor model to a fully replicated one by reloading a checkpoint. However, after a model is saved, all DTensor annotations are lost and the saved signatures can only be used with regular Tensors. This tutorial will be updated to showcase the integration once it is solidified.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook provided an overview of distributed training with DTensor and the TensorFlow Core APIs. Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build highly-configurable machine learning workflows with support for distributed training.\n", + "- The [DTensor concepts](https://www.tensorflow.org/guide/dtensor_overview) guide and [Distributed training with DTensors](https://www.tensorflow.org/tutorials/distribute/dtensor_ml_tutorial) tutorial contain the most up-to-date information about DTensor and its integrations.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "FhGuhbZ6M5tl" + ], + "name": "distribution.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/index.md b/site/en/guide/core/index.md new file mode 100644 index 00000000000..7f44e11b018 --- /dev/null +++ b/site/en/guide/core/index.md @@ -0,0 +1,112 @@ +# TensorFlow Core APIs overview + +The TensorFlow Core APIs provide a set of comprehensive, composable, and +extensible low-level APIs for high-performance (distributed and accelerated) +computation, primarily aimed at building machine learning (ML) models as well as +authoring ML workflow tools and frameworks within the TensorFlow platform. These +APIs provide a foundation for creating highly configurable models with +fine-grained control and new frameworks from the ground up. + +The Core APIs can be used as an alternative to high-level machine learning APIs +like Keras. These high-level APIs are best suited for general machine learning +needs. They offer a variety of modules that abstract away the complexities of ML +while also offering functionalities for customization through subclassing. If +you are looking for an overview of TensorFlow using Keras, see the Quickstarts +and Keras sections in the [tutorials](https://www.tensorflow.org/tutorials). 
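For orientation, here is a minimal sketch (an illustrative example, not taken from the guide itself) of the kind of fine-grained control the Core APIs expose: variables, automatic differentiation, and a hand-written update step, with no Keras layers or optimizers involved.

```python
import tensorflow as tf

# Minimal gradient descent built from Core primitives only: tf.Variable,
# tf.GradientTape, and basic math ops. The toy objective (3 - w)^2 has its
# minimum at w = 3.
w = tf.Variable(0.0)
for _ in range(100):
    with tf.GradientTape() as tape:
        loss = (3.0 - w) ** 2
    grad = tape.gradient(loss, w)  # automatic differentiation
    w.assign_sub(0.1 * grad)       # hand-written parameter update
print(w.numpy())                   # approximately 3.0
```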
+ +## Who should use the Core APIs + +The TensorFlow Core low-level APIs are designed with the following ML developers +in mind: + +* Researchers building complex models with high levels of configurability +* Developers interested in using TensorFlow as a high-performance scientific + computing platform +* Framework authors building tools on top of the TensorFlow platform +* High-level API users interested in: + * Adding additional functionalities to their machine learning workflows + such as custom layers, losses, models, and optimizers + * Learning more about the inner workings of their models + +## Core API applications + +The TensorFlow Core APIs provide access to low-level functionality within the +TensorFlow ecosystem. These APIs provide more flexibility and control for +building ML models, applications, and tools, compared to high-level APIs, such +as Keras. + +### Build models and workflows + +The Core APIs are most commonly used to build highly customizable and optimized +machine learning models and workflows. Here are some of the ways that the +TensorFlow Core APIs can improve your machine learning models and workflow +development: + +* Building non-traditional models or layers that do not fully fit the + structures supported by high-level APIs +* Building custom layers, losses, models, and optimizers within Keras +* Implementing new optimization techniques to expedite convergence during + training +* Creating custom metrics for performance evaluation +* Designing highly-configurable training loops with support for features like + batching, cross-validation, and distribution strategies + +### Build frameworks and tools + +The TensorFlow Core APIs can also serve as the building blocks for new +high-level frameworks. Here are some examples of tools and frameworks that are +created with the low-level APIs: + +* [Keras](https://keras.io): deep learning for humans +* [TensorFlow Model Optimization Toolkit](https://www.tensorflow.org/model_optimization): + a suite of tools to optimize ML models for deployment and execution +* [TensorFlow Graphics](https://www.tensorflow.org/graphics): a library for + making useful graphics functions widely accessible + +### Build for scientific computing + +The TensorFlow Core APIs can also be applied outside the realm of machine +learning. Here are a few general-purpose use cases of TensorFlow for scientific +computing: + +* Physics simulations for solid mechanics and + [fluid dynamics](https://arxiv.org/abs/2108.11076) problems +* Graphics rendering applications like + [ray tracing](https://github.com/BachiLi/redner) +* Solving + [constrained optimization problems](https://github.com/google-research/tensorflow_constrained_optimization/blob/master/README.md) + +## Core API components + +Here are some of the fundamental components that comprise TensorFlow Core’s low- +level APIs. Note that this is not an all-encompassing list: + +* Data structures: `tf.Tensor`, `tf.Variable`, `tf.TensorArray` +* Primitive APIs: `tf.shape`, + [slicing](https://www.tensorflow.org/guide/tensor_slicing), `tf.concat`, + `tf.bitwise` +* Numerical: `tf.math`, `tf.linalg`, `tf.random` +* Functional components: `tf.function`, `tf.GradientTape` +* Distribution: [DTensor](https://www.tensorflow.org/guide/dtensor_overview) +* Export: `tf.saved_model` + +## Next steps + +The *Build with Core* documentation provides tutorials of basic machine learning +concepts from scratch.
The tutorials in this section help you get comfortable +with writing low-level code with Core APIs that you can then apply to more +complex use cases of your own. + +Note: You should not use the Core APIs to simply re-implement high-level APIs, +and it is possible to use high-level APIs, such as Keras, with the Core APIs. + +To get started using and learning more about the Core APIs, check out the +[Quickstart for TensorFlow Core](https://www.tensorflow.org/guide/core/quickstart_core). diff --git a/site/en/guide/core/logistic_regression_core.ipynb b/site/en/guide/core/logistic_regression_core.ipynb new file mode 100644 index 00000000000..5a9af324ad5 --- /dev/null +++ b/site/en/guide/core/logistic_regression_core.ipynb @@ -0,0 +1,935 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Logistic regression for binary classification with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DauaqJ7WhIhO" + }, + "source": [ + "This guide demonstrates how to use the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to perform [binary classification](https://developers.google.com/machine-learning/glossary#binary_classification) with [logistic regression](https://developers.google.com/machine-learning/crash-course/logistic-regression/). It uses the [Wisconsin Breast Cancer Dataset](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)) for tumor classification.\n", + "\n", + "[Logistic regression](https://developers.google.com/machine-learning/crash-course/logistic-regression/) is one of the most popular algorithms for binary classification. Given a set of examples with features, the goal of logistic regression is to output values between 0 and 1, which can be interpreted as the probabilities of each example belonging to a particular class. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "This tutorial uses [pandas](https://pandas.pydata.org) for reading a CSV file into a [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html), [seaborn](https://seaborn.pydata.org) for plotting a pairwise relationship in a dataset, [Scikit-learn](https://scikit-learn.org/) for computing a confusion matrix, and [matplotlib](https://matplotlib.org/) for creating visualizations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5lZoUK6AVTos" + }, + "outputs": [], + "source": [ + "!pip install -q seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "import sklearn.metrics as sk_metrics\n", + "import tempfile\n", + "import os\n", + "\n", + "# Preset matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]\n", + "\n", + "print(tf.__version__)\n", + "# To make the results reproducible, set the random seed value.\n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gFh9ne3FZ-On" + }, + "source": [ + "## Load the data\n", + "\n", + "Next, load the [Wisconsin Breast Cancer Dataset](https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(original)) from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/). This dataset contains various features such as a tumor's radius, texture, and concavity." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CiX2FI4gZtTt" + }, + "outputs": [], + "source": [ + "url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data'\n", + "\n", + "features = ['radius', 'texture', 'perimeter', 'area', 'smoothness', 'compactness',\n", + " 'concavity', 'concave_points', 'symmetry', 'fractal_dimension']\n", + "column_names = ['id', 'diagnosis']\n", + "\n", + "for attr in ['mean', 'ste', 'largest']:\n", + " for feature in features:\n", + " column_names.append(feature + \"_\" + attr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A3VR1aTP92nV" + }, + "source": [ + "Read the dataset into a pandas [DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) using [`pandas.read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uvR2Bzb691lJ" + }, + "outputs": [], + "source": [ + "dataset = pd.read_csv(url, names=column_names)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YB9eq6Zq-IZ4" + }, + "outputs": [], + "source": [ + "dataset.info()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0_Z1V6Dg-La_" + }, + "source": [ + "Display the first five rows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hWxktwbv-KPp" + }, + "outputs": [], + "source": [ + "dataset.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s4-Wn2jzVC1W" + }, + "source": [ + "Split the dataset into training and test sets using [`pandas.DataFrame.sample`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.sample.html), [`pandas.DataFrame.drop`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.drop.html) and [`pandas.DataFrame.iloc`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.iloc.html). Make sure to split the features from the target labels. The test set is used to evaluate your model's generalizability to unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "m2O60B-IVG9Q" + }, + "outputs": [], + "source": [ + "train_dataset = dataset.sample(frac=0.75, random_state=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i06vHFv_QB24" + }, + "outputs": [], + "source": [ + "len(train_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "19JaochhaQ3m" + }, + "outputs": [], + "source": [ + "test_dataset = dataset.drop(train_dataset.index)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LmHRcbAfaSag" + }, + "outputs": [], + "source": [ + "len(test_dataset)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w6JxBhBc_wwO" + }, + "outputs": [], + "source": [ + "# The `id` column can be dropped since each row is unique\n", + "x_train, y_train = train_dataset.iloc[:, 2:], train_dataset.iloc[:, 1]\n", + "x_test, y_test = test_dataset.iloc[:, 2:], test_dataset.iloc[:, 1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3MWuJTKEDM-f" + }, + "source": [ + "## Preprocess the data\n", + "\n", + "This dataset contains the mean, standard error, and largest values for each of the 10 tumor measurements collected per example. The `\"diagnosis\"` target column is a categorical variable with `'M'` indicating a malignant tumor and `'B'` indicating a benign tumor diagnosis.
This column needs to be converted into a numerical binary format for model training.\n", + "\n", + "The [`pandas.Series.map`](https://pandas.pydata.org/docs/reference/api/pandas.Series.map.html) function is useful for mapping binary values to the categories.\n", + "\n", + "The dataset should also be converted to a tensor with the `tf.convert_to_tensor` function after the preprocessing is complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JEJHhN65a2VV" + }, + "outputs": [], + "source": [ + "y_train, y_test = y_train.map({'B': 0, 'M': 1}), y_test.map({'B': 0, 'M': 1})\n", + "x_train, y_train = tf.convert_to_tensor(x_train, dtype=tf.float32), tf.convert_to_tensor(y_train, dtype=tf.float32)\n", + "x_test, y_test = tf.convert_to_tensor(x_test, dtype=tf.float32), tf.convert_to_tensor(y_test, dtype=tf.float32)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J4ubs136WLNp" + }, + "source": [ + "Use [`seaborn.pairplot`](https://seaborn.pydata.org/generated/seaborn.pairplot.html) to review the joint distribution of a few pairs of mean-based features from the training set and observe how they relate to the target:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oRKO_x8gWKv-" + }, + "outputs": [], + "source": [ + "sns.pairplot(train_dataset.iloc[:, 1:6], hue = 'diagnosis', diag_kind='kde');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YOG5iKYKW_3" + }, + "source": [ + "This pairplot demonstrates that certain features such as radius, perimeter and area are highly correlated. This is expected since the tumor radius is directly involved in the computation of both perimeter and area. Additionally, note that malignant diagnoses seem to be more right-skewed for many of the features.\n", + "\n", + "Make sure to also check the overall statistics. Note how each feature covers a vastly different range of values." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yi2FzC3T21jR" + }, + "outputs": [], + "source": [ + "train_dataset.describe().transpose()[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_8pDCIFjMla8" + }, + "source": [ + "Given the inconsistent ranges, it is beneficial to standardize the data such that each feature has a zero mean and unit variance. This process is called [normalization](https://developers.google.com/machine-learning/glossary#normalization)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FrzKNFNjLQDl" + }, + "outputs": [], + "source": [ + "class Normalize(tf.Module):\n", + " def __init__(self, x):\n", + " # Initialize the mean and standard deviation for normalization\n", + " self.mean = tf.Variable(tf.math.reduce_mean(x, axis=0))\n", + " self.std = tf.Variable(tf.math.reduce_std(x, axis=0))\n", + "\n", + " def norm(self, x):\n", + " # Normalize the input\n", + " return (x - self.mean)/self.std\n", + "\n", + " def unnorm(self, x):\n", + " # Unnormalize the input\n", + " return (x * self.std) + self.mean\n", + "\n", + "norm_x = Normalize(x_train)\n", + "x_train_norm, x_test_norm = norm_x.norm(x_train), norm_x.norm(x_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Logistic regression\n", + "\n", + "Before building a logistic regression model, it is crucial to understand the method's differences compared to traditional linear regression.\n", + "\n", + "### Logistic regression fundamentals\n", + "\n", + "Linear regression returns a linear combination of its inputs; this output is unbounded. The output of a [logistic regression](https://developers.google.com/machine-learning/glossary#logistic_regression) is in the `(0, 1)` range. For each example, it represents the probability that the example belongs to the _positive_ class.\n", + "\n", + "Logistic regression maps the continuous outputs of traditional linear regression, `(-∞, ∞)`, to probabilities, `(0, 1)`. This transformation is also symmetric so that flipping the sign of the linear output results in the inverse of the original probability.\n", + "\n", + "Let $Y$ denote the probability of being in class `1` (the tumor is malignant). The desired mapping can be achieved by interpreting the linear regression output as the [log odds](https://developers.google.com/machine-learning/glossary#log-odds) ratio of being in class `1` as opposed to class `0`:\n", + "\n", + "$$\\ln(\\frac{Y}{1-Y}) = wX + b$$\n", + "\n", + "By setting $wX + b = z$, this equation can then be solved for $Y$:\n", + "\n", + "$$Y = \\frac{e^{z}}{1 + e^{z}} = \\frac{1}{1 + e^{-z}}$$\n", + "\n", + "The expression $\\frac{1}{1 + e^{-z}}$ is known as the [sigmoid function](https://developers.google.com/machine-learning/glossary#sigmoid_function) $\\sigma(z)$. Hence, the equation for logistic regression can be written as $Y = \\sigma(wX + b)$.\n", + "\n", + "The dataset in this tutorial deals with a high-dimensional feature matrix. Therefore, the above equation must be rewritten in a matrix vector form as follows:\n", + "\n", + "$${\\mathrm{Y}} = \\sigma({\\mathrm{X}}w + b)$$\n", + "\n", + "where:\n", + "\n", + "* $\\underset{m\\times 1}{\\mathrm{Y}}$: a target vector\n", + "* $\\underset{m\\times n}{\\mathrm{X}}$: a feature matrix\n", + "* $\\underset{n\\times 1}w$: a weight vector\n", + "* $b$: a bias\n", + "* $\\sigma$: a sigmoid function applied to each element of the output vector\n", + "\n", + "Start by visualizing the sigmoid function, which transforms the linear output, `(-∞, ∞)`, to fall between `0` and `1`. The sigmoid function is available in `tf.math.sigmoid`." 
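As a quick numeric check of the mapping above (illustrative values only), converting a probability to its log odds and passing the result through `tf.math.sigmoid` recovers the original probability, and flipping the sign of the log odds yields the complementary probability:

```python
import tensorflow as tf

y = tf.constant(0.8)
z = tf.math.log(y / (1 - y))        # log odds of y
print(tf.math.sigmoid(z).numpy())   # ~0.8: the sigmoid inverts the log-odds mapping
print(tf.math.sigmoid(-z).numpy())  # ~0.2: flipping the sign gives 1 - y
```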
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ThHaV_RmucZl" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-10, 10, 500)\n", + "x = tf.cast(x, tf.float32)\n", + "f = lambda x : (1/20)*x + 0.6\n", + "plt.plot(x, tf.math.sigmoid(x))\n", + "plt.ylim((-0.1,1.1))\n", + "plt.title(\"Sigmoid function\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VMXEhrZuKECV" + }, + "source": [ + "### The log loss function\n", + "\n", + "The [log loss](https://developers.google.com/machine-learning/glossary#Log_Loss), or binary cross-entropy loss, is the ideal loss function for a binary classification problem with logistic regression. For each example, the log loss quantifies the similarity between a predicted probability and the example's true value. It is determined by the following equation:\n", + "\n", + "$$L = -\\frac{1}{m}\\sum_{i=1}^{m}y_i\\cdot\\log(\\hat{y}_i) + (1- y_i)\\cdot\\log(1 - \\hat{y}_i)$$\n", + "\n", + "where:\n", + "\n", + "* $\\hat{y}$: a vector of predicted probabilities\n", + "* $y$: a vector of true targets\n", + "\n", + "You can use the `tf.nn.sigmoid_cross_entropy_with_logits` function to compute the log loss. This function automatically applies the sigmoid activation to the regression output:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JVBInnSqS36W" + }, + "outputs": [], + "source": [ + "def log_loss(y_pred, y):\n", + " # Compute the log loss function\n", + " ce = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + " return tf.reduce_mean(ce)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q_mutLj0KNUb" + }, + "source": [ + "### The gradient descent update rule\n", + "\n", + "The TensorFlow Core APIs support automatic differentiation with `tf.GradientTape`. If you are curious about the mathematics behind the logistic regression [gradient updates](https://developers.google.com/machine-learning/glossary#gradient_descent), here is a short explanation:\n", + "\n", + "In the above equation for the log loss, recall that each $\\hat{y}_i$ can be rewritten in terms of the inputs as $\\sigma({\\mathrm{X_i}}w + b)$.\n", + "\n", + "The goal is to find a $w^*$ and $b^*$ that minimize the log loss:\n", + "\n", + "$$L = -\\frac{1}{m}\\sum_{i=1}^{m}y_i\\cdot\\log(\\sigma({\\mathrm{X_i}}w + b)) + (1- y_i)\\cdot\\log(1 - \\sigma({\\mathrm{X_i}}w + b))$$\n", + "\n", + "By taking the gradient $L$ with respect to $w$, you get the following:\n", + "\n", + "$$\\frac{\\partial L}{\\partial w} = \\frac{1}{m}(\\sigma({\\mathrm{X}}w + b) - y)X$$\n", + "\n", + "By taking the gradient $L$ with respect to $b$, you get the following:\n", + "\n", + "$$\\frac{\\partial L}{\\partial b} = \\frac{1}{m}\\sum_{i=1}^{m}\\sigma({\\mathrm{X_i}}w + b) - y_i$$" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uTCndUecKZho" + }, + "source": [ + "Now, build the logistic regression model." 
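Optionally, you can first verify the derivation numerically. The following standalone sketch (small, made-up random data; it is not part of the tutorial's training loop) compares the analytic gradients above against `tf.GradientTape`:

```python
import tensorflow as tf

# Compare the closed-form gradients with automatic differentiation on toy data.
m, n = 8, 3
X = tf.random.normal([m, n])
y = tf.cast(tf.random.uniform([m]) > 0.5, tf.float32)
w = tf.Variable(tf.random.normal([n, 1]))
b = tf.Variable(0.0)

with tf.GradientTape() as tape:
    z = tf.squeeze(X @ w, axis=1) + b
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=z))
dw_auto, db_auto = tape.gradient(loss, [w, b])

residual = tf.math.sigmoid(z) - y                    # sigma(Xw + b) - y, shape (m,)
dw_manual = tf.transpose(X) @ residual[:, None] / m  # (1/m) X^T (sigma(Xw + b) - y)
db_manual = tf.reduce_mean(residual)                 # (1/m) sum of the residuals

print(tf.reduce_max(tf.abs(dw_auto - dw_manual)).numpy())  # ~0
print(tf.abs(db_auto - db_manual).numpy())                 # ~0
```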
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c0sXM7qLlKfZ" + }, + "outputs": [], + "source": [ + "class LogisticRegression(tf.Module):\n", + "\n", + " def __init__(self):\n", + " self.built = False\n", + " \n", + " def __call__(self, x, train=True):\n", + " # Initialize the model parameters on the first call\n", + " if not self.built:\n", + " # Randomly generate the weights and the bias term\n", + " rand_w = tf.random.uniform(shape=[x.shape[-1], 1], seed=22)\n", + " rand_b = tf.random.uniform(shape=[], seed=22)\n", + " self.w = tf.Variable(rand_w)\n", + " self.b = tf.Variable(rand_b)\n", + " self.built = True\n", + " # Compute the model output\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " z = tf.squeeze(z, axis=1)\n", + " if train:\n", + " return z\n", + " return tf.sigmoid(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eObQu9fDnXGL" + }, + "source": [ + "To validate, make sure the untrained model outputs values in the range of `(0, 1)` for a small subset of the training data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5bIovC0Z4QHJ" + }, + "outputs": [], + "source": [ + "log_reg = LogisticRegression()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QJ2ievISyf0p" + }, + "outputs": [], + "source": [ + "y_pred = log_reg(x_train_norm[:5], train=False)\n", + "y_pred.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PribnwDHUksC" + }, + "source": [ + "Next, write an accuracy function to calculate the proportion of correct classifications during training. In order to retrieve the classifications from the predicted probabilities, set a threshold for which all probabilities higher than the threshold belong to class `1`. This is a configurable hyperparameter that can be set to `0.5` as a default." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssnVcKg7oMe6" + }, + "outputs": [], + "source": [ + "def predict_class(y_pred, thresh=0.5):\n", + " # Return a tensor with `1` if `y_pred` > `0.5`, and `0` otherwise\n", + " return tf.cast(y_pred > thresh, tf.float32)\n", + "\n", + "def accuracy(y_pred, y):\n", + " # Return the proportion of matches between `y_pred` and `y`\n", + " y_pred = tf.math.sigmoid(y_pred)\n", + " y_pred_class = predict_class(y_pred)\n", + " check_equal = tf.cast(y_pred_class == y,tf.float32)\n", + " acc_val = tf.reduce_mean(check_equal)\n", + " return acc_val" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J_0KHQ25_2dF" + }, + "source": [ + "### Train the model\n", + "\n", + "Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling. The API enables you to build complex input pipelines from simple, reusable pieces. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vJD7-4U0etqa" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train))\n", + "train_dataset = train_dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((x_test_norm, y_test))\n", + "test_dataset = test_dataset.shuffle(buffer_size=x_test.shape[0]).batch(batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sLiWZZPBSDip" + }, + "source": [ + "Now write a training loop for the logistic regression model. The loop utilizes the log loss function and its gradients with respect to the input in order to iteratively update the model's parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jNC3D1DGsGgK" + }, + "outputs": [], + "source": [ + "# Set training parameters\n", + "epochs = 200\n", + "learning_rate = 0.01\n", + "train_losses, test_losses = [], []\n", + "train_accs, test_accs = [], []\n", + "\n", + "# Set up the training loop and begin training\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_test, batch_accs_test = [], []\n", + "\n", + " # Iterate over the training data\n", + " for x_batch, y_batch in train_dataset:\n", + " with tf.GradientTape() as tape:\n", + " y_pred_batch = log_reg(x_batch)\n", + " batch_loss = log_loss(y_pred_batch, y_batch)\n", + " batch_acc = accuracy(y_pred_batch, y_batch)\n", + " # Update the parameters with respect to the gradient calculations\n", + " grads = tape.gradient(batch_loss, log_reg.variables)\n", + " for g,v in zip(grads, log_reg.variables):\n", + " v.assign_sub(learning_rate * g)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate over the testing data\n", + " for x_batch, y_batch in test_dataset:\n", + " y_pred_batch = log_reg(x_batch)\n", + " batch_loss = log_loss(y_pred_batch, y_batch)\n", + " batch_acc = accuracy(y_pred_batch, y_batch)\n", + " # Keep track of batch-level testing performance\n", + " batch_losses_test.append(batch_loss)\n", + " batch_accs_test.append(batch_acc)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " test_loss, test_acc = tf.reduce_mean(batch_losses_test), tf.reduce_mean(batch_accs_test)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " test_losses.append(test_loss)\n", + " test_accs.append(test_acc)\n", + " if epoch % 20 == 0:\n", + " print(f\"Epoch: {epoch}, Training log loss: {train_loss:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NoLiAg7fYft7" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Observe the changes in your model's loss and accuracy over time. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mv3oCQPvWhr0" + }, + "outputs": [], + "source": [ + "plt.plot(range(epochs), train_losses, label = \"Training loss\")\n", + "plt.plot(range(epochs), test_losses, label = \"Testing loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Log loss\")\n", + "plt.legend()\n", + "plt.title(\"Log loss vs training iterations\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D2HDVGLPODIE" + }, + "outputs": [], + "source": [ + "plt.plot(range(epochs), train_accs, label = \"Training accuracy\")\n", + "plt.plot(range(epochs), test_accs, label = \"Testing accuracy\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Accuracy (%)\")\n", + "plt.legend()\n", + "plt.title(\"Accuracy vs training iterations\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jonKhUzuPyfa" + }, + "outputs": [], + "source": [ + "print(f\"Final training log loss: {train_losses[-1]:.3f}\")\n", + "print(f\"Final testing log Loss: {test_losses[-1]:.3f}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d3DF4qyrPyke" + }, + "outputs": [], + "source": [ + "print(f\"Final training accuracy: {train_accs[-1]:.3f}\")\n", + "print(f\"Final testing accuracy: {test_accs[-1]:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yrj1TbOJasjA" + }, + "source": [ + "The model demonstrates a high accuracy and a low loss when it comes to classifying tumors in the training dataset and also generalizes well to the unseen test data. To go one step further, you can explore error rates that give more insight beyond the overall accuracy score. The two most popular error rates for binary classification problems are the false positive rate (FPR) and the false negative rate (FNR).\n", + "\n", + "For this problem, the FPR is the proportion of malignant tumor predictions amongst tumors that are actually benign. 
Conversely, the FNR is the proportion of benign tumor predictions among tumors that are actually malignant.\n", + "\n", + "Compute a confusion matrix using [`sklearn.metrics.confusion_matrix`](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html#sklearn.metrics.confusion_matrix), which evaluates the accuracy of the classification, and use matplotlib to display the matrix:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OJO7YkA8ZDMU" + }, + "outputs": [], + "source": [ + "def show_confusion_matrix(y, y_classes, typ):\n", + " # Compute the confusion matrix and normalize it\n", + " plt.figure(figsize=(10,10))\n", + " confusion = sk_metrics.confusion_matrix(y.numpy(), y_classes.numpy())\n", + " confusion_normalized = confusion / confusion.sum(axis=1, keepdims=True)\n", + " axis_labels = range(2)\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.4f', square=True)\n", + " plt.title(f\"Confusion matrix: {typ}\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "y_pred_train, y_pred_test = log_reg(x_train_norm, train=False), log_reg(x_test_norm, train=False)\n", + "train_classes, test_classes = predict_class(y_pred_train), predict_class(y_pred_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OQ5DFcleiDFm" + }, + "outputs": [], + "source": [ + "show_confusion_matrix(y_train, train_classes, 'Training')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gtfcsAp_iCNR" + }, + "outputs": [], + "source": [ + "show_confusion_matrix(y_test, test_classes, 'Testing')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DlivxaDmTnGq" + }, + "source": [ + "Observe the error rate measurements and interpret their significance in the context of this example. In many medical testing studies such as cancer detection, having a high false positive rate to ensure a low false negative rate is perfectly acceptable and in fact encouraged since the risk of missing a malignant tumor diagnosis (false negative) is a lot worse than misclassifying a benign tumor as malignant (false positive).\n", + "\n", + "In order to control for the FPR and FNR, try changing the threshold hyperparameter before classifying the probability predictions. A lower threshold increases the model's overall chances of making a malignant tumor classification. This inevitably increases the number of false positives and the FPR but it also helps to decrease the number of false negatives and the FNR." 
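As an illustrative follow-up (not part of the original notebook), you can sweep the threshold and observe the trade-off directly. This sketch reuses `log_reg`, `predict_class`, `x_test_norm`, `y_test`, and the `sk_metrics` import defined earlier:

```python
# Sweep the decision threshold and report the test-set FPR and FNR.
probs = log_reg(x_test_norm, train=False)  # predicted probabilities
for thresh in [0.3, 0.5, 0.7]:
    preds = predict_class(probs, thresh)
    tn, fp, fn, tp = sk_metrics.confusion_matrix(y_test.numpy(), preds.numpy()).ravel()
    fpr = fp / (fp + tn)  # benign tumors classified as malignant
    fnr = fn / (fn + tp)  # malignant tumors classified as benign
    print(f"threshold={thresh:.1f}  FPR={fpr:.3f}  FNR={fnr:.3f}")
```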
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7ADEN2rb4Nhj" + }, + "source": [ + "## Save the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Normalization\n", + "- Probability prediction\n", + "- Class prediction\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6KPRHCzg4ZxH" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, norm_x, class_pred):\n", + " # Initialize pre- and post-processing functions\n", + " self.model = model\n", + " self.norm_x = norm_x\n", + " self.class_pred = class_pred\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)])\n", + " def __call__(self, x):\n", + " # Run the `ExportModule` for new data points\n", + " x = self.norm_x.norm(x)\n", + " y = self.model(x, train=False)\n", + " y = self.class_pred(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2YzRclo5-yjO" + }, + "outputs": [], + "source": [ + "log_reg_export = ExportModule(model=log_reg,\n", + " norm_x=norm_x,\n", + " class_pred=predict_class)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gtofGIBN_qFd" + }, + "source": [ + "If you want to save the model at its current state, you can do so with the `tf.saved_model.save` function. To load a saved model and make predictions, use the `tf.saved_model.load` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a4Qum1Ts_pmF" + }, + "outputs": [], + "source": [ + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'log_reg_export')\n", + "tf.saved_model.save(log_reg_export, save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3KPILr1i_M_c" + }, + "outputs": [], + "source": [ + "log_reg_loaded = tf.saved_model.load(save_path)\n", + "test_preds = log_reg_loaded(x_test)\n", + "test_preds[:10].numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vgGQuV-yqYZH" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced a few techniques to handle a logistic regression problem. Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build machine learning workflows with high levels of configurability\n", + "- Analyzing error rates is a great way to gain more insight about a classification model's performance beyond its overall accuracy score.\n", + "- Overfitting is another common problem for logistic regression models, though it wasn't a problem for this tutorial. Visit the [Overfit and underfit](../../tutorials/keras/overfit_and_underfit.ipynb) tutorial for more help with this.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](../../tutorials/load_data/images.ipynb) or [CSV data loading](../../tutorials/load_data/csv.ipynb)." 
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "logistic_regression_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/matrix_core.ipynb b/site/en/guide/core/matrix_core.ipynb new file mode 100644 index 00000000000..1d7d35ed047 --- /dev/null +++ b/site/en/guide/core/matrix_core.ipynb @@ -0,0 +1,731 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Matrix approximation with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qGw8TF2vtzru" + }, + "source": [ + "## Introduction \n", + "\n", + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to showcase TensorFlow's capabilities as a high-performance scientific computing platform. Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases.\n", + "\n", + "This tutorial explores the technique of [singular value decomposition](https://developers.google.com/machine-learning/recommendation/collaborative/matrix) (SVD) and its applications for low-rank approximation problems. The SVD is used to factorize real or complex matrices and has a variety of use cases in data science such as image compression. The images for this tutorial come from Google Brain's [Imagen](https://imagen.research.google/) project. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5_FdwaovEkCC" + }, + "source": [ + ">![svd_intro](http://tensorflow.org/images/core/svd_intro.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib.image import imread\n", + "from matplotlib import pyplot as plt\n", + "import requests\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [16, 9]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "so_ewq3gAoEI" + }, + "source": [ + "## SVD fundamentals\n", + "\n", + "The singular value decomposition of a matrix, ${\\mathrm{A}}$, is determined by the following factorization:\n", + "\n", + "$${\\mathrm{A}} = {\\mathrm{U}} \\Sigma {\\mathrm{V}}^T$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m \\times n}{\\mathrm{A}}$: input matrix where $m \\geq n$\n", + "* $\\underset{m \\times n}{\\mathrm{U}}$: orthogonal matrix, ${\\mathrm{U}}^T{\\mathrm{U}} = {\\mathrm{I}}$, with each column, $u_i$, denoting a left singular vector of ${\\mathrm{A}}$\n", + "* $\\underset{n \\times n}{\\Sigma}$: diagonal matrix with each diagonal entry, $\\sigma_i$, denoting a singular value of ${\\mathrm{A}}$\n", + "* $\\underset{n \\times n}{{\\mathrm{V}}^T}$: orthogonal matrix, ${\\mathrm{V}}^T{\\mathrm{V}} = {\\mathrm{I}}$, with each row, $v_i$, denoting a right singular vector of ${\\mathrm{A}}$\n", + "\n", + "When $m < n$, ${\\mathrm{U}}$ and $\\Sigma$ both have dimension $(m \\times m)$, and ${\\mathrm{V}}^T$ has dimension $(m \\times n)$." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "enGGGXCQKNv8" + }, + "source": [ + ">![svd_full](http://tensorflow.org/images/core/svd_full.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NlP-cBdSKLtc" + }, + "source": [ + "TensorFlow's linear algebra package has a function, `tf.linalg.svd`, which can be used to compute the singular value decomposition of one or more matrices. 
Start by defining a simple matrix and computing its SVD factorization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C3QAcgyoeIpv" + }, + "outputs": [], + "source": [ + "A = tf.random.uniform(shape=[40,30])\n", + "# Compute the SVD factorization\n", + "s, U, V = tf.linalg.svd(A)\n", + "# Define Sigma and V Transpose\n", + "S = tf.linalg.diag(s)\n", + "V_T = tf.transpose(V)\n", + "# Reconstruct the original matrix\n", + "A_svd = U@S@V_T\n", + "# Visualize \n", + "plt.bar(range(len(s)), s);\n", + "plt.xlabel(\"Singular value rank\")\n", + "plt.ylabel(\"Singular value\")\n", + "plt.title(\"Bar graph of singular values\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6H_C9WhFACm4" + }, + "source": [ + "The `tf.einsum` function can be used to directly compute the matrix reconstruction from the outputs of `tf.linalg.svd`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TPE6QeMtADUn" + }, + "outputs": [], + "source": [ + "A_svd = tf.einsum('s,us,vs -> uv',s,U,V)\n", + "print('\\nReconstructed Matrix, A_svd', A_svd)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x1m6JIsM9DLP" + }, + "source": [ + "## Low rank approximation with the SVD\n", + "\n", + "The rank of a matrix, ${\\mathrm{A}}$, is determined by the dimension of the vector space spanned by its columns. \n", + "The SVD can be used to approximate a matrix with a lower rank, which ultimately decreases the dimensionality of data required to store the information represented by the matrix.\n", + "\n", + "The rank-r approximation of ${\\mathrm{A}}$ in terms of the SVD is defined by the formula:\n", + "\n", + "$${\\mathrm{A_r}} = {\\mathrm{U_r}} \\Sigma_r {\\mathrm{V_r}}^T$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m \\times r}{\\mathrm{U_r}}$: matrix consisting of the first $r$ columns of ${\\mathrm{U}}$\n", + "* $\\underset{r \\times r}{\\Sigma_r}$: diagonal matrix consisting of the first $r$ singular values in $\\Sigma$\n", + "* $\\underset{r \\times n}{\\mathrm{V_r}}^T$: matrix consisting of the first $r$ rows of ${\\mathrm{V}}^T$" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nJWMJu36QyUV" + }, + "source": [ + ">![svd_approx](http://tensorflow.org/images/core/svd_approx.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TkiVUxeaQybq" + }, + "source": [ + "Start by writing a function to compute the rank-r approximation of a given matrix. This low-rank approximation procedure is used for image compression; therefore, it is also helpful to compute the physical data sizes for each approximation. For simplicity, assume that data size for an rank-r approximated matrix is equal to the total number of elements required to compute the approximation. Next, write a function to visualize the original matrix, $\\mathrm{A}$ its rank-r approximation, $\\mathrm{A}_r$ and the error matrix, $|\\mathrm{A} - \\mathrm{A}_r|$." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2oY3pMPagJrO" + }, + "outputs": [], + "source": [ + "def rank_r_approx(s, U, V, r, verbose=False):\n", + " # Compute the matrices necessary for a rank-r approximation\n", + " s_r, U_r, V_r = s[..., :r], U[..., :, :r], V[..., :, :r] # ... 
implies any number of extra batch axes\n", + " # Compute the low-rank approximation and its size\n", + " A_r = tf.einsum('...s,...us,...vs->...uv',s_r,U_r,V_r)\n", + " A_r_size = tf.size(U_r) + tf.size(s_r) + tf.size(V_r)\n", + " if verbose:\n", + " print(f\"Approximation Size: {A_r_size}\")\n", + " return A_r, A_r_size\n", + "\n", + "def viz_approx(A, A_r):\n", + " # Plot A, A_r, and A - A_r\n", + " vmin, vmax = 0, tf.reduce_max(A)\n", + " fig, ax = plt.subplots(1,3)\n", + " mats = [A, A_r, abs(A - A_r)]\n", + " titles = ['Original A', 'Approximated A_r', 'Error |A - A_r|']\n", + " for i, (mat, title) in enumerate(zip(mats, titles)):\n", + " ax[i].pcolormesh(mat, vmin=vmin, vmax=vmax)\n", + " ax[i].set_title(title)\n", + " ax[i].axis('off')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O3ZRkYCkX2FQ" + }, + "outputs": [], + "source": [ + "print(f\"Original Size of A: {tf.size(A)}\")\n", + "s, U, V = tf.linalg.svd(A)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "S1DR83VMX4cM" + }, + "outputs": [], + "source": [ + "# Rank-15 approximation\n", + "A_15, A_15_size = rank_r_approx(s, U, V, 15, verbose = True)\n", + "viz_approx(A, A_15)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KgFT70XFX57E" + }, + "outputs": [], + "source": [ + "# Rank-3 approximation\n", + "A_3, A_3_size = rank_r_approx(s, U, V, 3, verbose = True)\n", + "viz_approx(A, A_3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DS4XoSlTJgX0" + }, + "source": [ + "As expected, using lower ranks results in less-accurate approximations. However, the quality of these low-rank approximations are often good enough in real world scenarios. Also note that the main goal of low-rank approximation with SVD \n", + "is to reduce the dimensionality of the data but not to reduce the disk space of the data itself. However, as the input matrices become higher-dimensional, many low-rank approximations also end up benefiting from reduced data size. This reduction benefit is why the process is applicable for image compression problems." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhsaiOnnZs6M" + }, + "source": [ + "## Image loading\n", + "\n", + "The following image is available on the [Imagen](https://imagen.research.google/) home page. Imagen is a text-to-image diffusion model developed by Google Research's Brain team. An AI created this image based on the prompt: \"A photo of a Corgi dog riding a bike in Times Square. It is wearing sunglasses and a beach hat.\" How cool is that! You can also change the url below to any .jpg link to load in a custom image of choice. \n", + "\n", + "Start by reading in and visualizing the image. After reading a JPEG file, Matplotlib outputs a matrix, ${\\mathrm{I}}$, of shape $(m \\times n \\times 3)$ which represents a 2-dimensional image with 3 color channels for red, green and blue respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OVsZOQUAZ2C7" + }, + "outputs": [], + "source": [ + "img_link = \"https://imagen.research.google/main_gallery_images/a-photo-of-a-corgi-dog-riding-a-bike-in-times-square.jpg/"\n", + "img_path = requests.get(img_link, stream=True).raw\n", + "I = imread(img_path, 0)\n", + "print(\"Input Image Shape:\", I.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qvs7uftcZ54x" + }, + "outputs": [], + "source": [ + "def show_img(I):\n", + " # Display the image in matplotlib\n", + " img = plt.imshow(I)\n", + " plt.axis('off')\n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZbesXO3HZ6Qs" + }, + "outputs": [], + "source": [ + "show_img(I)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tdnUBVg_JoOa" + }, + "source": [ + "## The image compression algorithm\n", + "\n", + "Now, use the SVD to compute low-rank approximations of the sample image. Recall that the image is of shape $(1024 \times 1024 \times 3)$ and that SVD theory only applies to 2-dimensional matrices. This means that the sample image has to be batched into 3 equal-size matrices that correspond to each of the 3 color channels. This can be done by transposing the matrix to be of shape $(3 \times 1024 \times 1024)$. In order to clearly visualize the approximation error, rescale the RGB values of the image from $[0,255]$ to $[0,1]$. Remember to clip the approximated values to fall within this interval before visualizing them. The `tf.clip_by_value` function is useful for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i7DDp0h7oSIk" + }, + "outputs": [], + "source": [ + "def compress_image(I, r, verbose=False):\n", + " # Compress an image with the SVD given a rank \n", + " I_size = tf.size(I)\n", + " print(f\"Original size of image: {I_size}\")\n", + " # Compute SVD of image\n", + " I = tf.convert_to_tensor(I)/255\n", + " I_batched = tf.transpose(I, [2, 0, 1]) # einops.rearrange(I, 'h w c -> c h w')\n", + " s, U, V = tf.linalg.svd(I_batched)\n", + " # Compute low-rank approximation of image across each RGB channel\n", + " I_r, I_r_size = rank_r_approx(s, U, V, r)\n", + " I_r = tf.transpose(I_r, [1, 2, 0]) # einops.rearrange(I_r, 'c h w -> h w c')\n", + " I_r_prop = (I_r_size / I_size)\n", + " if verbose:\n", + " # Display compressed image and attributes\n", + " print(f\"Number of singular values used in compression: {r}\")\n", + " print(f\"Compressed image size: {I_r_size}\")\n", + " print(f\"Proportion of original size: {I_r_prop:.3f}\")\n", + " ax_1 = plt.subplot(1,2,1)\n", + " show_img(tf.clip_by_value(I_r,0.,1.))\n", + " ax_1.set_title(\"Approximated image\")\n", + " ax_2 = plt.subplot(1,2,2)\n", + " show_img(tf.clip_by_value(0.5+abs(I-I_r),0.,1.))\n", + " ax_2.set_title(\"Error\")\n", + " return I_r, I_r_prop" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RGQ_rTyKDX9F" + }, + "source": [ + "Now, compute rank-r approximations for the following ranks: 100, 50, 10" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7GlKkVLGDjre" + }, + "outputs": [], + "source": [ + "I_100, I_100_prop = compress_image(I, 100, verbose=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XdvUkF5_E75D" + }, + "outputs": [], + "source": [ + "I_50, I_50_prop = compress_image(I, 50, verbose=True)" + ]
+ }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MsCNZ8416Sbk" + }, + "outputs": [], + "source": [ + "I_10, I_10_prop = compress_image(I, 10, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RfYYBhcuNkvH" + }, + "source": [ + "## Evaluating approximations\n", + "\n", + "There are a variety of interesting methods to measure the effectiveness and have more control over matrix approximations." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D2Lotde9Zg7v" + }, + "source": [ + "### Compression factor vs rank\n", + "\n", + "For each of the above approximations, observe how the data sizes change with the rank." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O1ariNQe6Wbl" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(11,6))\n", + "plt.plot([100, 50, 10], [I_100_prop, I_50_prop, I_10_prop])\n", + "plt.xlabel(\"Rank\")\n", + "plt.ylabel(\"Proportion of original image size\")\n", + "plt.title(\"Compression factor vs rank\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dvHcLRj2QoDg" + }, + "source": [ + "Based on this plot, there is a linear relationship between an approximated image's compression factor and its rank. To explore this further, recall that the data size of an approximated matrix, ${\\mathrm{A}}_r$, is defined as the total number of elements required for its computation. The following equations can be used to find the relationship between compression factor and rank:\n", + "\n", + "$$x = (m \\times r) + r + (r \\times n) = r \\times (m + n + 1)$$\n", + "\n", + "$$c = \\large \\frac{x}{y} = \\frac{r \\times (m + n + 1)}{m \\times n}$$\n", + "\n", + "where\n", + "\n", + "* $x$: size of ${\\mathrm{A_r}}$\n", + "* $y$: size of ${\\mathrm{A}}$\n", + "* $c = \\frac{x}{y}$: compression factor\n", + "* $r$: rank of the approximation\n", + "* $m$ and $n$: row and column dimensions of ${\\mathrm{A}}$\n", + "\n", + "In order to find the rank, $r$, that is necessary to compress an image to a desired factor, $c$, the above equation can be rearranged to solve for $r$:\n", + "\n", + "$$r = ⌊{\\large\\frac{c \\times m \\times n}{m + n + 1}}⌋$$\n", + "\n", + "Note that this formula is independent of the color channel dimension since each of the RGB approximations do not affect each other. Now, write a function to compress an input image given a desired compression factor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "viVO-I60QynI" + }, + "outputs": [], + "source": [ + "def compress_image_with_factor(I, compression_factor, verbose=False):\n", + " # Returns a compressed image based on a desired compression factor\n", + " m,n,o = I.shape\n", + " r = int((compression_factor * m * n)/(m + n + 1))\n", + " I_r, I_r_prop = compress_image(I, r, verbose=verbose)\n", + " return I_r" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gWSv58J6LSRQ" + }, + "source": [ + "Compress an image to 15% of its original size." 
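As a quick worked check of the formula above (assuming the 1024 × 1024 sample image), a target compression factor of 15% corresponds to roughly 76 singular values per channel, which is the rank that the call below ends up using:

```python
m = n = 1024  # spatial dimensions of the sample image
c = 0.15      # desired compression factor
r = int((c * m * n) / (m + n + 1))
print(r)      # 76
```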
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HVeeloIwQ1b6" + }, + "outputs": [], + "source": [ + "compression_factor = 0.15\n", + "I_r_img = compress_image_with_factor(I, compression_factor, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LkeRyms7jZMd" + }, + "source": [ + "### Cumulative sum of singular values\n", + "\n", + "The cumulative sum of singular values can be a useful indicator for the amount of energy captured by a rank-r approximation. Visualize the RGB-averaged cumulative proportion of singular values in the sample image. The `tf.cumsum` function can be useful for this." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CteJ6VbKlndu" + }, + "outputs": [], + "source": [ + "def viz_energy(I):\n", + " # Visualize the energy captured based on rank\n", + " # Computing SVD\n", + " I = tf.convert_to_tensor(I)/255\n", + " I_batched = tf.transpose(I, [2, 0, 1]) \n", + " s, U, V = tf.linalg.svd(I_batched)\n", + " # Plotting average proportion across RGB channels \n", + " props_rgb = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s)\n", + " props_rgb_mean = tf.reduce_mean(props_rgb, axis=0)\n", + " plt.figure(figsize=(11,6))\n", + " plt.plot(range(len(I)), props_rgb_mean, color='k')\n", + " plt.xlabel(\"Rank / singular value number\")\n", + " plt.ylabel(\"Cumulative proportion of singular values\")\n", + " plt.title(\"RGB-averaged proportion of energy captured by the first 'r' singular values\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vl9PKow-GgCp" + }, + "outputs": [], + "source": [ + "viz_energy(I)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vQtwimKuQP19" + }, + "source": [ + "It looks like over 90% of the energy in this image is captured within the first 100 singular values. Now, write a function to compress an input image given a desired energy retention factor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fum5Cvm7R5vH" + }, + "outputs": [], + "source": [ + "def compress_image_with_energy(I, energy_factor, verbose=False):\n", + " # Returns a compressed image based on a desired energy factor\n", + " # Computing SVD\n", + " I_rescaled = tf.convert_to_tensor(I)/255\n", + " I_batched = tf.transpose(I_rescaled, [2, 0, 1]) \n", + " s, U, V = tf.linalg.svd(I_batched)\n", + " # Extracting singular values\n", + " props_rgb = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s)\n", + " props_rgb_mean = tf.reduce_mean(props_rgb, axis=0)\n", + " # Find closest r that corresponds to the energy factor\n", + " r = tf.argmin(tf.abs(props_rgb_mean - energy_factor)) + 1\n", + " actual_ef = props_rgb_mean[r]\n", + " I_r, I_r_prop = compress_image(I, r, verbose=verbose)\n", + " print(f\"Proportion of energy captured by the first {r} singular values: {actual_ef:.3f}\")\n", + " return I_r" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y_rChG0OLby1" + }, + "source": [ + "Compress an image to retain 75% of its energy." 
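+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EnChk100SVDa" + }, + "source": [ + "Before doing so, here is a quick numeric check of the earlier observation that the first 100 singular values capture over 90% of the energy. This is a sketch that simply recomputes the SVD of the sample image `I` with `tf.linalg.svd` and reads off the RGB-averaged cumulative proportion at rank 100." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EnChk100SVDb" + }, + "outputs": [], + "source": [ + "# Recompute the RGB-averaged cumulative energy and read off the value at rank 100.\n", + "I_rescaled_check = tf.convert_to_tensor(I)/255\n", + "s_check, _, _ = tf.linalg.svd(tf.transpose(I_rescaled_check, [2, 0, 1]))\n", + "props_check = tf.map_fn(lambda x: tf.cumsum(x)/tf.reduce_sum(x), s_check)\n", + "energy_at_100 = tf.reduce_mean(props_check, axis=0)[99].numpy()\n", + "print(f\"Energy captured by the first 100 singular values: {energy_at_100:.3f}\")"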
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xDXBaZQ4c5jF" + }, + "outputs": [], + "source": [ + "energy_factor = 0.75\n", + "I_r_img = compress_image_with_energy(I, energy_factor, verbose=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2tmqTW0CYX-v" + }, + "source": [ + "### Error and singular values\n", + "\n", + "There is also an interesting relationship between the approximation error and the singular values. It turns out that the squared Frobenius norm of the approximation error is equal to the sum of the squares of the singular values that were left out:\n", + "\n", + "$${||A - A_r||}^2 = \sum_{i=r+1}^{R}σ_i^2$$\n", + "\n", + "Test out this relationship with a rank-10 approximation of the example matrix from the beginning of this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hctOvN8BckiS" + }, + "outputs": [], + "source": [ + "s, U, V = tf.linalg.svd(A)\n", + "A_10, A_10_size = rank_r_approx(s, U, V, 10)\n", + "squared_norm = tf.norm(A - A_10)**2\n", + "s_squared_sum = tf.reduce_sum(s[10:]**2)\n", + "print(f\"Squared Frobenius norm: {squared_norm:.3f}\")\n", + "print(f\"Sum of squared singular values left out: {s_squared_sum:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vgGQuV-yqYZH" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced the process of implementing the singular value decomposition with TensorFlow and applying it to write an image compression algorithm. Here are a few more tips that may help:\n", + "\n", + "* The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be utilized for a variety of high-performance scientific computing use cases.\n", + "* To learn more about TensorFlow's linear algebra functionalities, visit the docs for the [linalg module](https://www.tensorflow.org/api_docs/python/tf/linalg).\n", + "* The SVD can also be applied to build [recommendation systems](https://developers.google.com/machine-learning/recommendation/labs/movie-rec-programming-exercise).\n", + "\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "matrix_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/mlp_core.ipynb b/site/en/guide/core/mlp_core.ipynb new file mode 100644 index 00000000000..a5975c20c6e --- /dev/null +++ b/site/en/guide/core/mlp_core.ipynb @@ -0,0 +1,964 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Multilayer perceptrons for digit recognition with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "This notebook uses the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to build an end-to-end machine learning workflow for handwritten digit classification with [multilayer perceptrons](https://developers.google.com/machine-learning/crash-course/introduction-to-neural-networks/anatomy) and the [MNIST dataset](http://yann.lecun.com/exdb/mnist). Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GHVMVIFHSzl1" + }, + "source": [ + "## Multilayer perceptron (MLP) overview\n", + "\n", + "The Multilayer Perceptron (MLP) is a type of feedforward neural network used to approach [multiclass classification](https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/video-lecture) problems. Before building an MLP, it is crucial to understand the concepts of perceptrons, layers, and activation functions.\n", + "\n", + "Multilayer Perceptrons are made up of functional units called perceptrons. The equation of a perceptron is as follows:\n", + "\n", + "$$Z = \\vec{w}⋅\\mathrm{X} + b$$\n", + "\n", + "where\n", + "\n", + "* $Z$: perceptron output\n", + "* $\\mathrm{X}$: feature matrix\n", + "* $\\vec{w}$: weight vector\n", + "* $b$: bias\n", + "\n", + "When these perceptrons are stacked, they form structures called dense layers which can then be connected to build a neural network. A dense layer's equation is similar to that of a perceptron's but uses a weight matrix and a bias vector instead: \n", + "\n", + "$$Z = \\mathrm{W}⋅\\mathrm{X} + \\vec{b}$$\n", + "\n", + "where\n", + "\n", + "* $Z$: dense layer output\n", + "* $\\mathrm{X}$: feature matrix\n", + "* $\\mathrm{W}$: weight matrix\n", + "* $\\vec{b}$: bias vector\n", + "\n", + "\n", + "In an MLP, multiple dense layers are connected in such a way that the outputs of one layer are fully connected to the inputs of the next layer. Adding non-linear activation functions to the outputs of dense layers can help the MLP classifier learn complex decision boundaries and generalize well to unseen data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup\n", + "\n", + "Import TensorFlow, [pandas](https://pandas.pydata.org), [Matplotlib](https://matplotlib.org) and [seaborn](https://seaborn.pydata.org) to get started." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mSfgqmwBagw_" + }, + "outputs": [], + "source": [ + "# Use seaborn for countplot.\n", + "!pip install -q seaborn" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1rRo8oNqZ-Rj" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "import seaborn as sns\n", + "import tempfile\n", + "import os\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "print(tf.__version__)\n", + "# Set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_72b0LCNbjx" + }, + "source": [ + "## Load the data\n", + "\n", + "This tutorial uses the [MNIST dataset](http://yann.lecun.com/exdb/mnist), and demonstrates how to build an MLP model that can classify handwritten digits. The dataset is available from [TensorFlow Datasets](https://www.tensorflow.org/datasets/catalog/mnist).\n", + "\n", + "Split the MNIST dataset into training, validation, and testing sets. The validation set can be used to gauge the model's generalizability during training so that the test set can serve as a final unbiased estimator for the model's performance.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uiuh0B098_3p" + }, + "outputs": [], + "source": [ + "train_data, val_data, test_data = tfds.load(\"mnist\", \n", + " split=['train[10000:]', 'train[0:10000]', 'test'],\n", + " batch_size=128, as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X9uN3Lf6ANtn" + }, + "source": [ + "The MNIST dataset consists of handwritten digits and their corresponding true labels. Visualize a couple of examples below." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6V8hSqJ7AMjQ" + }, + "outputs": [], + "source": [ + "x_viz, y_viz = tfds.load(\"mnist\", split=['train[:1500]'], batch_size=-1, as_supervised=True)[0]\n", + "x_viz = tf.squeeze(x_viz, axis=3)\n", + "\n", + "for i in range(9):\n", + " plt.subplot(3,3,1+i)\n", + " plt.axis('off')\n", + " plt.imshow(x_viz[i], cmap='gray')\n", + " plt.title(f\"True Label: {y_viz[i]}\")\n", + " plt.subplots_adjust(hspace=.5)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bRald9dSE4qS" + }, + "source": [ + "Also review the distribution of digits in the training data to verify that each class is well represented in the dataset.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Rj3K4XgQE7qR" + }, + "outputs": [], + "source": [ + "sns.countplot(x=y_viz.numpy());\n", + "plt.xlabel('Digits')\n", + "plt.title(\"MNIST Digit Distribution\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x_Wt4bDx_BRV" + }, + "source": [ + "## Preprocess the data\n", + "\n", + "First, reshape the feature matrices to be 2-dimensional by flattening the images. Next, rescale the data so that the pixel values of [0,255] fit into the range of [0,1]. This step ensures that the input pixels have similar distributions and helps with training convergence." 
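+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BatchPeek0aA" + }, + "source": [ + "To see what the preprocessing needs to handle, it can help to peek at a single batch first. This is a quick check that assumes the `train_data` pipeline loaded above; the raw batches are expected to arrive as `uint8` image tensors of shape `(batch, 28, 28, 1)` with integer labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BatchPeek0bB" + }, + "outputs": [], + "source": [ + "# Inspect one raw batch to motivate the reshape and rescaling below.\n", + "x_sample, y_sample = next(iter(train_data))\n", + "print(\"Image batch shape:\", x_sample.shape, \"dtype:\", x_sample.dtype)\n", + "print(\"Label batch shape:\", y_sample.shape, \"dtype:\", y_sample.dtype)"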
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JSyCm2V2_AvI" + }, + "outputs": [], + "source": [ + "def preprocess(x, y):\n", + " # Reshaping the data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " # Rescaling the data\n", + " x = x/255\n", + " return x, y\n", + "\n", + "train_data, val_data = train_data.map(preprocess), val_data.map(preprocess)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6o3CrycBXA2s" + }, + "source": [ + "## Build the MLP \n", + "\n", + "Start by visualizing the [ReLU](https://developers.google.com/machine-learning/glossary#ReLU) and [Softmax](https://developers.google.com/machine-learning/glossary#softmax) activation functions. Both functions are available in `tf.nn.relu` and `tf.nn.softmax` respectively. The ReLU is a non-linear activation function that outputs the input if it is positive and 0 otherwise: \n", + "\n", + "$$\\text{ReLU}(X) = max(0, X)$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hYunzt3UyT9G" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-2, 2, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.nn.relu(x));\n", + "plt.xlabel('x')\n", + "plt.ylabel('ReLU(x)')\n", + "plt.title('ReLU activation function');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fuGrM9jMwsRM" + }, + "source": [ + "The softmax activation function is a normalized exponential function that converts $m$ real numbers into a probability distribution with $m$ outcomes/classes. This is useful for predicting class probabilities from a neural network's output:\n", + "\n", + "$$\\text{Softmax}(X) = \\frac{e^{X}}{\\sum_{i=1}^{m}e^{X_i}}$$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fVM8pvhWwuwI" + }, + "outputs": [], + "source": [ + "x = tf.linspace(-4, 4, 201)\n", + "x = tf.cast(x, tf.float32)\n", + "plt.plot(x, tf.nn.softmax(x, axis=0));\n", + "plt.xlabel('x')\n", + "plt.ylabel('Softmax(x)')\n", + "plt.title('Softmax activation function');" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OHW6Yvg2yS6H" + }, + "source": [ + "### The dense layer\n", + "\n", + "Create a class for the dense layer. By definition, the outputs of one layer are fully connected to the inputs of the next layer in an MLP. Therefore, the input dimension for a dense layer can be inferred based on the output dimension of its previous layer and does not need to be specified upfront during its initialization. The weights should also be initialized properly to prevent activation outputs from becoming too large or small. One of the most popular weight initialization methods is the Xavier scheme, where each element of the weight matrix is sampled in the following manner:\n", + "\n", + "$$W_{ij} \\sim \\text{Uniform}(-\\frac{\\sqrt{6}}{\\sqrt{n + m}},\\frac{\\sqrt{6}}{\\sqrt{n + m}})$$\n", + "\n", + "The bias vector can be initialized to zeros." 
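+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XavierLim784" + }, + "source": [ + "For a concrete sense of scale (an illustration only, using the 784-input, 700-output first hidden layer that appears later in this guide), the Xavier sampling limit evaluates to roughly 0.064:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XavierLim700" + }, + "outputs": [], + "source": [ + "# Illustrative only: the Xavier/Glorot uniform limit for a 784 x 700 weight matrix.\n", + "in_dim, out_dim = 784, 700\n", + "xavier_lim_example = (tf.sqrt(6.)/tf.sqrt(tf.cast(in_dim + out_dim, tf.float32))).numpy()\n", + "print(f\"Sampling limit for a {in_dim} x {out_dim} layer: {xavier_lim_example:.4f}\")"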
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "re1SSFyBdMrS" + }, + "outputs": [], + "source": [ + "def xavier_init(shape):\n", + " # Computes the xavier initialization values for a weight matrix\n", + " in_dim, out_dim = shape\n", + " xavier_lim = tf.sqrt(6.)/tf.sqrt(tf.cast(in_dim + out_dim, tf.float32))\n", + " weight_vals = tf.random.uniform(shape=(in_dim, out_dim), \n", + " minval=-xavier_lim, maxval=xavier_lim, seed=22)\n", + " return weight_vals" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "otDFX4u6e6ml" + }, + "source": [ + "The Xavier initialization method can also be implemented with `tf.keras.initializers.GlorotUniform`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IM0yJos25FG5" + }, + "outputs": [], + "source": [ + "class DenseLayer(tf.Module):\n", + "\n", + " def __init__(self, out_dim, weight_init=xavier_init, activation=tf.identity):\n", + " # Initialize the dimensions and activation functions\n", + " self.out_dim = out_dim\n", + " self.weight_init = weight_init\n", + " self.activation = activation\n", + " self.built = False\n", + "\n", + " def __call__(self, x):\n", + " if not self.built:\n", + " # Infer the input dimension based on first call\n", + " self.in_dim = x.shape[1]\n", + " # Initialize the weights and biases\n", + " self.w = tf.Variable(self.weight_init(shape=(self.in_dim, self.out_dim)))\n", + " self.b = tf.Variable(tf.zeros(shape=(self.out_dim,)))\n", + " self.built = True\n", + " # Compute the forward pass\n", + " z = tf.add(tf.matmul(x, self.w), self.b)\n", + " return self.activation(z)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "X-7MzpjgyHg6" + }, + "source": [ + "Next, build a class for the MLP model that executes layers sequentially.\n", + "Remember that the model variables are only available after the first sequence of dense layer calls due to dimension inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6XisRWiCyHAb" + }, + "outputs": [], + "source": [ + "class MLP(tf.Module):\n", + "\n", + " def __init__(self, layers):\n", + " self.layers = layers\n", + " \n", + " @tf.function\n", + " def __call__(self, x, preds=False): \n", + " # Execute the model's layers sequentially\n", + " for layer in self.layers:\n", + " x = layer(x)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "luXKup-43nd7" + }, + "source": [ + "Initialize a MLP model with the following architecture:\n", + "\n", + "- Forward Pass: ReLU(784 x 700) x ReLU(700 x 500) x Softmax(500 x 10)\n", + "\n", + "The softmax activation function does not need to be applied by the MLP. It is computed separately in the loss and prediction functions." 
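+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LazyBuildMd0" + }, + "source": [ + "Before initializing the full model, the following quick check (illustrative only; it builds a throwaway `DenseLayer` on dummy data) demonstrates the lazy variable creation described above: a layer has no variables until its first call, after which the weight shape matches the inferred input dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LazyBuildCd1" + }, + "outputs": [], + "source": [ + "# Illustrative check of shape inference: variables appear only after the first call.\n", + "demo_layer = DenseLayer(out_dim=4, activation=tf.nn.relu)\n", + "print(\"Variables before first call:\", len(demo_layer.variables))\n", + "_ = demo_layer(tf.ones(shape=(2, 3)))\n", + "print(\"Variable shapes after first call:\", [v.shape.as_list() for v in demo_layer.variables])"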
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VmlACuki3oPi" + }, + "outputs": [], + "source": [ + "hidden_layer_1_size = 700\n", + "hidden_layer_2_size = 500\n", + "output_size = 10\n", + "\n", + "mlp_model = MLP([\n", + "    DenseLayer(out_dim=hidden_layer_1_size, activation=tf.nn.relu),\n", + "    DenseLayer(out_dim=hidden_layer_2_size, activation=tf.nn.relu),\n", + "    DenseLayer(out_dim=output_size)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tyBATDoRmDkg" + }, + "source": [ + "### Define the loss function\n", + "\n", + "The cross-entropy loss function is a great choice for multiclass classification problems since it measures the negative log-likelihood of the data according to the model's probability predictions. The higher the probability assigned to the true class, the lower the loss. The equation for the cross-entropy loss is as follows:\n", + "\n", + "$$L = -\frac{1}{n}\sum_{i=1}^{n}\sum_{j=1}^{m} {y_j}^{[i]}⋅\log(\hat{{y_j}}^{[i]})$$\n", + "\n", + "where\n", + "\n", + "* $\underset{n\times m}{\hat{y}}$: a matrix of predicted class distributions\n", + "* $\underset{n\times m}{y}$: a one-hot encoded matrix of true classes\n", + "\n", + "The `tf.nn.sparse_softmax_cross_entropy_with_logits` function can be used to compute the cross-entropy loss. This function does not require the model's last layer to apply the softmax activation function nor does it require the class labels to be one-hot encoded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rskOYA7FVCwg" + }, + "outputs": [], + "source": [ + "def cross_entropy_loss(y_pred, y):\n", + "  # Compute cross entropy loss with a sparse operation\n", + "  sparse_ce = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=y_pred)\n", + "  return tf.reduce_mean(sparse_ce)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BvWxED1km8jh" + }, + "source": [ + "Write a basic accuracy function that calculates the proportion of correct classifications during training. In order to generate class predictions from softmax outputs, return the index that corresponds to the largest class probability. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jPJMWx2UgiBm" + }, + "outputs": [], + "source": [ + "def accuracy(y_pred, y):\n", + "  # Compute accuracy after extracting class predictions\n", + "  class_preds = tf.argmax(tf.nn.softmax(y_pred), axis=1)\n", + "  is_equal = tf.equal(y, class_preds)\n", + "  return tf.reduce_mean(tf.cast(is_equal, tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JSiNRhTOnKZr" + }, + "source": [ + "### Train the model\n", + "\n", + "Using an optimizer can result in significantly faster convergence compared to standard gradient descent. The Adam optimizer is implemented below. Visit the [Optimizers](https://www.tensorflow.org/guide/core/optimizers_core) guide to learn more about designing custom optimizers with TensorFlow Core."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iGIBDk3cAv6a" + }, + "outputs": [], + "source": [ + "class Adam:\n", + "\n", + " def __init__(self, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize optimizer parameters and variable slots\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " self.built = False\n", + " \n", + " def apply_gradients(self, grads, vars):\n", + " # Initialize variables on the first call\n", + " if not self.built:\n", + " for var in vars:\n", + " v = tf.Variable(tf.zeros(shape=var.shape))\n", + " s = tf.Variable(tf.zeros(shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + " self.built = True\n", + " # Update the model variables given their gradients\n", + " for i, (d_var, var) in enumerate(zip(grads, vars)):\n", + " self.v_dvar[i].assign(self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var)\n", + " self.s_dvar[i].assign(self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var))\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " self.t += 1.\n", + " return " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osEK3rqpYfKd" + }, + "source": [ + "Now, write a custom training loop that updates the MLP parameters with mini-batch gradient descent. Using mini-batches for training provides both memory efficiency and faster convergence." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CJLeY2ao1aw6" + }, + "outputs": [], + "source": [ + "def train_step(x_batch, y_batch, loss, acc, model, optimizer):\n", + " # Update the model state given a batch of data\n", + " with tf.GradientTape() as tape:\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = acc(y_pred, y_batch)\n", + " grads = tape.gradient(batch_loss, model.variables)\n", + " optimizer.apply_gradients(grads, model.variables)\n", + " return batch_loss, batch_acc\n", + "\n", + "def val_step(x_batch, y_batch, loss, acc, model):\n", + " # Evaluate the model on given a batch of validation data\n", + " y_pred = model(x_batch)\n", + " batch_loss = loss(y_pred, y_batch)\n", + " batch_acc = acc(y_pred, y_batch)\n", + " return batch_loss, batch_acc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oC85kuZgmh3q" + }, + "outputs": [], + "source": [ + "def train_model(mlp, train_data, val_data, loss, acc, optimizer, epochs):\n", + " # Initialize data structures\n", + " train_losses, train_accs = [], []\n", + " val_losses, val_accs = [], []\n", + "\n", + " # Format training loop and begin training\n", + " for epoch in range(epochs):\n", + " batch_losses_train, batch_accs_train = [], []\n", + " batch_losses_val, batch_accs_val = [], []\n", + "\n", + " # Iterate over the training data\n", + " for x_batch, y_batch in train_data:\n", + " # Compute gradients and update the model's parameters\n", + " batch_loss, batch_acc = train_step(x_batch, y_batch, loss, acc, mlp, optimizer)\n", + " # Keep track of batch-level training performance\n", + " batch_losses_train.append(batch_loss)\n", + " batch_accs_train.append(batch_acc)\n", + "\n", + " # Iterate over the validation data\n", + " for x_batch, y_batch in 
val_data:\n", + " batch_loss, batch_acc = val_step(x_batch, y_batch, loss, acc, mlp)\n", + " batch_losses_val.append(batch_loss)\n", + " batch_accs_val.append(batch_acc)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss, train_acc = tf.reduce_mean(batch_losses_train), tf.reduce_mean(batch_accs_train)\n", + " val_loss, val_acc = tf.reduce_mean(batch_losses_val), tf.reduce_mean(batch_accs_val)\n", + " train_losses.append(train_loss)\n", + " train_accs.append(train_acc)\n", + " val_losses.append(val_loss)\n", + " val_accs.append(val_acc)\n", + " print(f\"Epoch: {epoch}\")\n", + " print(f\"Training loss: {train_loss:.3f}, Training accuracy: {train_acc:.3f}\")\n", + " print(f\"Validation loss: {val_loss:.3f}, Validation accuracy: {val_acc:.3f}\")\n", + " return train_losses, train_accs, val_losses, val_accs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FvbfXlN5lwwB" + }, + "source": [ + "Train the MLP model for 10 epochs with batch size of 128. Hardware accelerators like GPUs or TPUs can also help speed up training time. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zPlT8QfxptYl" + }, + "outputs": [], + "source": [ + "train_losses, train_accs, val_losses, val_accs = train_model(mlp_model, train_data, val_data, \n", + " loss=cross_entropy_loss, acc=accuracy,\n", + " optimizer=Adam(), epochs=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j_RVmt43G12R" + }, + "source": [ + "### Performance evaluation\n", + "\n", + "Start by writing a plotting function to visualize the model's loss and accuracy during training. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VXTCYVtNDjAM" + }, + "outputs": [], + "source": [ + "def plot_metrics(train_metric, val_metric, metric_type):\n", + " # Visualize metrics vs training Epochs\n", + " plt.figure()\n", + " plt.plot(range(len(train_metric)), train_metric, label = f\"Training {metric_type}\")\n", + " plt.plot(range(len(val_metric)), val_metric, label = f\"Validation {metric_type}\")\n", + " plt.xlabel(\"Epochs\")\n", + " plt.ylabel(metric_type)\n", + " plt.legend()\n", + " plt.title(f\"{metric_type} vs Training epochs\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DC-qIvZbHo0G" + }, + "outputs": [], + "source": [ + "plot_metrics(train_losses, val_losses, \"cross entropy loss\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P-w2xk2PIDve" + }, + "outputs": [], + "source": [ + "plot_metrics(train_accs, val_accs, \"accuracy\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tbrJJaFrD_XR" + }, + "source": [ + "## Save and load the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Data preprocessing \n", + "- Probability prediction\n", + "- Class prediction" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1sszfWuJJZoo" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, preprocess, class_pred):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.preprocess = preprocess\n", + " self.class_pred = class_pred\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None, None, None], dtype=tf.uint8)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x 
= self.preprocess(x)\n", + " y = self.model(x)\n", + " y = self.class_pred(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p8x6gjTDVi5d" + }, + "outputs": [], + "source": [ + "def preprocess_test(x):\n", + " # The export module takes in unprocessed and unlabeled data\n", + " x = tf.reshape(x, shape=[-1, 784])\n", + " x = x/255\n", + " return x\n", + "\n", + "def class_pred_test(y):\n", + " # Generate class predictions from MLP output\n", + " return tf.argmax(tf.nn.softmax(y), axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vu9H5STrJzdo" + }, + "source": [ + "This export module can now be saved with the `tf.saved_model.save` function. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fN9pPBQTKTe3" + }, + "outputs": [], + "source": [ + "mlp_model_export = ExportModule(model=mlp_model,\n", + " preprocess=preprocess_test,\n", + " class_pred=class_pred_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "idS7rQKbKwRS" + }, + "outputs": [], + "source": [ + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'mlp_model_export')\n", + "tf.saved_model.save(mlp_model_export, save_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_zZxO8iqBGZ-" + }, + "source": [ + "Load the saved model with `tf.saved_model.load` and examine its performance on the unseen test data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W5cwBTUqxldW" + }, + "outputs": [], + "source": [ + "mlp_loaded = tf.saved_model.load(save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bmv0u6j_b5OC" + }, + "outputs": [], + "source": [ + "def accuracy_score(y_pred, y):\n", + " # Generic accuracy function\n", + " is_equal = tf.equal(y_pred, y)\n", + " return tf.reduce_mean(tf.cast(is_equal, tf.float32))\n", + "\n", + "x_test, y_test = tfds.load(\"mnist\", split=['test'], batch_size=-1, as_supervised=True)[0]\n", + "test_classes = mlp_loaded(x_test)\n", + "test_acc = accuracy_score(test_classes, y_test)\n", + "print(f\"Test Accuracy: {test_acc:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j5t9vgv_ciQ_" + }, + "source": [ + "The model does a great job of classifying handwritten digits in the training dataset and also generalizes well to unseen data. Now, examine the model's class-wise accuracy to ensure good performance for each digit. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UD8YiC1Vfeyp" + }, + "outputs": [], + "source": [ + "print(\"Accuracy breakdown by digit:\")\n", + "print(\"---------------------------\")\n", + "label_accs = {}\n", + "for label in range(10):\n", + " label_ind = (y_test == label)\n", + " # extract predictions for specific true label\n", + " pred_label = test_classes[label_ind]\n", + " labels = y_test[label_ind]\n", + " # compute class-wise accuracy\n", + " label_accs[accuracy_score(pred_label, labels).numpy()] = label\n", + "for key in sorted(label_accs):\n", + " print(f\"Digit {label_accs[key]}: {key:.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rcykuJFhdGb0" + }, + "source": [ + "It looks like the model struggles with some digits a little more than others which is quite common in many multiclass classification problems. 
As a final exercise, plot a confusion matrix of the model's predictions and its corresponding true labels to gather more class-level insights. Sklearn and seaborn have functions for generating and visualizing confusion matrices. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JqCaqPwwh1tN" + }, + "outputs": [], + "source": [ + "import sklearn.metrics as sk_metrics\n", + "\n", + "def show_confusion_matrix(test_labels, test_classes):\n", + " # Compute confusion matrix and normalize\n", + " plt.figure(figsize=(10,10))\n", + " confusion = sk_metrics.confusion_matrix(test_labels.numpy(), \n", + " test_classes.numpy())\n", + " confusion_normalized = confusion / confusion.sum(axis=1, keepdims=True)\n", + " axis_labels = range(10)\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.4f', square=True)\n", + " plt.title(\"Confusion matrix\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "show_confusion_matrix(y_test, test_classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JT-WA7GVda6d" + }, + "source": [ + "Class-level insights can help identify reasons for misclassifications and improve model performance in future training cycles." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced a few techniques to handle a multiclass classification problem with an [MLP](https://developers.google.com/machine-learning/crash-course/multi-class-neural-networks/softmax). Here are a few more tips that may help:\n", + "\n", + "- The [TensorFlow Core APIs](https://www.tensorflow.org/guide/core) can be used to build machine learning workflows with high levels of configurability\n", + "- Initialization schemes can help prevent model parameters from vanishing or exploding during training.\n", + "- Overfitting is another common problem for neural networks, though it wasn't a problem for this tutorial. Visit the [Overfit and underfit](overfit_and_underfit.ipynb) tutorial for more help with this.\n", + "\n", + "For more examples of using the TensorFlow Core APIs, check out the [guide](https://www.tensorflow.org/guide/core). If you want to learn more about loading and preparing data, see the tutorials on [image data loading](https://www.tensorflow.org/tutorials/load_data/images) or [CSV data loading](https://www.tensorflow.org/tutorials/load_data/csv)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "FhGuhbZ6M5tl" + ], + "name": "mlp_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/optimizers_core.ipynb b/site/en/guide/core/optimizers_core.ipynb new file mode 100644 index 00000000000..e22f0327419 --- /dev/null +++ b/site/en/guide/core/optimizers_core.ipynb @@ -0,0 +1,612 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "FhGuhbZ6M5tl" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "AwOEIRJC6Une" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EIdT9iu_Z4Rb" + }, + "source": [ + "# Optimizers with Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bBIlTPscrIT9" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SjAxxRpBzVYg" + }, + "source": [ + "## Introduction\n", + "\n", + "This notebook introduces the process of creating custom optimizers with the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core). Visit the [Core APIs overview](https://www.tensorflow.org/guide/core) to learn more about TensorFlow Core and its intended use cases. \n", + "\n", + "The [Keras optimizers](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers) module is the recommended optimization toolkit for many general training purposes. It includes a variety of prebuilt optimiziers as well as subclassing functionality for customization. The Keras optimizers are also compatible with custom layers, models, and training loops built with the Core APIs. These prebuilt and customizable optimizers are suitable for most cases, but the Core APIs allow for complete control over the optimization process. For example, techniques such as Sharpness-Aware Minimization (SAM) require the model and optimizer to be coupled, which does not fit the traditional definition of ML optimizers. This guide walks through the process of building custom optimizers from scratch with the Core APIs, giving you the power to have full control over the structure, implementation, and behavior of your optimizers." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nBmqYyodNRd_" + }, + "source": [ + "## Optimizers overview\n", + "\n", + "An optimizer is an algorithm used to minimize a loss function with respect to a model's trainable parameters. The most straightforward optimization technique is gradient descent, which iteratively updates a model's parameters by taking a step in the direction of its loss function's steepest descent. Its step size is directly proportional to the size of the gradient, which can be problematic when the gradient is either too large or too small. There are many other gradient-based optimizers such as Adam, Adagrad, and RMSprop that leverage various mathematical properties of gradients for memory efficiency and fast convergence." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nchsZfwEVtVs" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d9idwpXCltUl" + }, + "outputs": [], + "source": [ + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "# Preset Matplotlib figure sizes.\n", + "matplotlib.rcParams['figure.figsize'] = [9, 6]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9xQKvCJ85kCQ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "print(tf.__version__)\n", + "# set random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0UmF5aU3MnwX" + }, + "source": [ + "## Gradient descent\n", + "\n", + "The basic optimizer class should have an initialization method and a function to update a list of variables given a list of gradients. Start by implementing the basic gradient descent optimizer which updates each variable by subtracting its gradient scaled by a learning rate." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MWjmUmeOQFFN" + }, + "outputs": [], + "source": [ + "class GradientDescent(tf.Module):\n", + "\n", + "  def __init__(self, learning_rate=1e-3):\n", + "    # Initialize parameters\n", + "    self.learning_rate = learning_rate\n", + "    self.title = f\"Gradient descent optimizer: learning rate={self.learning_rate}\"\n", + "\n", + "  def apply_gradients(self, grads, vars):\n", + "    # Update variables\n", + "    for grad, var in zip(grads, vars):\n", + "      var.assign_sub(self.learning_rate*grad)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZSekgBHDRzmp" + }, + "source": [ + "To test this optimizer, create a sample loss function to minimize with respect to a single variable, $x$. Compute its gradient function and solve for its minimizing parameter value:\n", + "\n", + "$$L = 2x^4 + 3x^3 + 2$$\n", + "\n", + "$$\frac{dL}{dx} = 8x^3 + 9x^2$$\n", + "\n", + "$\frac{dL}{dx}$ is 0 at $x = 0$, which is a saddle point, and at $x = - \frac{9}{8}$, which is the global minimum. Therefore, the loss function is optimized at $x^\star = - \frac{9}{8}$." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VCtJaUo6Ry8V" + }, + "outputs": [], + "source": [ + "x_vals = tf.linspace(-2, 2, 201)\n", + "x_vals = tf.cast(x_vals, tf.float32)\n", + "\n", + "def loss(x):\n", + "  return 2*(x**4) + 3*(x**3) + 2\n", + "\n", + "def grad(f, x):\n", + "  with tf.GradientTape() as tape:\n", + "    tape.watch(x)\n", + "    result = f(x)\n", + "  return tape.gradient(result, x)\n", + "\n", + "plt.plot(x_vals, loss(x_vals), c='k', label = \"Loss function\")\n", + "plt.plot(x_vals, grad(loss, x_vals), c='tab:blue', label = \"Gradient function\")\n", + "plt.plot(0, loss(0), marker=\"o\", c='g', label = \"Inflection point\")\n", + "plt.plot(-9/8, loss(-9/8), marker=\"o\", c='r', label = \"Global minimum\")\n", + "plt.legend()\n", + "plt.ylim(0,5)\n", + "plt.xlabel(\"x\")\n", + "plt.ylabel(\"loss\")\n", + "plt.title(\"Sample loss function and gradient\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fLlIBJ9yuwhE" + }, + "source": [ + "Write a function to test the convergence of an optimizer with a single-variable loss function. Assume that convergence has been achieved when the updated parameter's value at timestep $t$ is the same as its value held at timestep $t-1$. Terminate the test after a set number of iterations and also keep track of any exploding gradients during the process. In order to truly challenge the optimization algorithm, initialize the parameter poorly. In the above example, $x = 2$ is a good choice since it involves a steep gradient and also leads into an inflection point."
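+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GradAtX2Md00" + }, + "source": [ + "For reference, the gradient at the starting point $x = 2$ is already large, which is part of what makes this initialization challenging. The quick check below reuses the `loss` and `grad` functions defined above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GradAtX2Cd01" + }, + "outputs": [], + "source": [ + "# Gradient of the sample loss at the poorly chosen starting point x = 2.\n", + "x_init = tf.constant(2.0)\n", + "print(\"Gradient at x = 2:\", grad(loss, x_init).numpy())  # 8*(2**3) + 9*(2**2) = 100"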
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SLQTc41ouv0F" + }, + "outputs": [], + "source": [ + "def convergence_test(optimizer, loss_fn, grad_fn=grad, init_val=2., max_iters=2000):\n", + " # Function for optimizer convergence test\n", + " print(optimizer.title)\n", + " print(\"-------------------------------\")\n", + " # Initializing variables and structures\n", + " x_star = tf.Variable(init_val)\n", + " param_path = []\n", + " converged = False\n", + "\n", + " for iter in range(1, max_iters + 1):\n", + " x_grad = grad_fn(loss_fn, x_star)\n", + "\n", + " # Case for exploding gradient\n", + " if tf.math.is_nan(x_grad):\n", + " print(f\"Gradient exploded at iteration {iter}\\n\")\n", + " return []\n", + "\n", + " # Updating the variable and storing its old-version\n", + " x_old = x_star.numpy()\n", + " optimizer.apply_gradients([x_grad], [x_star])\n", + " param_path.append(x_star.numpy())\n", + "\n", + " # Checking for convergence\n", + " if x_star == x_old:\n", + " print(f\"Converged in {iter} iterations\\n\")\n", + " converged = True\n", + " break\n", + " \n", + " # Print early termination message\n", + " if not converged:\n", + " print(f\"Exceeded maximum of {max_iters} iterations. Test terminated.\\n\")\n", + " return param_path" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vK-7_TsmyAgI" + }, + "source": [ + "Test the convergence of the gradient descent optimizer for the following learning rates: 1e-3, 1e-2, 1e-1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lWRn8c91mqB0" + }, + "outputs": [], + "source": [ + "param_map_gd = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + " param_map_gd[learning_rate] = (convergence_test(\n", + " GradientDescent(learning_rate=learning_rate), loss_fn=loss))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TydrGHF5y6iI" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "piffzGHI_u5G" + }, + "outputs": [], + "source": [ + "def viz_paths(param_map, x_vals, loss_fn, title, max_iters=2000):\n", + " # Creating a controur plot of the loss function\n", + " t_vals = tf.range(1., max_iters + 100.)\n", + " t_grid, x_grid = tf.meshgrid(t_vals, x_vals)\n", + " loss_grid = tf.math.log(loss_fn(x_grid))\n", + " plt.pcolormesh(t_vals, x_vals, loss_grid, vmin=0, shading='nearest')\n", + " colors = ['r', 'w', 'c']\n", + " # Plotting the parameter paths over the contour plot\n", + " for i, learning_rate in enumerate(param_map):\n", + " param_path = param_map[learning_rate]\n", + " if len(param_path) > 0:\n", + " x_star = param_path[-1]\n", + " plt.plot(t_vals[:len(param_path)], param_path, c=colors[i])\n", + " plt.plot(len(param_path), x_star, marker='o', c=colors[i], \n", + " label = f\"x*: learning rate={learning_rate}\")\n", + " plt.xlabel(\"Iterations\")\n", + " plt.ylabel(\"Parameter value\")\n", + " plt.legend()\n", + " plt.title(f\"{title} parameter paths\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ssyj2sO4BcNY" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_gd, x_vals, loss, \"Gradient descent\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MmM-5eDLFnmC" + }, + "source": [ + "Gradient descent seems to get stuck at the inflection point when using smaller learning rates. 
Increasing the learning rate can encourage faster movement around the plateau region due to a larger step size; however, this comes at the risk of having exploding gradients in early iterations when the loss function is extremely steep." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m5CDeXN8S1SF" + }, + "source": [ + "## Gradient descent with momentum\n", + "\n", + "Gradient descent with momentum not only uses the gradient to update a variable but also involves the change in position of a variable based on its previous update. The momentum parameter determines the level of influence the update at timestep $t-1$ has on the update at timestep $t$. Accumulating momentum helps to move variables past plateau regions faster than basic gradient descent. The momentum update rule is as follows:\n", + "\n", + "$$\Delta_x^{[t]} = lr \cdot L^\prime(x^{[t-1]}) + p \cdot \Delta_x^{[t-1]}$$\n", + "\n", + "$$x^{[t]} = x^{[t-1]} - \Delta_x^{[t]}$$\n", + "\n", + "where\n", + "\n", + "* $x$: the variable being optimized\n", + "* $\Delta_x$: change in $x$ \n", + "* $lr$: learning rate\n", + "* $L^\prime(x)$: gradient of the loss function with respect to $x$\n", + "* $p$: momentum parameter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rOBY8Tz4S0dX" + }, + "outputs": [], + "source": [ + "class Momentum(tf.Module):\n", + "\n", + "  def __init__(self, learning_rate=1e-3, momentum=0.7):\n", + "    # Initialize parameters\n", + "    self.learning_rate = learning_rate\n", + "    self.momentum = momentum\n", + "    self.change = 0.\n", + "    self.title = f\"Momentum optimizer: learning rate={self.learning_rate}\"\n", + "\n", + "  def apply_gradients(self, grads, vars):\n", + "    # Update variables \n", + "    for grad, var in zip(grads, vars):\n", + "      curr_change = self.learning_rate*grad + self.momentum*self.change\n", + "      var.assign_sub(curr_change)\n", + "      self.change = curr_change" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t_nDu38gW6Fu" + }, + "source": [ + "Test the convergence of the momentum optimizer for the following learning rates: 1e-3, 1e-2, 1e-1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tA6oQL-sW2xg" + }, + "outputs": [], + "source": [ + "param_map_mtm = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + "  param_map_mtm[learning_rate] = (convergence_test(\n", + "      Momentum(learning_rate=learning_rate),\n", + "      loss_fn=loss, grad_fn=grad))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wz_LV0EPYE6k" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qbW1eEKaX3T9" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_mtm, x_vals, loss, \"Momentum\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4bEFnhPRTBXh" + }, + "source": [ + "## Adaptive moment estimation (Adam)\n", + "\n", + "The Adaptive Moment Estimation (Adam) algorithm is an efficient and highly generalizable optimization technique that leverages two key gradient descent methodologies: momentum and root mean square propagation (RMSP). Momentum helps accelerate gradient descent by using the first moment (sum of gradients) along with a decay parameter. RMSP is similar; however, it leverages the second moment (sum of gradients squared). \n", + "\n", + "The Adam algorithm combines both the first and second moment to provide a more generalizable update rule. The sign of a variable, $x$, can be determined by computing $\frac{x}{\sqrt{x^2}}$. The Adam optimizer uses this fact to calculate an update step which is effectively a smoothed sign. Instead of calculating $\frac{x}{\sqrt{x^2}}$, the optimizer calculates a smoothed version of $x$ (first moment) and $x^2$ (second moment) for each variable update. \n" + ] + },
+ { + "cell_type": "markdown", + "metadata": { + "id": "WjgyqRiZ7XhA" + }, + "source": [ + "**Adam algorithm**\n", + "\n", + "$\beta_1 \gets 0.9 \; \triangleright \text{literature value}$\n", + "\n", + "$\beta_2 \gets 0.999 \; \triangleright \text{literature value}$\n", + "\n", + "$lr \gets \text{1e-3} \; \triangleright \text{configurable learning rate}$\n", + "\n", + "$\epsilon \gets \text{1e-7} \; \triangleright \text{prevents divide by 0 error}$\n", + "\n", + "$V_{dv} \gets \vec {\underset{n\times1}{0}} \;\triangleright \text{stores momentum updates for each variable}$\n", + "\n", + "$S_{dv} \gets \vec {\underset{n\times1}{0}} \; \triangleright \text{stores RMSP updates for each variable}$\n", + "\n", + "$t \gets 1$\n", + "\n", + "$\text{On iteration } t:$\n", + "\n", + "$\;\;\;\; \text{For } (\frac{dL}{dv}, v) \text{ in gradient variable pairs}:$\n", + "\n", + "$\;\;\;\;\;\;\;\; V_{dv\_i} = \beta_1V_{dv\_i} + (1 - \beta_1)\frac{dL}{dv} \; \triangleright \text{momentum update}$\n", + "\n", + "$\;\;\;\;\;\;\;\; S_{dv\_i} = \beta_2S_{dv\_i} + (1 - \beta_2)(\frac{dL}{dv})^2 \; \triangleright \text{RMSP update}$\n", + "\n", + "$\;\;\;\;\;\;\;\; v_{dv}^{bc} = \frac{V_{dv\_i}}{1-\beta_1^t} \; \triangleright \text{momentum bias correction}$\n", + "\n", + "$\;\;\;\;\;\;\;\; s_{dv}^{bc} = \frac{S_{dv\_i}}{1-\beta_2^t} \; \triangleright \text{RMSP bias correction}$\n", + "\n", + "$\;\;\;\;\;\;\;\; v = v - lr\frac{v_{dv}^{bc}}{\sqrt{s_{dv}^{bc}} + \epsilon} \; \triangleright \text{parameter update}$\n", + "\n", + "$\;\;\;\; t = t + 1$\n", + "\n", + "**End of algorithm**\n", + "\n", + "Given that $V_{dv}$ and $S_{dv}$ are initialized to 0 and that $\beta_1$ and $\beta_2$ are close to 1, the momentum and RMSP updates are naturally biased towards 0; therefore, the variables can benefit from bias correction. Bias correction also helps to control the oscillation of weights as they approach the global minimum."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hm5vffRJRsEc" + }, + "outputs": [], + "source": [ + "class Adam(tf.Module):\n", + " \n", + " def __init__(self, learning_rate=1e-3, beta_1=0.9, beta_2=0.999, ep=1e-7):\n", + " # Initialize the Adam parameters\n", + " self.beta_1 = beta_1\n", + " self.beta_2 = beta_2\n", + " self.learning_rate = learning_rate\n", + " self.ep = ep\n", + " self.t = 1.\n", + " self.v_dvar, self.s_dvar = [], []\n", + " self.title = f\"Adam: learning rate={self.learning_rate}\"\n", + " self.built = False\n", + "\n", + " def apply_gradients(self, grads, vars):\n", + " # Set up moment and RMSprop slots for each variable on the first call\n", + " if not self.built:\n", + " for var in vars:\n", + " v = tf.Variable(tf.zeros(shape=var.shape))\n", + " s = tf.Variable(tf.zeros(shape=var.shape))\n", + " self.v_dvar.append(v)\n", + " self.s_dvar.append(s)\n", + " self.built = True\n", + " # Perform Adam updates\n", + " for i, (d_var, var) in enumerate(zip(grads, vars)):\n", + " # Moment calculation\n", + " self.v_dvar[i] = self.beta_1*self.v_dvar[i] + (1-self.beta_1)*d_var\n", + " # RMSprop calculation\n", + " self.s_dvar[i] = self.beta_2*self.s_dvar[i] + (1-self.beta_2)*tf.square(d_var)\n", + " # Bias correction\n", + " v_dvar_bc = self.v_dvar[i]/(1-(self.beta_1**self.t))\n", + " s_dvar_bc = self.s_dvar[i]/(1-(self.beta_2**self.t))\n", + " # Update model variables\n", + " var.assign_sub(self.learning_rate*(v_dvar_bc/(tf.sqrt(s_dvar_bc) + self.ep)))\n", + " # Increment the iteration counter\n", + " self.t += 1." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UWN4Qus7flUO" + }, + "source": [ + "Test the performance of the Adam optimizer with the same learning rates used with the gradient descent examples. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GXHCxtemFBpR" + }, + "outputs": [], + "source": [ + "param_map_adam = {}\n", + "learning_rates = [1e-3, 1e-2, 1e-1]\n", + "for learning_rate in learning_rates:\n", + " param_map_adam[learning_rate] = (convergence_test(\n", + " Adam(learning_rate=learning_rate), loss_fn=loss))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jgpUcs_xXEjX" + }, + "source": [ + "Visualize the path of the parameters over a contour plot of the loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ctvOUmlzFK8s" + }, + "outputs": [], + "source": [ + "viz_paths(param_map_adam, x_vals, loss, \"Adam\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_oGScF8zJcY4" + }, + "source": [ + "In this particular example, the Adam optimizer has slower convergence compared to traditional gradient descent when using small learning rates. However, the algorithm successfully moves past the plataeu region and converges to the global minimum when a larger learning rate. Exploding gradients are no longer an issue due to Adam's dynamic scaling of learning rates when encountering large gradients." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VFLfEH4ManbW" + }, + "source": [ + "## Conclusion\n", + "\n", + "This notebook introduced the basics of writing and comparing optimizers with the [TensorFlow Core APIs](https://www.tensorflow.org/guide/core). Although prebuilt optimizers like Adam are generalizable, they may not always be the best choice for every model or dataset. 
Having fine-grained control over the optimization process can help streamline ML training workflows and improve overall performance. Refer to the following documentation for more examples of custom optimizers:\n", + "\n", + "* This Adam optimizer is used in the [Multilayer perceptrons](https://www.tensorflow.org/guide/core/mlp_core) tutorial and the [Distributed training]()\n", + "* [Model Garden](https://blog.tensorflow.org/2020/03/introducing-model-garden-for-tensorflow-2.html) has a variety of [custom optimizers](https://github.com/tensorflow/models/tree/master/official/modeling/optimization) written with the Core APIs.\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "optimizers_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/core/quickstart_core.ipynb b/site/en/guide/core/quickstart_core.ipynb new file mode 100644 index 00000000000..70586fd3f0c --- /dev/null +++ b/site/en/guide/core/quickstart_core.ipynb @@ -0,0 +1,591 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rX8mhOLljYeM" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "BZSlp3DAjdYf" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3wF5wszaj97Y" + }, + "source": [ + "# Quickstart for the TensorFlow Core APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DUNzJc4jTj6G" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "04QgGZc9bF5D" + }, + "source": [ + "This quickstart tutorial demonstrates how you can use the [TensorFlow Core low-level APIs](https://www.tensorflow.org/guide/core) to build and train a multiple linear regression model that predicts fuel efficiency. It uses the [Auto MPG](https://archive.ics.uci.edu/ml/datasets/auto+mpg) dataset which contains fuel efficiency data for late-1970s and early 1980s automobiles.\n", + "\n", + "You will follow the typical stages of a machine learning process:\n", + "\n", + "1. Load the dataset.\n", + "2. Build an [input pipeline](../data.ipynb).\n", + "3. Build a multiple [linear regression](https://developers.google.com/machine-learning/glossary#linear-regression) model.\n", + "4. Evaluate the performance of the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nnrWf3PCEzXL" + }, + "source": [ + "## Setup\n", + "\n", + "Import TensorFlow and other necessary libraries to get started:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0trJmd6DjqBZ" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import pandas as pd\n", + "import matplotlib\n", + "from matplotlib import pyplot as plt\n", + "print(\"TensorFlow version:\", tf.__version__)\n", + "# Set a random seed for reproducible results \n", + "tf.random.set_seed(22)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7NAbSZiaoJ4z" + }, + "source": [ + "## Load and preprocess the dataset\n", + "\n", + "Next, you need to load and preprocess the [Auto MPG dataset](https://archive.ics.uci.edu/ml/datasets/auto+mpg) from the [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/). This dataset uses a variety of quantitative and categorical features such as cylinders, displacement, horsepower and weight to predict the fuel efficiencies of automobiles in the late-1970s and early 1980s.\n", + "\n", + "The dataset contains a few unknown values. Make sure to drop any missing values with `pandas.DataFrame.dropna`, and convert the dataset to a `tf.float32` tensor type with the `tf.convert_to_tensor` and `tf.cast` functions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HglhDsUfrJ98" + }, + "outputs": [], + "source": [ + "url = 'http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data'\n", + "column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',\n", + " 'Acceleration', 'Model Year', 'Origin']\n", + "\n", + "dataset = pd.read_csv(url, names=column_names, na_values='?', comment='\\t',\n", + " sep=' ', skipinitialspace=True)\n", + "\n", + "dataset = dataset.dropna()\n", + "dataset_tf = tf.convert_to_tensor(dataset, dtype=tf.float32)\n", + "dataset.tail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0vgoDL3hYesB" + }, + "source": [ + "Next, split the dataset into training and test sets. Make sure to shuffle the dataset with `tf.random.shuffle` to avoid biased splits." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0mJU4kt6YiAp" + }, + "outputs": [], + "source": [ + "dataset_shuffled = tf.random.shuffle(dataset_tf, seed=22)\n", + "train_data, test_data = dataset_shuffled[100:], dataset_shuffled[:100]\n", + "x_train, y_train = train_data[:, 1:], train_data[:, 0]\n", + "x_test, y_test = test_data[:, 1:], test_data[:, 0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bscb2Vsbi3TE" + }, + "source": [ + "Perform basic feature engineering by one-hot-encoding the `\"Origin\"` feature. The `tf.one_hot` function is useful for transforming this categorical column into 3 separate binary columns." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_B8N9IV1i6IV" + }, + "outputs": [], + "source": [ + "def onehot_origin(x):\n", + " origin = tf.cast(x[:, -1], tf.int32)\n", + " # Use `origin - 1` to account for 1-indexed feature\n", + " origin_oh = tf.one_hot(origin - 1, 3)\n", + " x_ohe = tf.concat([x[:, :-1], origin_oh], axis = 1)\n", + " return x_ohe\n", + "\n", + "x_train_ohe, x_test_ohe = onehot_origin(x_train), onehot_origin(x_test)\n", + "x_train_ohe.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qnoCDzzedite" + }, + "source": [ + "This example shows a multiple regression problem with predictors or features on vastly different scales. Therefore, it is beneficial to standardize the data so that each feature has zero mean and unit variance. Use the `tf.reduce_mean` and `tf.math.reduce_std` functions for standardization. The regression model's prediction can then be unstandardized to obtain its value in terms of the original units." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dJJFdvqydhyp" + }, + "outputs": [], + "source": [ + "class Normalize(tf.Module):\n", + " def __init__(self, x):\n", + " # Initialize the mean and standard deviation for normalization\n", + " self.mean = tf.math.reduce_mean(x, axis=0)\n", + " self.std = tf.math.reduce_std(x, axis=0)\n", + "\n", + " def norm(self, x):\n", + " # Normalize the input\n", + " return (x - self.mean)/self.std\n", + "\n", + " def unnorm(self, x):\n", + " # Unnormalize the input\n", + " return (x * self.std) + self.mean" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5BONV6fYYwZb" + }, + "outputs": [], + "source": [ + "norm_x = Normalize(x_train_ohe)\n", + "norm_y = Normalize(y_train)\n", + "x_train_norm, y_train_norm = norm_x.norm(x_train_ohe), norm_y.norm(y_train)\n", + "x_test_norm, y_test_norm = norm_x.norm(x_test_ohe), norm_y.norm(y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BPZ68wASog_I" + }, + "source": [ + "## Build a machine learning model\n", + "\n", + "Build a linear regression model with the TensorFlow Core APIs. The equation for multiple linear regression is as follows:\n", + "\n", + "$${\\mathrm{Y}} = {\\mathrm{X}}w + b$$\n", + "\n", + "where\n", + "\n", + "* $\\underset{m\\times 1}{\\mathrm{Y}}$: target vector\n", + "* $\\underset{m\\times n}{\\mathrm{X}}$: feature matrix\n", + "* $\\underset{n\\times 1}w$: weight vector\n", + "* $b$: bias\n", + "\n", + "By using the `@tf.function` decorator, the corresponding Python code is traced to generate a callable TensorFlow graph. This approach is beneficial for saving and loading the model after training. It can also provide a performance boost for models with many layers and complex operations. 
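To make the matrix form concrete, here is a minimal sketch with hypothetical toy shapes (not the Auto MPG data) showing how `tf.matmul` computes $\mathrm{Y} = \mathrm{X}w + b$ and how `@tf.function` wraps the call:

```python
import tensorflow as tf

# Hypothetical toy shapes: m=4 examples, n=3 features.
X = tf.random.uniform(shape=[4, 3])
w = tf.random.uniform(shape=[3, 1])
b = tf.random.uniform(shape=[])

@tf.function
def predict(x):
  # Y = Xw + b, squeezed from shape (m, 1) to a vector of length m.
  return tf.squeeze(tf.matmul(x, w) + b, axis=1)

print(predict(X).shape)  # (4,)
```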
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h3IKyzTCDNGo" + }, + "outputs": [], + "source": [ + "class LinearRegression(tf.Module):\n", + "\n", + " def __init__(self):\n", + " self.built = False\n", + "\n", + " @tf.function\n", + " def __call__(self, x):\n", + " # Initialize the model parameters on the first call\n", + " if not self.built:\n", + " # Randomly generate the weight vector and bias term\n", + " rand_w = tf.random.uniform(shape=[x.shape[-1], 1])\n", + " rand_b = tf.random.uniform(shape=[])\n", + " self.w = tf.Variable(rand_w)\n", + " self.b = tf.Variable(rand_b)\n", + " self.built = True\n", + " y = tf.add(tf.matmul(x, self.w), self.b)\n", + " return tf.squeeze(y, axis=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l2hiez2eIUz8" + }, + "source": [ + "For each example, the model returns a prediction for the input automobile's MPG by computing the weighted sum of its features plus a bias term. This prediction can then be unstandardized to obtain its value in terms of the original units." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OeOrNdnkEEcR" + }, + "outputs": [], + "source": [ + "lin_reg = LinearRegression()\n", + "prediction = lin_reg(x_train_norm[:1])\n", + "prediction_unnorm = norm_y.unnorm(prediction)\n", + "prediction_unnorm.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FIHANxNSvWr9" + }, + "source": [ + "## Define a loss function\n", + "\n", + "Now, define a loss function to evaluate the model's performance during the training process.\n", + "\n", + "Since regression problems deal with continuous outputs, the mean squared error (MSE) is an ideal choice for the loss function. The MSE is defined by the following equation:\n", + "\n", + "$$MSE = \\frac{1}{m}\\sum_{i=1}^{m}(\\hat{y}_i -y_i)^2$$\n", + "\n", + "where\n", + "\n", + "* $\\hat{y}$: vector of predictions\n", + "* $y$: vector of true targets\n", + "\n", + "The goal of this regression problem is to find the optimal weight vector, $w$, and bias, $b$, that minimizes the MSE loss function. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8tYNVUkmw35s" + }, + "outputs": [], + "source": [ + "def mse_loss(y_pred, y):\n", + " return tf.reduce_mean(tf.square(y_pred - y))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "htI-7aJPqclK" + }, + "source": [ + "## Train and evaluate your model\n", + "\n", + "Using mini-batches for training provides both memory efficiency and faster convergence. The `tf.data.Dataset` API has useful functions for batching and shuffling. The API enables you to build complex input pipelines from simple, reusable pieces. Learn more about building TensorFlow input pipelines in [this guide](https://www.tensorflow.org/guide/data)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kxST2w_Nq0C5" + }, + "outputs": [], + "source": [ + "batch_size = 64\n", + "train_dataset = tf.data.Dataset.from_tensor_slices((x_train_norm, y_train_norm))\n", + "train_dataset = train_dataset.shuffle(buffer_size=x_train.shape[0]).batch(batch_size)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((x_test_norm, y_test_norm))\n", + "test_dataset = test_dataset.shuffle(buffer_size=x_test.shape[0]).batch(batch_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C9haUW8Yq3xD" + }, + "source": [ + "Next, write a training loop to iteratively update your model's parameters by making use of the MSE loss function and its gradients with respect to the input parameters.\n", + "\n", + "This iterative method is referred to as [gradient descent](https://developers.google.com/machine-learning/glossary#gradient-descent). At each iteration, the model's parameters are updated by taking a step in the opposite direction of their computed gradients. The size of this step is determined by the learning rate, which is a configurable hyperparameter. Recall that the gradient of a function indicates the direction of its steepest ascent; therefore, taking a step in the opposite direction indicates the direction of steepest descent, which ultimately helps to minimize the MSE loss function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y7suUbJXVLqP" + }, + "outputs": [], + "source": [ + "# Set training parameters\n", + "epochs = 100\n", + "learning_rate = 0.01\n", + "train_losses, test_losses = [], []\n", + "\n", + "# Format training loop\n", + "for epoch in range(epochs):\n", + " batch_losses_train, batch_losses_test = [], []\n", + "\n", + " # Iterate through the training data\n", + " for x_batch, y_batch in train_dataset:\n", + " with tf.GradientTape() as tape:\n", + " y_pred_batch = lin_reg(x_batch)\n", + " batch_loss = mse_loss(y_pred_batch, y_batch)\n", + " # Update parameters with respect to the gradient calculations\n", + " grads = tape.gradient(batch_loss, lin_reg.variables)\n", + " for g,v in zip(grads, lin_reg.variables):\n", + " v.assign_sub(learning_rate * g)\n", + " # Keep track of batch-level training performance \n", + " batch_losses_train.append(batch_loss)\n", + " \n", + " # Iterate through the testing data\n", + " for x_batch, y_batch in test_dataset:\n", + " y_pred_batch = lin_reg(x_batch)\n", + " batch_loss = mse_loss(y_pred_batch, y_batch)\n", + " # Keep track of batch-level testing performance \n", + " batch_losses_test.append(batch_loss)\n", + "\n", + " # Keep track of epoch-level model performance\n", + " train_loss = tf.reduce_mean(batch_losses_train)\n", + " test_loss = tf.reduce_mean(batch_losses_test)\n", + " train_losses.append(train_loss)\n", + " test_losses.append(test_loss)\n", + " if epoch % 10 == 0:\n", + " print(f'Mean squared error for step {epoch}: {train_loss.numpy():0.3f}')\n", + "\n", + "# Output final losses\n", + "print(f\"\\nFinal train loss: {train_loss:0.3f}\")\n", + "print(f\"Final test loss: {test_loss:0.3f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4mDAAPFqVVgn" + }, + "source": [ + "Plot the changes in MSE loss over time. 
Calculating performance metrics on a designated [validation set](https://developers.google.com/machine-learning/glossary#validation-set) or [test set](https://developers.google.com/machine-learning/glossary#test-set) ensures the model does not overfit to the training dataset and can generalize well to unseen data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F7dTAzgHDUh7" + }, + "outputs": [], + "source": [ + "matplotlib.rcParams['figure.figsize'] = [9, 6]\n", + "\n", + "plt.plot(range(epochs), train_losses, label = \"Training loss\")\n", + "plt.plot(range(epochs), test_losses, label = \"Testing loss\")\n", + "plt.xlabel(\"Epoch\")\n", + "plt.ylabel(\"Mean squared error loss\")\n", + "plt.legend()\n", + "plt.title(\"MSE loss vs training iterations\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Aj8NrlzlJqDG" + }, + "source": [ + "It seems like the model does a good job of fitting the training data while also generalizing well to the unseen test data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AUNIPubuPYDR" + }, + "source": [ + "## Save and load the model\n", + "\n", + "Start by making an export module that takes in raw data and performs the following operations:\n", + "- Feature extraction \n", + "- Normalization \n", + "- Prediction\n", + "- Unnormalization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g-uOrGa9ZehG" + }, + "outputs": [], + "source": [ + "class ExportModule(tf.Module):\n", + " def __init__(self, model, extract_features, norm_x, norm_y):\n", + " # Initialize pre and postprocessing functions\n", + " self.model = model\n", + " self.extract_features = extract_features\n", + " self.norm_x = norm_x\n", + " self.norm_y = norm_y\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=[None, None], dtype=tf.float32)]) \n", + " def __call__(self, x):\n", + " # Run the ExportModule for new data points\n", + " x = self.extract_features(x)\n", + " x = self.norm_x.norm(x)\n", + " y = self.model(x)\n", + " y = self.norm_y.unnorm(y)\n", + " return y " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YPYYLQ8EZiU8" + }, + "outputs": [], + "source": [ + "lin_reg_export = ExportModule(model=lin_reg,\n", + " extract_features=onehot_origin,\n", + " norm_x=norm_x,\n", + " norm_y=norm_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6v8xi06XZWiC" + }, + "source": [ + "If you want to save the model at its current state, use the `tf.saved_model.save` function. To load a saved model for making predictions, use the `tf.saved_model.load` function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K1IvMoHbptht" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "import os\n", + "\n", + "models = tempfile.mkdtemp()\n", + "save_path = os.path.join(models, 'lin_reg_export')\n", + "tf.saved_model.save(lin_reg_export, save_path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYb6DrEH0GMv" + }, + "outputs": [], + "source": [ + "lin_reg_loaded = tf.saved_model.load(save_path)\n", + "test_preds = lin_reg_loaded(x_test)\n", + "test_preds[:10].numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-47O6_GLdRuT" + }, + "source": [ + "## Conclusion\n", + "\n", + "Congratulations! 
You have trained a regression model using the TensorFlow Core low-level APIs.\n", + "\n", + "For more examples of using TensorFlow Core APIs, check out the following guides:\n", + "* [Logistic regression](./logistic_regression_core.ipynb) for binary classification\n", + "* [Multi-layer perceptrons](./mlp_core.ipynb) for hand-written digit recognition\n" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "rX8mhOLljYeM" + ], + "name": "quickstart_core.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/create_op.md b/site/en/guide/create_op.md index 26ca8910225..fa4f573fa32 100644 --- a/site/en/guide/create_op.md +++ b/site/en/guide/create_op.md @@ -47,7 +47,7 @@ To incorporate your custom op you'll need to: test the op in C++. If you define gradients, you can verify them with the Python `tf.test.compute_gradient_error`. See - [`relu_op_test.py`](https://www.tensorflow.org/code/tensorflow/python/kernel_tests/relu_op_test.py) as + [`relu_op_test.py`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/kernel_tests/nn_ops/relu_op_test.py) as an example that tests the forward functions of Relu-like operators and their gradients. @@ -55,8 +55,8 @@ To incorporate your custom op you'll need to: * Some familiarity with C++. * Must have installed the - [TensorFlow binary](../../install), or must have - [downloaded TensorFlow source](../../install/source.md), + [TensorFlow binary](https://www.tensorflow.org/install), or must have + [downloaded TensorFlow source](https://www.tensorflow.org/install/source), and be able to build it. ## Define the op interface @@ -152,17 +152,17 @@ REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp); > Important: Instances of your OpKernel may be accessed concurrently. > Your `Compute` method must be thread-safe. Guard any access to class > members with a mutex. Or better yet, don't share state via class members! -> Consider using a [`ResourceMgr`](https://www.tensorflow.org/code/tensorflow/core/framework/resource_mgr.h) +> Consider using a [`ResourceMgr`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/resource_mgr.h) > to keep track of op state. ### Multi-threaded CPU kernels To write a multi-threaded CPU kernel, the Shard function in -[`work_sharder.h`](https://www.tensorflow.org/code/tensorflow/core/util/work_sharder.h) +[`work_sharder.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/work_sharder.h) can be used. This function shards a computation function across the threads configured to be used for intra-op threading (see intra_op_parallelism_threads in -[`config.proto`](https://www.tensorflow.org/code/tensorflow/core/protobuf/config.proto)). +[`config.proto`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto)). ### GPU kernels @@ -360,12 +360,13 @@ g++ -std=c++14 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFL On macOS, the additional flag "-undefined dynamic_lookup" is required when building the `.so` file. -> Note on `gcc` version `>=5`: gcc uses the new C++ -> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. The binary pip -> packages available on the TensorFlow website are built with `gcc4` that uses -> the older ABI. 
If you compile your op library with `gcc>=5`, add -> `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to make the library -> compatible with the older abi. +> Note on `gcc` version `>=5`: gcc uses the new C++ +> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. +> TensorFlow 2.8 and earlier were built with `gcc4` that uses the older ABI. If +> you are using these versions of TensorFlow and are trying to compile your op +> library with `gcc>=5`, add `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to +> make the library compatible with the older ABI. TensorFlow 2.9+ packages are +> compatible with the newer ABI by default. ### Compile the op using bazel (TensorFlow source installation) @@ -518,16 +519,16 @@ This asserts that the input is a vector, and returns having set the * The `context`, which can either be an `OpKernelContext` or `OpKernelConstruction` pointer (see - [`tensorflow/core/framework/op_kernel.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)), + [`tensorflow/core/framework/op_kernel.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_kernel.h)), for its `SetStatus()` method. * The condition. For example, there are functions for validating the shape of a tensor in - [`tensorflow/core/framework/tensor_shape.h`](https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.h) + [`tensorflow/core/framework/tensor_shape.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.h) * The error itself, which is represented by a `Status` object, see - [`tensorflow/core/lib/core/status.h`](https://www.tensorflow.org/code/tensorflow/core/lib/core/status.h). A + [`tensorflow/core/platform/status.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/status.h). A `Status` has both a type (frequently `InvalidArgument`, but see the list of types) and a message. Functions for constructing an error may be found in - [`tensorflow/core/lib/core/errors.h`][validation-macros]. + [`tensorflow/core/platform/errors.h`][validation-macros]. Alternatively, if you want to test whether a `Status` object returned from some function is an error, and if so return it, use @@ -667,7 +668,7 @@ There are shortcuts for common type constraints: The specific lists of types allowed by these are defined by the functions (like `NumberTypes()`) in -[`tensorflow/core/framework/types.h`](https://www.tensorflow.org/code/tensorflow/core/framework/types.h). +[`tensorflow/core/framework/types.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.h). In this example the attr `t` must be one of the numeric types: ```c++ @@ -1225,7 +1226,7 @@ There are several ways to preserve backwards-compatibility. type into a list of varying types). The full list of safe and unsafe changes can be found in -[`tensorflow/core/framework/op_compatibility_test.cc`](https://www.tensorflow.org/code/tensorflow/core/framework/op_compatibility_test.cc). +[`tensorflow/core/framework/op_compatibility_test.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_compatibility_test.cc). If you cannot make your change to an operation backwards compatible, then create a new operation with a new name with the new semantics. 
@@ -1242,16 +1243,16 @@ made when TensorFlow changes major versions, and must conform to the You can implement different OpKernels and register one for CPU and another for GPU, just like you can [register kernels for different types](#polymorphism). There are several examples of kernels with GPU support in -[`tensorflow/core/kernels/`](https://www.tensorflow.org/code/tensorflow/core/kernels/). +[`tensorflow/core/kernels/`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/). Notice some kernels have a CPU version in a `.cc` file, a GPU version in a file ending in `_gpu.cu.cc`, and some code shared in common in a `.h` file. For example, the `tf.pad` has everything but the GPU kernel in [`tensorflow/core/kernels/pad_op.cc`][pad_op]. The GPU kernel is in -[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op_gpu.cu.cc), +[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op_gpu.cu.cc), and the shared code is a templated class defined in -[`tensorflow/core/kernels/pad_op.h`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.h). +[`tensorflow/core/kernels/pad_op.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op.h). We organize the code this way for two reasons: it allows you to share common code among the CPU and GPU implementations, and it puts the GPU implementation into a separate file so that it can be compiled only by the GPU compiler. @@ -1272,16 +1273,16 @@ kept on the CPU, add a `HostMemory()` call to the kernel registration, e.g.: #### Compiling the kernel for the GPU device Look at -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) for an example that uses a CUDA kernel to implement an op. The `tf_custom_op_library` accepts a `gpu_srcs` argument in which the list of source files containing the CUDA kernels (`*.cu.cc` files) can be specified. For use with a binary installation of TensorFlow, the CUDA kernels have to be compiled with NVIDIA's `nvcc` compiler. Here is the sequence of commands you can use to compile the -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc) and -[cuda_op_kernel.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cc) +[cuda_op_kernel.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/adding_an_op/cuda_op_kernel.cc) into a single dynamically loadable library: ```bash @@ -1379,6 +1380,13 @@ Note that at the time the gradient function is called, only the data flow graph of ops is available, not the tensor data itself. Thus, all computation must be performed using other tensorflow ops, to be run at graph execution time. +Add type hints when registering the custom gradient for an op type to make the +code more readable, debuggable, easier to maintain, and more robust through data +validation. For example, when taking an `op` as a parameter in a function, +specify that the gradient function will take an +tf.Operation +as its parameter type. 
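A short Python sketch of the recommendation above, using the guide's `ZeroOut` op as the registered name; the gradient body here is only a pass-through placeholder, not the op's real gradient:

```python
import tensorflow as tf

@tf.RegisterGradient("ZeroOut")
def _zero_out_grad(op: tf.Operation, grad: tf.Tensor) -> tf.Tensor:
  # `op` exposes the original op's inputs and attrs at graph-construction time;
  # `grad` is the gradient with respect to the op's output.
  return grad  # placeholder: pass the incoming gradient straight through
```

The type hints make it immediately clear that the first parameter is the graph-level `tf.Operation`, not a tensor of data.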
+ ### Shape functions in C++ The TensorFlow API has a feature called "shape inference" that provides @@ -1404,7 +1412,7 @@ be set to the first input's shape. If the output is selected by its index as in There are a number of common shape functions that apply to many ops, such as `shape_inference::UnchangedShape` which can be -found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/framework/common_shape_fns.h) and used as follows: +found in [common_shape_fns.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/common_shape_fns.h) and used as follows: ```c++ REGISTER_OP("ZeroOut") @@ -1451,7 +1459,7 @@ provides access to the attributes of the op). Since shape inference is an optional feature, and the shapes of tensors may vary dynamically, shape functions must be robust to incomplete shape information for -any of the inputs. The `Merge` method in [`InferenceContext`](https://www.tensorflow.org/code/tensorflow/core/framework/shape_inference.h) +any of the inputs. The `Merge` method in [`InferenceContext`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/shape_inference.h) allows the caller to assert that two shapes are the same, even if either or both of them do not have complete information. Shape functions are defined for all of the core TensorFlow ops and provide many different usage examples. @@ -1476,7 +1484,7 @@ If you have a complicated shape function, you should consider adding a test for validating that various input shape combinations produce the expected output shape combinations. You can see examples of how to write these tests in some our -[core ops tests](https://www.tensorflow.org/code/tensorflow/core/ops/array_ops_test.cc). +[core ops tests](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/array_ops_test.cc). (The syntax of `INFER_OK` and `INFER_ERROR` are a little cryptic, but try to be compact in representing input and output shape specifications in tests. For now, see the surrounding comments in those tests to get a sense of the shape @@ -1489,20 +1497,20 @@ To build a `pip` package for your op, see the guide shows how to build custom ops from the TensorFlow pip package instead of building TensorFlow from source. 
-[core-array_ops]:https://www.tensorflow.org/code/tensorflow/core/ops/array_ops.cc -[python-user_ops]:https://www.tensorflow.org/code/tensorflow/python/user_ops/user_ops.py -[tf-kernels]:https://www.tensorflow.org/code/tensorflow/core/kernels/ -[user_ops]:https://www.tensorflow.org/code/tensorflow/core/user_ops/ -[pad_op]:https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.cc -[standard_ops-py]:https://www.tensorflow.org/code/tensorflow/python/ops/standard_ops.py -[standard_ops-cc]:https://www.tensorflow.org/code/tensorflow/cc/ops/standard_ops.h -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD -[validation-macros]:https://www.tensorflow.org/code/tensorflow/core/lib/core/errors.h -[op_def_builder]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.h -[register_types]:https://www.tensorflow.org/code/tensorflow/core/framework/register_types.h -[FinalizeAttr]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.cc -[DataTypeString]:https://www.tensorflow.org/code/tensorflow/core/framework/types.cc -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD -[types-proto]:https://www.tensorflow.org/code/tensorflow/core/framework/types.proto -[TensorShapeProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.proto -[TensorProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor.proto +[core-array_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/array_ops.cc +[python-user_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/user_ops/user_ops.py +[tf-kernels]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/ +[user_ops]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/user_ops/ +[pad_op]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/kernels/pad_op.cc +[standard_ops-py]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/standard_ops.py +[standard_ops-cc]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/ops/standard_ops.h +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD +[validation-macros]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/errors.h +[op_def_builder]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def_builder.h +[register_types]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/register_types.h +[FinalizeAttr]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def_builder.cc +[DataTypeString]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.cc +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD +[types-proto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto +[TensorShapeProto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto +[TensorProto]:https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto diff --git a/site/en/guide/data.ipynb b/site/en/guide/data.ipynb index fd3232be1b9..739ef131005 100644 --- a/site/en/guide/data.ipynb +++ b/site/en/guide/data.ipynb @@ -15,7 +15,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "cellView": "form", "id": "llMNufAK7nfK" }, "outputs": [], @@ -139,8 +138,8 @@ "\n", "Once you have a 
`Dataset` object, you can *transform* it into a new `Dataset` by\n", "chaining method calls on the `tf.data.Dataset` object. For example, you can\n", - "apply per-element transformations such as `Dataset.map()`, and multi-element\n", - "transformations such as `Dataset.batch()`. See the documentation for\n", + "apply per-element transformations such as `Dataset.map`, and multi-element\n", + "transformations such as `Dataset.batch`. Refer to the documentation for\n", "`tf.data.Dataset` for a complete list of transformations.\n", "\n", "The `Dataset` object is a Python iterable. This makes it possible to consume its\n", @@ -238,9 +237,9 @@ "structure of elements include `tuple`, `dict`, `NamedTuple`, and\n", "`OrderedDict`. In particular, `list` is not a valid construct for\n", "expressing the structure of dataset elements. This is because\n", - "early tf.data users felt strongly about `list` inputs (e.g. passed\n", + "early `tf.data` users felt strongly about `list` inputs (for example, when passed\n", "to `tf.data.Dataset.from_tensors`) being automatically packed as\n", - "tensors and `list` outputs (e.g. return values of user-defined\n", + "tensors and `list` outputs (for example, return values of user-defined\n", "functions) being coerced into a `tuple`. As a consequence, if you\n", "would like a `list` input to be treated as a structure, you need\n", "to convert it into `tuple` and if you would like a `list` output\n", @@ -328,7 +327,7 @@ }, "source": [ "The `Dataset` transformations support datasets of any structure. When using the\n", - "`Dataset.map()`, and `Dataset.filter()` transformations,\n", + "`Dataset.map`, and `Dataset.filter` transformations,\n", "which apply a function to each element, the element structure determines the\n", "arguments of the function:" ] @@ -416,11 +415,11 @@ "source": [ "### Consuming NumPy arrays\n", "\n", - "See [Loading NumPy arrays](../tutorials/load_data/numpy.ipynb) for more examples.\n", + "Refer to the [Loading NumPy arrays](../tutorials/load_data/numpy.ipynb) tutorial for more examples.\n", "\n", "If all of your input data fits in memory, the simplest way to create a `Dataset`\n", "from them is to convert them to `tf.Tensor` objects and use\n", - "`Dataset.from_tensor_slices()`." + "`Dataset.from_tensor_slices`." ] }, { @@ -472,7 +471,7 @@ "\n", "Another common data source that can easily be ingested as a `tf.data.Dataset` is the python generator.\n", "\n", - "Caution: While this is a convienient approach it has limited portability and scalibility. It must run in the same python process that created the generator, and is still subject to the Python [GIL](https://en.wikipedia.org/wiki/Global_interpreter_lock)." + "Caution: While this is a convenient approach it has limited portability and scalability. It must run in the same python process that created the generator, and is still subject to the Python [GIL](https://en.wikipedia.org/wiki/Global_interpreter_lock)." ] }, { @@ -548,7 +547,7 @@ "\n", "It's also important to note that the `output_shapes` and `output_types` follow the same nesting rules as other dataset methods.\n", "\n", - "Here is an example generator that demonstrates both aspects, it returns tuples of arrays, where the second array is a vector with unknown length." + "Here is an example generator that demonstrates both aspects: it returns tuples of arrays, where the second array is a vector with unknown length." 
] }, { @@ -589,7 +588,7 @@ "source": [ "The first output is an `int32` the second is a `float32`.\n", "\n", - "The first item is a scalar, shape `()`, and the second is a vector of unknown length, shape `(None,)` " + "The first item is a scalar, shape `()`, and the second is a vector of unknown length, shape `(None,)`" ] }, { @@ -601,8 +600,8 @@ "outputs": [], "source": [ "ds_series = tf.data.Dataset.from_generator(\n", - " gen_series, \n", - " output_types=(tf.int32, tf.float32), \n", + " gen_series,\n", + " output_types=(tf.int32, tf.float32),\n", " output_shapes=((), (None,)))\n", "\n", "ds_series" @@ -710,8 +709,8 @@ "outputs": [], "source": [ "ds = tf.data.Dataset.from_generator(\n", - " lambda: img_gen.flow_from_directory(flowers), \n", - " output_types=(tf.float32, tf.float32), \n", + " lambda: img_gen.flow_from_directory(flowers),\n", + " output_types=(tf.float32, tf.float32),\n", " output_shapes=([32,256,256,3], [32,5])\n", ")\n", "\n", @@ -726,7 +725,7 @@ }, "outputs": [], "source": [ - "for images, label in ds.take(1):\n", + "for images, labels in ds.take(1):\n", " print('images.shape: ', images.shape)\n", " print('labels.shape: ', labels.shape)\n" ] @@ -739,7 +738,7 @@ "source": [ "### Consuming TFRecord data\n", "\n", - "See [Loading TFRecords](../tutorials/load_data/tfrecord.ipynb) for an end-to-end example.\n", + "Refer to the [Loading TFRecords](../tutorials/load_data/tfrecord.ipynb) tutorial for an end-to-end example.\n", "\n", "The `tf.data` API supports a variety of file formats so that you can process\n", "large datasets that do not fit in memory. For example, the TFRecord file format\n", @@ -825,7 +824,7 @@ "source": [ "### Consuming text data\n", "\n", - "See [Loading Text](../tutorials/load_data/text.ipynb) for an end to end example.\n", + "Refer to the [Load text](../tutorials/load_data/text.ipynb) tutorial for an end-to-end example.\n", "\n", "Many datasets are distributed as one or more text files. The\n", "`tf.data.TextLineDataset` provides an easy way to extract lines from one or more\n", @@ -916,7 +915,7 @@ "source": [ "By default, a `TextLineDataset` yields *every* line of each file, which may\n", "not be desirable, for example, if the file starts with a header line, or contains comments. These lines can be removed using the `Dataset.skip()` or\n", - "`Dataset.filter()` transformations. Here, you skip the first line, then filter to\n", + "`Dataset.filter` transformations. Here, you skip the first line, then filter to\n", "find only survivors." ] }, @@ -985,7 +984,7 @@ "id": "ChDHNi3qbDch" }, "source": [ - "See [Loading CSV Files](../tutorials/load_data/csv.ipynb), and [Loading Pandas DataFrames](../tutorials/load_data/pandas_dataframe.ipynb) for more examples. \n", + "Refer to the [Loading CSV Files](../tutorials/load_data/csv.ipynb) and [Loading Pandas DataFrames](../tutorials/load_data/pandas_dataframe.ipynb) tutorials for more examples.\n", "\n", "The CSV file format is a popular format for storing tabular data in plain text.\n", "\n", @@ -1045,11 +1044,11 @@ "id": "47yippqaHFk6" }, "source": [ - "A more scalable approach is to load from disk as necessary. \n", + "A more scalable approach is to load from disk as necessary.\n", "\n", "The `tf.data` module provides methods to extract records from one or more CSV files that comply with [RFC 4180](https://tools.ietf.org/html/rfc4180).\n", "\n", - "The `experimental.make_csv_dataset` function is the high level interface for reading sets of csv files. 
It supports column type inference and many other features, like batching and shuffling, to make usage simple." + "The `tf.data.experimental.make_csv_dataset` function is the high-level interface for reading sets of CSV files. It supports column type inference and many other features, like batching and shuffling, to make usage simple." ] }, { @@ -1122,7 +1121,7 @@ "id": "TSVgJJ1HJD6M" }, "source": [ - "There is also a lower-level `experimental.CsvDataset` class which provides finer grained control. It does not support column type inference. Instead you must specify the type of each column. " + "There is also a lower-level `experimental.CsvDataset` class which provides finer grained control. It does not support column type inference. Instead you must specify the type of each column." ] }, { @@ -1133,7 +1132,7 @@ }, "outputs": [], "source": [ - "titanic_types = [tf.int32, tf.string, tf.float32, tf.int32, tf.int32, tf.float32, tf.string, tf.string, tf.string, tf.string] \n", + "titanic_types = [tf.int32, tf.string, tf.float32, tf.int32, tf.int32, tf.float32, tf.string, tf.string, tf.string, tf.string]\n", "dataset = tf.data.experimental.CsvDataset(titanic_file, titanic_types , header=True)\n", "\n", "for line in dataset.take(10):\n", @@ -1386,7 +1385,7 @@ "The simplest form of batching stacks `n` consecutive elements of a dataset into\n", "a single element. The `Dataset.batch()` transformation does exactly this, with\n", "the same constraints as the `tf.stack()` operator, applied to each component\n", - "of the elements: i.e. for each component *i*, all elements must have a tensor\n", + "of the elements: i.e., for each component *i*, all elements must have a tensor\n", "of the exact same shape." ] }, @@ -1457,10 +1456,10 @@ "### Batching tensors with padding\n", "\n", "The above recipe works for tensors that all have the same size. However, many\n", - "models (e.g. sequence models) work with input data that can have varying size\n", - "(e.g. sequences of different lengths). To handle this case, the\n", + "models (including sequence models) work with input data that can have varying size\n", + "(for example, sequences of different lengths). To handle this case, the\n", "`Dataset.padded_batch` transformation enables you to batch tensors of\n", - "different shape by specifying one or more dimensions in which they may be\n", + "different shapes by specifying one or more dimensions in which they may be\n", "padded." ] }, @@ -1604,7 +1603,7 @@ "id": "DlEM5f9loSHR" }, "source": [ - "If you would like to perform a custom computation (e.g. to collect statistics) at the end of each epoch then it's simplest to restart the dataset iteration on each epoch:" + "If you would like to perform a custom computation (for example, to collect statistics) at the end of each epoch then it's simplest to restart the dataset iteration on each epoch:" ] }, { @@ -1693,7 +1692,7 @@ "source": [ "As with `Dataset.batch` the order relative to `Dataset.repeat` matters.\n", "\n", - "`Dataset.shuffle` doesn't signal the end of an epoch until the shuffle buffer is empty. So a shuffle placed before a repeat will show every element of one epoch before moving to the next: " + "`Dataset.shuffle` doesn't signal the end of an epoch until the shuffle buffer is empty. So a shuffle placed before a repeat will show every element of one epoch before moving to the next:" ] }, { @@ -1906,7 +1905,7 @@ "\n", "For performance reasons, use TensorFlow operations for\n", "preprocessing your data whenever possible. 
However, it is sometimes useful to\n", - "call external Python libraries when parsing your input data. You can use the `tf.py_function()` operation in a `Dataset.map()` transformation." + "call external Python libraries when parsing your input data. You can use the `tf.py_function` operation in a `Dataset.map` transformation." ] }, { @@ -1915,7 +1914,7 @@ "id": "R2u7CeA67DU8" }, "source": [ - "For example, if you want to apply a random rotation, the `tf.image` module only has `tf.image.rot90`, which is not very useful for image augmentation. \n", + "For example, if you want to apply a random rotation, the `tf.image` module only has `tf.image.rot90`, which is not very useful for image augmentation.\n", "\n", "Note: `tensorflow_addons` has a TensorFlow compatible `rotate` in `tensorflow_addons.image.rotate`.\n", "\n", @@ -1932,6 +1931,7 @@ "source": [ "import scipy.ndimage as ndimage\n", "\n", + "@tf.py_function(Tout=tf.float32)\n", "def random_rotate_image(image):\n", " image = ndimage.rotate(image, np.random.uniform(-30, 30), reshape=False)\n", " return image" @@ -1969,7 +1969,7 @@ "source": [ "def tf_random_rotate_image(image, label):\n", " im_shape = image.shape\n", - " [image,] = tf.py_function(random_rotate_image, [image], [tf.float32])\n", + " image = random_rotate_image(image)\n", " image.set_shape(im_shape)\n", " return image, label" ] @@ -2124,7 +2124,7 @@ "id": "t0JMgvXEz9y1" }, "source": [ - "For an end to end time series example see: [Time series forecasting](../../tutorials/structured_data/time_series.ipynb)." + "For an end-to-end time series example see: [Time series forecasting](../../tutorials/structured_data/time_series.ipynb)." ] }, { @@ -2155,7 +2155,7 @@ "id": "o6GLGhxgpazJ" }, "source": [ - "Typically, models based on this sort of data will want a contiguous time slice. \n", + "Typically, models based on this sort of data will want a contiguous time slice.\n", "\n", "The simplest approach would be to batch the data:" ] @@ -2283,7 +2283,7 @@ "id": "fF6pEdlduq8E" }, "source": [ - "While using `Dataset.batch` works, there are situations where you may need finer control. The `Dataset.window` method gives you complete control, but requires some care: it returns a `Dataset` of `Datasets`. See [Dataset structure](#dataset_structure) for details." + "While using `Dataset.batch` works, there are situations where you may need finer control. The `Dataset.window` method gives you complete control, but requires some care: it returns a `Dataset` of `Datasets`. Go to the [Dataset structure](#dataset_structure) section for details." ] }, { @@ -2328,7 +2328,7 @@ "id": "sgLIwq9Anc34" }, "source": [ - "In nearly all cases, you will want to `.batch` the dataset first:" + "In nearly all cases, you will want to `Dataset.batch` the dataset first:" ] }, { @@ -2422,7 +2422,7 @@ "\n", "When working with a dataset that is very class-imbalanced, you may want to resample the dataset. `tf.data` provides two methods to do this. The credit card fraud dataset is a good example of this sort of problem.\n", "\n", - "Note: See [Imbalanced Data](../tutorials/keras/imbalanced_data.ipynb) for a full tutorial.\n" + "Note: Go to [Classification on imbalanced data](../tutorials/structured_data/imbalanced_data.ipynb) for a full tutorial.\n" ] }, { @@ -2529,7 +2529,7 @@ "id": "ov14SRrQyQE3" }, "source": [ - "One approach to resampling a dataset is to use `sample_from_datasets`. 
This is more applicable when you have a separate `data.Dataset` for each class.\n", + "One approach to resampling a dataset is to use `sample_from_datasets`. This is more applicable when you have a separate `tf.data.Dataset` for each class.\n", "\n", "Here, just use filter to generate them from the credit card fraud data:" ] @@ -2593,7 +2593,7 @@ "id": "2K4ObOms082B" }, "source": [ - "Now the dataset produces examples of each class with 50/50 probability:" + "Now the dataset produces examples of each class with a 50/50 probability:" ] }, { @@ -2627,11 +2627,11 @@ "it needs a separate `tf.data.Dataset` per class. You could use `Dataset.filter`\n", "to create those two datasets, but that results in all the data being loaded twice.\n", "\n", - "The `data.Dataset.rejection_resample` method can be applied to a dataset to rebalance it, while only loading it once. Elements will be dropped from the dataset to achieve balance.\n", + "The `tf.data.Dataset.rejection_resample` method can be applied to a dataset to rebalance it, while only loading it once. Elements will be dropped or repeated to achieve balance.\n", "\n", - "`data.Dataset.rejection_resample` takes a `class_func` argument. This `class_func` is applied to each dataset element, and is used to determine which class an example belongs to for the purposes of balancing.\n", + "The `rejection_resample` method takes a `class_func` argument. This `class_func` is applied to each dataset element, and is used to determine which class an example belongs to for the purposes of balancing.\n", "\n", - "The goal here is to balance the lable distribution, and the elements of `creditcard_ds` are already `(features, label)` pairs. So the `class_func` just needs to return those labels:" + "The goal here is to balance the label distribution, and the elements of `creditcard_ds` are already `(features, label)` pairs. So the `class_func` just needs to return those labels:" ] }, { @@ -2699,7 +2699,7 @@ "id": "j3d2jyEhx9kD" }, "source": [ - "Now the dataset produces examples of each class with 50/50 probability:" + "Now the dataset produces examples of each class with a 50/50 probability:" ] }, { @@ -2729,7 +2729,7 @@ "id": "SOGg1UFhUE4z" }, "source": [ - "Tensorflow supports [taking checkpoints](https://www.tensorflow.org/guide/checkpoint) so that when your training process restarts it can restore the latest checkpoint to recover most of its progress. In addition to checkpointing the model variables, you can also checkpoint the progress of the dataset iterator. This could be useful if you have a large dataset and don't want to start the dataset from the beginning on each restart. Note however that iterator checkpoints may be large, since transformations such as `shuffle` and `prefetch` require buffering elements within the iterator. \n", + "Tensorflow supports [taking checkpoints](./checkpoint.ipynb) so that when your training process restarts it can restore the latest checkpoint to recover most of its progress. In addition to checkpointing the model variables, you can also checkpoint the progress of the dataset iterator. This could be useful if you have a large dataset and don't want to start the dataset from the beginning on each restart. Note however that iterator checkpoints may be large, since transformations such as `Dataset.shuffle` and `Dataset.prefetch` require buffering elements within the iterator.\n", "\n", "To include your iterator in a checkpoint, pass the iterator to the `tf.train.Checkpoint` constructor." 
] @@ -2765,7 +2765,7 @@ "id": "gxWglTwX9Fex" }, "source": [ - "Note: It is not possible to checkpoint an iterator which relies on external state such as a `tf.py_function`. Attempting to do so will raise an exception complaining about the external state." + "Note: It is not possible to checkpoint an iterator which relies on an external state, such as a `tf.py_function`. Attempting to do so will raise an exception complaining about the external state." ] }, { @@ -2774,7 +2774,7 @@ "id": "uLRdedPpbDdD" }, "source": [ - "## Using tf.data with tf.keras" + "## Using `tf.data` with `tf.keras`" ] }, { @@ -2784,7 +2784,7 @@ }, "source": [ "The `tf.keras` API simplifies many aspects of creating and executing machine\n", - "learning models. Its `.fit()` and `.evaluate()` and `.predict()` APIs support datasets as inputs. Here is a quick dataset and model setup:" + "learning models. Its `Model.fit` and `Model.evaluate` and `Model.predict` APIs support datasets as inputs. Here is a quick dataset and model setup:" ] }, { @@ -2819,7 +2819,7 @@ "])\n", "\n", "model.compile(optimizer='adam',\n", - " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), \n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", " metrics=['accuracy'])" ] }, @@ -2849,7 +2849,7 @@ "id": "FzpAQfJMJF41" }, "source": [ - "If you pass an infinite dataset, for example by calling `Dataset.repeat()`, you just need to also pass the `steps_per_epoch` argument:" + "If you pass an infinite dataset, for example by calling `Dataset.repeat`, you just need to also pass the `steps_per_epoch` argument:" ] }, { @@ -2913,7 +2913,7 @@ "id": "aZYhJ_YSIl6w" }, "source": [ - "The labels are not required in when calling `Model.predict`. " + "The labels are not required when calling `Model.predict`." 
] }, { @@ -2953,8 +2953,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "data.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/data_performance.ipynb b/site/en/guide/data_performance.ipynb index 78427505020..81d8b3fd5b3 100644 --- a/site/en/guide/data_performance.ipynb +++ b/site/en/guide/data_performance.ipynb @@ -274,6 +274,8 @@ "source": [ "### Prefetching\n", "\n", + "\n", + "\n", "Prefetching overlaps the preprocessing and model execution of a training step.\n", "While the model is executing training step `s`, the input pipeline is reading the data for step `s+1`.\n", "Doing so reduces the step time to the maximum (as opposed to the sum) of the training and the time it takes to extract the data.\n", @@ -321,6 +323,8 @@ "source": [ "### Parallelizing data extraction\n", "\n", + "\n", + "\n", "In a real-world setting, the input data may be stored remotely (for example, on Google Cloud Storage or HDFS).\n", "A dataset pipeline that works well when reading data locally might become bottlenecked on I/O when reading data remotely because of the following differences between local and remote storage:\n", "\n", @@ -420,6 +424,8 @@ "source": [ "### Parallelizing data transformation\n", "\n", + "\n", + "\n", "When preparing data, input elements may need to be pre-processed.\n", "To this end, the `tf.data` API offers the `tf.data.Dataset.map` transformation, which applies a user-defined function to each element of the input dataset.\n", "Because input elements are independent of one another, the pre-processing can be parallelized across multiple CPU cores.\n", @@ -527,6 +533,8 @@ "source": [ "### Caching\n", "\n", + "\n", + "\n", "The `tf.data.Dataset.cache` transformation can cache a dataset, either in memory or on local storage.\n", "This will save some operations (like file opening and data reading) from being executed during each epoch." ] @@ -572,6 +580,8 @@ "source": [ "### Vectorizing mapping\n", "\n", + "\n", + "\n", "Invoking a user-defined function passed into the `map` transformation has overhead related to scheduling and executing the user-defined function.\n", "Vectorize the user-defined function (that is, have it operate over a batch of inputs at once) and apply the `batch` transformation _before_ the `map` transformation.\n", "\n", @@ -687,6 +697,8 @@ "source": [ "### Reducing memory footprint\n", "\n", + "\n", + "\n", "A number of transformations, including `interleave`, `prefetch`, and `shuffle`, maintain an internal buffer of elements. If the user-defined function passed into the `map` transformation changes the size of the elements, then the ordering of the map transformation and the transformations that buffer elements affects the memory usage. 
In general, choose the order that results in lower memory footprint, unless different ordering is desirable for performance.\n", "\n", "#### Caching partial computations\n", @@ -713,12 +725,12 @@ "Here is a summary of the best practices for designing performant TensorFlow\n", "input pipelines:\n", "\n", - "* [Use the `prefetch` transformation](#Pipelining) to overlap the work of a producer and consumer\n", - "* [Parallelize the data reading transformation](#Parallelizing-data-extraction) using the `interleave` transformation\n", - "* [Parallelize the `map` transformation](#Parallelizing-data-transformation) by setting the `num_parallel_calls` argument\n", - "* [Use the `cache` transformation](#Caching) to cache data in memory during the first epoch\n", - "* [Vectorize user-defined functions](#Map-and-batch) passed in to the `map` transformation\n", - "* [Reduce memory usage](#Reducing-memory-footprint) when applying the `interleave`, `prefetch`, and `shuffle` transformations" + "* [Use the `prefetch` transformation](#prefetching) to overlap the work of a producer and consumer\n", + "* [Parallelize the data reading transformation](#parallelizing_data_extraction) using the `interleave` transformation\n", + "* [Parallelize the `map` transformation](#parallelizing_data_transformation) by setting the `num_parallel_calls` argument\n", + "* [Use the `cache` transformation](#caching) to cache data in memory during the first epoch\n", + "* [Vectorize user-defined functions](#vectorizing_mapping) passed in to the `map` transformation\n", + "* [Reduce memory usage](#reducing_memory_footprint) when applying the `interleave`, `prefetch`, and `shuffle` transformations" ] }, { @@ -1153,7 +1165,6 @@ "colab": { "collapsed_sections": [], "name": "data_performance.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/distributed_training.ipynb b/site/en/guide/distributed_training.ipynb index b115d68afd0..04b7118b1f2 100644 --- a/site/en/guide/distributed_training.ipynb +++ b/site/en/guide/distributed_training.ipynb @@ -130,7 +130,7 @@ "| **Custom training loop** | Supported | Supported | Supported | Experimental support | Experimental support |\n", "| **Estimator API** | Limited Support | Not supported | Limited Support | Limited Support | Limited Support |\n", "\n", - "Note: [Experimental support](https://www.tensorflow.org/guide/versions#what_is_not_covered) means the APIs are not covered by any compatibilities guarantees.\n", + "Note: [Experimental support](https://www.tensorflow.org/guide/versions#what_is_not_covered) means the APIs are not covered by any compatibility guarantees.\n", "\n", "Warning: Estimator support is limited. Basic training and evaluation are experimental, and advanced features—such as scaffold—are not implemented. You should be using Keras or custom training loops if a use case is not covered. Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. Go to the [migration guide](https://tensorflow.org/guide/migrate) for details." ] @@ -421,7 +421,7 @@ "source": [ "This strategy serves two main purposes:\n", "\n", - "* It allows writing distribution-aware library code unconditionally. 
For example, in `tf.optimizer`s you can use `tf.distribute.get_strategy` and use that strategy for reducing gradients—it will always return a strategy object on which you can call the `Strategy.reduce` API.\n" + "* It allows writing distribution-aware library code unconditionally. For example, in `tf.keras.optimizers` you can use `tf.distribute.get_strategy` and use that strategy for reducing gradients—it will always return a strategy object on which you can call the `Strategy.reduce` API.\n" ] }, { @@ -503,14 +503,14 @@ "source": [ "## Use tf.distribute.Strategy with Keras Model.fit\n", "\n", - "`tf.distribute.Strategy` is integrated into `tf.keras`, which is TensorFlow's implementation of the [Keras API specification](https://keras.io). `tf.keras` is a high-level API to build and train models. By integrating into the `tf.keras` backend, it's seamless for you to distribute your training written in the Keras training framework [using Model.fit](/keras/customizing_what_happens_in_fit.ipynb).\n", + "`tf.distribute.Strategy` is integrated into `tf.keras`, which is TensorFlow's implementation of the [Keras API specification](https://keras.io/api/). `tf.keras` is a high-level API to build and train models. By integrating into the `tf.keras` backend, it's seamless for you to distribute your training written in the Keras training framework [using Model.fit](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit).\n", "\n", "Here's what you need to change in your code:\n", "\n", "1. Create an instance of the appropriate `tf.distribute.Strategy`.\n", - "2. Move the creation of Keras model, optimizer and metrics inside `strategy.scope`.\n", + "2. Move the creation of Keras model, optimizer and metrics inside `strategy.scope`. Thus the code in the model's `call()`, `train_step()`, and `test_step()` methods will all be distributed and executed on the accelerator(s).\n", "\n", - "TensorFlow distribution strategies support all types of Keras models—[Sequential](/keras/sequential_model.ipynb), [Functional](/keras/functional.ipynb), and [subclassed](/keras/custom_layers_and_models.ipynb).\n", + "TensorFlow distribution strategies support all types of Keras models—[Sequential](https://www.tensorflow.org/guide/keras/sequential_model), [Functional](https://www.tensorflow.org/guide/keras/functional), and [subclassed](https://www.tensorflow.org/guide/keras/custom_layers_and_models)\n", "\n", "Here is a snippet of code to do this for a very simple Keras model with one `Dense` layer:" ] @@ -526,9 +526,10 @@ "mirrored_strategy = tf.distribute.MirroredStrategy()\n", "\n", "with mirrored_strategy.scope():\n", - " model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])\n", - "\n", - "model.compile(loss='mse', optimizer='sgd')" + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(1, input_shape=(1,),\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4))])\n", + " model.compile(loss='mse', optimizer='sgd')" ] }, { @@ -585,6 +586,17 @@ "In both cases—with `Dataset` or NumPy—each batch of the given input is divided equally among the multiple replicas. For instance, if you are using the `MirroredStrategy` with 2 GPUs, each batch of size 10 will be divided among the 2 GPUs, with each receiving 5 input examples in each step. Each epoch will then train faster as you add more GPUs. Typically, you would want to increase your batch size as you add more accelerators, so as to make effective use of the extra computing power. 
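For instance, one common way to derive the global batch size from a per-replica batch size is sketched below; the per-replica value of 5 is arbitrary and not taken from the notebook.

```python
# Scale the per-replica batch size by the number of synchronized replicas
# reported by the strategy to obtain the effective global batch size.
BATCH_SIZE_PER_REPLICA = 5  # arbitrary illustrative value
global_batch_size = (BATCH_SIZE_PER_REPLICA *
                     mirrored_strategy.num_replicas_in_sync)
```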
You will also need to re-tune your learning rate, depending on the model. You can use `strategy.num_replicas_in_sync` to get the number of replicas." ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8ZmJqErtS4A1" + }, + "outputs": [], + "source": [ + "mirrored_strategy.num_replicas_in_sync" + ] + }, { "cell_type": "code", "execution_count": null, @@ -600,7 +612,7 @@ "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100)\n", "dataset = dataset.batch(global_batch_size)\n", "\n", - "LEARNING_RATES_BY_BATCH_SIZE = {5: 0.1, 10: 0.15}\n", + "LEARNING_RATES_BY_BATCH_SIZE = {5: 0.1, 10: 0.15, 20:0.175}\n", "learning_rate = LEARNING_RATES_BY_BATCH_SIZE[global_batch_size]" ] }, @@ -636,7 +648,7 @@ "source": [ "## Use tf.distribute.Strategy with custom training loops\n", "\n", - "As demonstrated above, using `tf.distribute.Strategy` with Keras `Model.fit` requires changing only a couple lines of your code. With a little more effort, you can also use `tf.distribute.Strategy` [with custom training loops](/keras/writing_a_training_loop_from_scratch.ipynb).\n", + "As demonstrated above, using `tf.distribute.Strategy` with Keras `Model.fit` requires changing only a couple lines of your code. With a little more effort, you can also use `tf.distribute.Strategy` [with custom training loops](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch).\n", "\n", "If you need more flexibility and control over your training loops than is possible with Estimator or Keras, you can write custom training loops. For instance, when using a GAN, you may want to take a different number of generator or discriminator steps each round. Similarly, the high level frameworks are not very suitable for Reinforcement Learning training.\n", "\n", @@ -663,7 +675,9 @@ "outputs": [], "source": [ "with mirrored_strategy.scope():\n", - " model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(1, input_shape=(1,),\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4))])\n", " optimizer = tf.keras.optimizers.SGD()" ] }, @@ -684,7 +698,7 @@ }, "outputs": [], "source": [ - "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(100).batch(\n", + "dataset = tf.data.Dataset.from_tensors(([1.], [1.])).repeat(1000).batch(\n", " global_batch_size)\n", "dist_dataset = mirrored_strategy.experimental_distribute_dataset(dataset)" ] @@ -706,20 +720,21 @@ }, "outputs": [], "source": [ + "# Sets `reduction=NONE` to leave it to tf.nn.compute_average_loss() below.\n", "loss_object = tf.keras.losses.BinaryCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)\n", "\n", - "def compute_loss(labels, predictions):\n", - " per_example_loss = loss_object(labels, predictions)\n", - " return tf.nn.compute_average_loss(per_example_loss, global_batch_size=global_batch_size)\n", - "\n", "def train_step(inputs):\n", " features, labels = inputs\n", "\n", " with tf.GradientTape() as tape:\n", " predictions = model(features, training=True)\n", - " loss = compute_loss(labels, predictions)\n", + " per_example_loss = loss_object(labels, predictions)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " gradients = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", 
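A minimal sketch of how a step like this is typically driven under the strategy, assuming the `mirrored_strategy`, `train_step`, and `dist_dataset` defined above and that `train_step` returns the per-replica loss:

```python
@tf.function
def distributed_train_step(dist_inputs):
  # Run the step on every replica, then sum the per-replica (already averaged)
  # losses into a single scalar for reporting.
  per_replica_losses = mirrored_strategy.run(train_step, args=(dist_inputs,))
  return mirrored_strategy.reduce(tf.distribute.ReduceOp.SUM,
                                  per_replica_losses, axis=None)

for dist_inputs in dist_dataset:
  print(distributed_train_step(dist_inputs))
```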
@@ -740,9 +755,16 @@ "source": [ "A few other things to note in the code above:\n", "\n", - "1. You used `tf.nn.compute_average_loss` to compute the loss. `tf.nn.compute_average_loss` sums the per example loss and divides the sum by the `global_batch_size`. This is important because later after the gradients are calculated on each replica, they are aggregated across the replicas by **summing** them.\n", - "2. You also used the `tf.distribute.Strategy.reduce` API to aggregate the results returned by `tf.distribute.Strategy.run`. `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can `reduce` them to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n", - "3. When you call `apply_gradients` within a distribution strategy scope, its behavior is modified. Specifically, before applying gradients on each parallel instance during synchronous training, it performs a sum-over-all-replicas of the gradients.\n" + " 1. You used `tf.nn.compute_average_loss` to reduce the per-example prediction losses to a scalar. `tf.nn.compute_average_loss` sums the per example loss and divides the sum by the global batch size. This is important because later after the gradients are calculated on each replica, they are aggregated across the replicas by **summing** them.\n", + "\n", + " By default, the global batch size is taken to be `tf.get_strategy().num_replicas_in_sync * tf.shape(per_example_loss)[0]`. It can also be specified explicitly as a keyword argument `global_batch_size=`. Without short batches, the default is equivalent to `tf.nn.compute_average_loss(..., global_batch_size=global_batch_size)` with the `global_batch_size` defined above. (For more on short batches and how to avoid or handle them, see the [Custom Training tutorial](../tutorials/distribute/custom_training.ipynb).)\n", + "\n", + " 2. You used `tf.nn.scale_regularization_loss` to scale regularization losses registered with the `Model` object, if any, by `1/num_replicas_in_sync` as well. For those regularization losses that are input-dependent, it falls on the modeling code, not the custom training loop, to perform the averaging over the per-replica(!) batch size; that way the modeling code can remain agnostic of replication while the training loop remains agnostic of how regularization losses are computed.\n", + "\n", + " 3. When you call `apply_gradients` within a distribution strategy scope, its behavior is modified. Specifically, before applying gradients on each parallel instance during synchronous training, it performs a sum-over-all-replicas of the gradients.\n", + "\n", + " 4. You also used the `tf.distribute.Strategy.reduce` API to aggregate the results returned by `tf.distribute.Strategy.run` for reporting. `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can `reduce` them to get an aggregated value. 
You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n", + "\n" ] }, { diff --git a/site/en/guide/dtensor_overview.ipynb b/site/en/guide/dtensor_overview.ipynb new file mode 100644 index 00000000000..1b55ee0283f --- /dev/null +++ b/site/en/guide/dtensor_overview.ipynb @@ -0,0 +1,1082 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "1ljvLya59ep5" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VcQIa1uG86Wh" + }, + "source": [ + "# DTensor concepts" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6dWNQEum9AfY" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MGZuakHVlVQf" + }, + "source": [ + "## Overview\n", + "\n", + "This colab introduces DTensor, an extension to TensorFlow for synchronous distributed computing.\n", + "\n", + "DTensor provides a global programming model that allows developers to compose applications that operate on Tensors globally while managing the distribution across devices internally. DTensor distributes the program and tensors according to the sharding directives through a procedure called *[Single program, multiple data (SPMD)](https://en.wikipedia.org/wiki/SPMD) expansion*.\n", + "\n", + "By decoupling the application from sharding directives, DTensor enables running the same application on a single device, multiple devices, or even multiple clients, while preserving its global semantics.\n", + "\n", + "This guide introduces DTensor concepts for distributed computing, and how DTensor integrates with TensorFlow. For a demo of using DTensor in model training, refer to the [Distributed training with DTensor](../tutorials/distribute/dtensor_ml_tutorial.ipynb) tutorial." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h7ZTDq7KngwA" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "Begin by importing TensorFlow, `dtensor`, and configure TensorFlow to use 6 virtual CPUs. Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q92lo0zjwej8" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "from tensorflow.experimental import dtensor\n", + "\n", + "print('TensorFlow version:', tf.__version__)\n", + "\n", + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(6)\n", + "DEVICES = [f'CPU:{i}' for i in range(6)]\n", + "\n", + "tf.config.list_logical_devices('CPU')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O-lsrxUnlsCC" + }, + "source": [ + "## DTensor's model of distributed tensors\n", + "\n", + "DTensor introduces two concepts: `dtensor.Mesh` and `dtensor.Layout`. They are abstractions to model the sharding of tensors across topologically related devices.\n", + "\n", + "- `Mesh` defines the device list for computation.\n", + "- `Layout` defines how to shard the Tensor dimension on a `Mesh`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JjiHaH0ql9yo" + }, + "source": [ + "### Mesh\n", + "\n", + "`Mesh` represents a logical Cartisian topology of a set of devices. Each dimension of the Cartisian grid is called a **Mesh dimension**, and referred to with a name. Names of mesh dimension within the same `Mesh` must be unique.\n", + "\n", + "Names of mesh dimensions are referenced by `Layout` to describe the sharding behavior of a `tf.Tensor` along each of its axes. This is described in more detail later in the section on `Layout`.\n", + "\n", + "`Mesh` can be thought of as a multi-dimensional array of devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_J6cOieEbaUw" + }, + "source": [ + "In a 1 dimensional `Mesh`, all devices form a list in a single mesh dimension. 
The following example uses `dtensor.create_mesh` to create a mesh from 6 CPU devices along a mesh dimension `'x'` with a size of 6 devices:\n", + "\n", + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QLH5fgdBmA58" + }, + "outputs": [], + "source": [ + "mesh_1d = dtensor.create_mesh([('x', 6)], devices=DEVICES)\n", + "print(mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hSZwaUwnEgXB" + }, + "source": [ + "A `Mesh` can be multi dimensional as well. In the following example, 6 CPU devices form a `3x2` mesh, where the `'x'` mesh dimension has a size of 3 devices, and the `'y'` mesh dimension has a size of 2 devices:\n", + "\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "op6TmKUQE-sZ" + }, + "outputs": [], + "source": [ + "mesh_2d = dtensor.create_mesh([('x', 3), ('y', 2)], devices=DEVICES)\n", + "print(mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "deAqdrDPFn2f" + }, + "source": [ + "### Layout\n", + "\n", + "**`Layout`** specifies how a tensor is distributed, or sharded, on a `Mesh`.\n", + "\n", + "Note: In order to avoid confusions between `Mesh` and `Layout`, the term *dimension* is always associated with `Mesh`, and the term *axis* with `Tensor` and `Layout` in this guide.\n", + "\n", + "The rank of `Layout` should be the same as the rank of the `Tensor` where the `Layout` is applied. For each of the `Tensor`'s axes the `Layout` may specify a mesh dimension to shard the tensor across, or specify the axis as \"unsharded\".\n", + "The tensor is replicated across any mesh dimensions that it is not sharded across.\n", + "\n", + "The rank of a `Layout` and the number of dimensions of a `Mesh` do not need to match. The `unsharded` axes of a `Layout` do not need to be associated to a mesh dimension, and `unsharded` mesh dimensions do not need to be associated with a `layout` axis.\n", + "\n", + "\"Diagram" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Px_bF1c-bQ7e" + }, + "source": [ + "Let's analyze a few examples of `Layout` for the `Mesh`'s created in the previous section." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fqzCNlWAbm-c" + }, + "source": [ + "On a 1-dimensional mesh such as `[(\"x\", 6)]` (`mesh_1d` in the previous section), `Layout([\"unsharded\", \"unsharded\"], mesh_1d)` is a layout for a rank-2 tensor replicated across 6 devices.\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-a3EnmZag6x1" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ywRJwuLDt2Qq" + }, + "source": [ + "Using the same tensor and mesh the layout `Layout(['unsharded', 'x'])` would shard the second axis of the tensor across the 6 devices.\n", + "\n", + "\"A" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7BgqL0jUvV5a" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([dtensor.UNSHARDED, 'x'], mesh_1d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DgciDNmK76l9" + }, + "source": [ + "Given a 2-dimensional 3x2 mesh such as `[(\"x\", 3), (\"y\", 2)]`, (`mesh_2d` from the previous section), `Layout([\"y\", \"x\"], mesh_2d)` is a layout for a rank-2 `Tensor` whose first axis is sharded across mesh dimension `\"y\"`, and whose second axis is sharded across mesh dimension `\"x\"`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Eyp_qOSyvieo" + }, + "source": [ + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p8OrehEuhPbS" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout(['y', 'x'], mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1Kyg0V3ehMNJ" + }, + "source": [ + "For the same `mesh_2d`, the layout `Layout([\"x\", dtensor.UNSHARDED], mesh_2d)` is a layout for a rank-2 `Tensor` that is replicated across `\"y\"`, and whose first axis is sharded on mesh dimension `x`.\n", + "\n", + "\"A\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IkWe6mVl7uRb" + }, + "outputs": [], + "source": [ + "layout = dtensor.Layout([\"x\", dtensor.UNSHARDED], mesh_2d)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TTalu6M-ISYb" + }, + "source": [ + "### Single-client and multi-client applications\n", + "\n", + "DTensor supports both single-client and multi-client applications. The colab Python kernel is an example of a single client DTensor application, where there is a single Python process.\n", + "\n", + "In a multi-client DTensor application, multiple Python processes collectively perform as a coherent application. The Cartisian grid of a `Mesh` in a multi-client DTensor application can span across devices regardless of whether they are attached locally to the current client or attached remotely to another client. The set of all devices used by a `Mesh` are called the *global device list*.\n", + "\n", + "The creation of a `Mesh` in a multi-client DTensor application is a collective operation where the *global device list* is identical for all of the participating clients, and the creation of the `Mesh` serves as a global barrier.\n", + "\n", + "During `Mesh` creation, each client provides its *local device list* together with the expected *global device list*. DTensor validates that both lists are consistent. 
Please refer to the API documentation for `dtensor.create_mesh` and `dtensor.create_distributed_mesh`\n", + " for more information on multi-client mesh creation and the *global device list*.\n", + "\n", + "Single-client can be thought of as a special case of multi-client, with 1 client. In a single-client application, the *global device list* is identical to the *local device list*.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P_F7DWkXkB4w" + }, + "source": [ + "## DTensor as a sharded tensor\n", + "\n", + "Now, start coding with `DTensor`. The helper function, `dtensor_from_array`, demonstrates creating DTensors from something that looks like a `tf.Tensor`. The function performs two steps:\n", + "\n", + " - Replicates the tensor to every device on the mesh.\n", + " - Shards the copy according to the layout requested in its arguments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s6aws-b8dN9L" + }, + "outputs": [], + "source": [ + "def dtensor_from_array(arr, layout, shape=None, dtype=None):\n", + " \"\"\"Convert a DTensor from something that looks like an array or Tensor.\n", + "\n", + " This function is convenient for quick doodling DTensors from a known,\n", + " unsharded data object in a single-client environment. This is not the\n", + " most efficient way of creating a DTensor, but it will do for this\n", + " tutorial.\n", + " \"\"\"\n", + " if shape is not None or dtype is not None:\n", + " arr = tf.constant(arr, shape=shape, dtype=dtype)\n", + "\n", + " # replicate the input to the mesh\n", + " a = dtensor.copy_to_mesh(arr,\n", + " layout=dtensor.Layout.replicated(layout.mesh, rank=layout.rank))\n", + " # shard the copy to the desirable layout\n", + " return dtensor.relayout(a, layout=layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r3o6IysrlGMu" + }, + "source": [ + "### Anatomy of a DTensor\n", + "\n", + "A DTensor is a `tf.Tensor` object, but augumented with the `Layout` annotation that defines its sharding behavior. A DTensor consists of the following:\n", + "\n", + " - Global tensor meta-data, including the global shape and dtype of the tensor.\n", + " - A `Layout`, which defines the `Mesh` the `Tensor` belongs to, and how the `Tensor` is sharded onto the `Mesh`.\n", + " - A list of **component tensors**, one item per local device in the `Mesh`.\n", + "\n", + "With `dtensor_from_array`, you can create your first DTensor, `my_first_dtensor`, and examine its contents:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mQu_nScGUvYH" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED], mesh)\n", + "\n", + "my_first_dtensor = dtensor_from_array([0, 1], layout)\n", + "\n", + "# Examine the DTensor content\n", + "print(my_first_dtensor)\n", + "print(\"global shape:\", my_first_dtensor.shape)\n", + "print(\"dtype:\", my_first_dtensor.dtype)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r8LQy1nqmvFy" + }, + "source": [ + "#### Layout and `fetch_layout`\n", + "\n", + "The layout of a DTensor is not a regular attribute of `tf.Tensor`. 
Instead, DTensor provides a function, `dtensor.fetch_layout` to access the layout of a DTensor:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dCSFyaAjmzGu" + }, + "outputs": [], + "source": [ + "print(dtensor.fetch_layout(my_first_dtensor))\n", + "assert layout == dtensor.fetch_layout(my_first_dtensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ed7i3l2lmatm" + }, + "source": [ + "#### Component tensors, `pack` and `unpack`\n", + "\n", + "A DTensor consists of a list of **component tensors**. The component tensor for a device in the `Mesh` is the `Tensor` object representing the piece of the global DTensor that is stored on this device.\n", + "\n", + "A DTensor can be unpacked into component tensors through `dtensor.unpack`. You can make use of `dtensor.unpack` to inspect the components of the DTensor, and confirm they are on all devices of the `Mesh`.\n", + "\n", + "Note that the positions of component tensors in the global view may overlap each other. For example, in the case of a fully replicated layout, all components are identical replicas of the global tensor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BGbjqVAOnXMk" + }, + "outputs": [], + "source": [ + "for component_tensor in dtensor.unpack(my_first_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-tqIQM52k788" + }, + "source": [ + "As shown, `my_first_dtensor` is a tensor of `[0, 1]` replicated to all 6 devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6By3k-CGn3yv" + }, + "source": [ + "The inverse operation of `dtensor.unpack` is `dtensor.pack`. Component tensors can be packed back into a DTensor.\n", + "\n", + "The components must have the same rank and dtype, which will be the rank and dtype of the returned DTensor. However, there is no strict requirement on the device placement of component tensors as inputs of `dtensor.unpack`: the function will automatically copy the component tensors to their respective corresponding devices.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9lT-6qQwxOgf" + }, + "outputs": [], + "source": [ + "packed_dtensor = dtensor.pack(\n", + " [[0, 1], [0, 1], [0, 1],\n", + " [0, 1], [0, 1], [0, 1]],\n", + " layout=layout\n", + ")\n", + "print(packed_dtensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zvS3autrpK2U" + }, + "source": [ + "### Sharding a DTensor to a Mesh\n", + "\n", + "So far you've worked with the `my_first_dtensor`, which is a rank-1 DTensor fully replicated across a dim-1 `Mesh`.\n", + "\n", + "Next, create and inspect DTensors that are sharded across a dim-2 `Mesh`. 
The following example does this with a 3x2 `Mesh` on 6 CPU devices, where size of mesh dimension `'x'` is 3 devices, and size of mesh dimension`'y'` is 2 devices:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KWb9Ae0VJ-Rc" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ndSeQSFWKQk9" + }, + "source": [ + "#### Fully sharded rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "Create a 3x2 rank-2 DTensor, sharding its first axis along the `'x'` mesh dimension, and its second axis along the `'y'` mesh dimension.\n", + "\n", + "- Because the tensor shape equals to the mesh dimension along all of the sharded axes, each device receives a single element of the DTensor.\n", + "- The rank of the component tensor is always the same as the rank of the global shape. DTensor adopts this convention as a simple way to preserve information for locating the relation between a component tensor and the global DTensor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ax_ZHouJp1MX" + }, + "outputs": [], + "source": [ + "fully_sharded_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout([\"x\", \"y\"], mesh))\n", + "\n", + "for raw_component in dtensor.unpack(fully_sharded_dtensor):\n", + " print(\"Device:\", raw_component.device, \",\", raw_component)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zhsLC-NgrC2p" + }, + "source": [ + "#### Fully replicated rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "For comparison, create a 3x2 rank-2 DTensor, fully replicated to the same dim-2 Mesh.\n", + "\n", + " - Because the DTensor is fully replicated, each device receives a full replica of the 3x2 DTensor.\n", + " - The rank of the component tensors are the same as the rank of the global shape -- this fact is trivial, because in this case, the shape of the component tensors are the same as the global shape anyway." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xmyC6H6Ec90P" + }, + "outputs": [], + "source": [ + "fully_replicated_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh))\n", + "# Or, layout=tensor.Layout.fully_replicated(mesh, rank=2)\n", + "\n", + "for component_tensor in dtensor.unpack(fully_replicated_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KWoyv_oHMzk1" + }, + "source": [ + "#### Hybrid rank-2 Tensor on a dim-2 Mesh\n", + "\n", + "What about somewhere between fully sharded and fully replicated?\n", + "\n", + "DTensor allows a `Layout` to be a hybrid, sharded along some axes, but replicated along others.\n", + "\n", + "For example, you can shard the same 3x2 rank-2 DTensor in the following way:\n", + "\n", + " - 1st axis sharded along the `'x'` mesh dimension.\n", + " - 2nd axis replicated along the `'y'` mesh dimension.\n", + "\n", + "To achieve this sharding scheme, you just need to replace the sharding spec of the 2nd axis from `'y'` to `dtensor.UNSHARDED`, to indicate your intention of replicating along the 2nd axis. 
The layout object will look like `Layout(['x', dtensor.UNSHARDED], mesh)`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DygnbkQ1Lu42" + }, + "outputs": [], + "source": [ + "hybrid_sharded_dtensor = dtensor_from_array(\n", + " tf.reshape(tf.range(6), (3, 2)),\n", + " layout=dtensor.Layout(['x', dtensor.UNSHARDED], mesh))\n", + "\n", + "for component_tensor in dtensor.unpack(hybrid_sharded_dtensor):\n", + " print(\"Device:\", component_tensor.device, \",\", component_tensor)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T7FtZ9kQRZgE" + }, + "source": [ + "You can inspect the component tensors of the created DTensor and verify they are indeed sharded according to your scheme. It may be helpful to illustrate the situation with a chart:\n", + "\n", + " \"A\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "auAkA38XjL-q" + }, + "source": [ + "#### Tensor.numpy() and sharded DTensor\n", + "\n", + "Be aware that calling the `.numpy()` method on a sharded DTensor raises an error. The rationale for erroring is to protect against unintended gathering of data from multiple computing devices to the host CPU device backing the returned NumPy array:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hNdwmnL0jAXS" + }, + "outputs": [], + "source": [ + "print(fully_replicated_dtensor.numpy())\n", + "\n", + "try:\n", + " fully_sharded_dtensor.numpy()\n", + "except tf.errors.UnimplementedError:\n", + " print(\"got an error as expected for fully_sharded_dtensor\")\n", + "\n", + "try:\n", + " hybrid_sharded_dtensor.numpy()\n", + "except tf.errors.UnimplementedError:\n", + " print(\"got an error as expected for hybrid_sharded_dtensor\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8WcMkiagPF_6" + }, + "source": [ + "## TensorFlow API on DTensor\n", + "\n", + "DTensor strives to be a drop-in replacement for tensor in your program. The TensorFlow Python API that consume `tf.Tensor`, such as the Ops library functions, `tf.function`, `tf.GradientTape`, also work with DTensor.\n", + "\n", + "To accomplish this, for each [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs), DTensor produces and executes an equivalent [SPMD](https://en.wikipedia.org/wiki/SPMD) graph in a procedure called *SPMD expansion*. A few critical steps in DTensor SPMD expansion are:\n", + "\n", + " - Propagating the sharding `Layout` of DTensor in the TensorFlow graph\n", + " - Rewriting TensorFlow Ops on the global DTensor with equivalent TensorFlow Ops on the component tensors, inserting collective and communication Ops when necessary\n", + " - Lowering backend neutral TensorFlow Ops to backend specific TensorFlow Ops.\n", + "\n", + "The final result is that **DTensor is a drop-in replacement for Tensor**.\n", + "\n", + "Note: DTensor is still an experimental API which means you will be exploring and pushing the boundaries and limits of the DTensor programming model.\n", + "\n", + "There are 2 ways of triggering DTensor execution:\n", + "\n", + " - DTensor as operands of a Python function, such as `tf.matmul(a, b)`, will run through DTensor if `a`, `b`, or both are DTensors.\n", + " - Requesting the result of a Python function to be a DTensor, such as `dtensor.call_with_layout(tf.ones, layout, shape=(3, 2))`, will run through DTensor because we requested the output of `tf.ones` to be sharded according to a `layout`." 
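As a compact illustration of both triggers (a sketch reusing the `dtensor_from_array` helper and `DEVICES` defined earlier; the mesh, layout, and values here are arbitrary):

```python
mesh = dtensor.create_mesh([("x", 6)], devices=DEVICES)
layout = dtensor.Layout([dtensor.UNSHARDED], mesh)

# (1) DTensor as operand: `tf.reduce_sum` runs through DTensor because `d` is one.
d = dtensor_from_array([0., 1., 2.], layout)
print(tf.reduce_sum(d))

# (2) DTensor as output: request that `tf.ones` produce a DTensor with `layout`.
ones = dtensor.call_with_layout(tf.ones, layout, shape=(3,))
print(dtensor.fetch_layout(ones))
```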
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "urKzmqAoPssT" + }, + "source": [ + "### DTensor as operands\n", + "\n", + "Many TensorFlow API functions take `tf.Tensor` as their operands, and returns `tf.Tensor` as their results. For these functions, you can express intention to run a function through DTensor by passing in DTensor as operands. This section uses `tf.matmul(a, b)` as an example." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7LO8ZT7iWVga" + }, + "source": [ + "#### Fully replicated input and output\n", + "\n", + "In this case, the DTensors are fully replicated. On each of the devices of the `Mesh`,\n", + " - the component tensor for operand `a` is `[[1, 2, 3], [4, 5, 6]]` (2x3)\n", + " - the component tensor for operand `b` is `[[6, 5], [4, 3], [2, 1]]` (3x2)\n", + " - the computation consists of a single `MatMul` of `(2x3, 3x2) -> 2x2`,\n", + " - the component tensor for result `c` is `[[20, 14], [56,41]]` (2x2)\n", + "\n", + "Total number of floating point mul operations is `6 device * 4 result * 3 mul = 72`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TiZf2J9JNd2D" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=layout)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=layout)\n", + "\n", + "c = tf.matmul(a, b) # runs 6 identical matmuls in parallel on 6 devices\n", + "\n", + "# `c` is a DTensor replicated on all devices (same as `a` and `b`)\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)\n", + "print(\"components:\")\n", + "for component_tensor in dtensor.unpack(c):\n", + " print(component_tensor.device, component_tensor.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QXtR9qgKWgWV" + }, + "source": [ + "#### Sharding operands along the contracted axis\n", + "\n", + "You can reduce the amount of computation per device by sharding the operands `a` and `b`. A popular sharding scheme for `tf.matmul` is to shard the operands along the axis of the contraction, which means sharding `a` along the second axis, and `b` along the first axis.\n", + "\n", + "The global matrix product sharded under this scheme can be performed efficiently, by local matmuls that runs concurrently, followed by a collective reduction to aggregate the local results. This is also the [canonical way](https://github.com/open-mpi/ompi/blob/ee87ec391f48512d3718fc7c8b13596403a09056/docs/man-openmpi/man3/MPI_Reduce.3.rst?plain=1#L265) of implementing a distributed matrix dot product.\n", + "\n", + "Total number of floating point mul operations is `6 devices * 4 result * 1 = 24`, a factor of 3 reduction compared to the fully replicated case (72) above. The factor of 3 is due to the sharding along `x` mesh dimension with a size of `3` devices.\n", + "\n", + "The reduction of the number of operations run sequentially is the main mechansism with which synchronuous model parallelism accelerates training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EyVAUvMePbms" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "a_layout = dtensor.Layout([dtensor.UNSHARDED, 'x'], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=a_layout)\n", + "b_layout = dtensor.Layout(['x', dtensor.UNSHARDED], mesh)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=b_layout)\n", + "\n", + "c = tf.matmul(a, b)\n", + "# `c` is a DTensor replicated on all devices (same as `a` and `b`)\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IhD8yYgJiCEh" + }, + "source": [ + "#### Additional sharding\n", + "\n", + "You can perform additional sharding on the inputs, and they are appropriately carried over to the results. For example, you can apply additional sharding of operand `a` along its first axis to the `'y'` mesh dimension. The additional sharding will be carried over to the first axis of the result `c`.\n", + "\n", + "Total number of floating point mul operations is `6 devices * 2 result * 1 = 12`, an additional factor of 2 reduction compared to the case (24) above. The factor of 2 is due to the sharding along `y` mesh dimension with a size of `2` devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0PYqe0neiOpR" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "\n", + "a_layout = dtensor.Layout(['y', 'x'], mesh)\n", + "a = dtensor_from_array([[1, 2, 3], [4, 5, 6]], layout=a_layout)\n", + "b_layout = dtensor.Layout(['x', dtensor.UNSHARDED], mesh)\n", + "b = dtensor_from_array([[6, 5], [4, 3], [2, 1]], layout=b_layout)\n", + "\n", + "c = tf.matmul(a, b)\n", + "# The sharding of `a` on the first axis is carried to `c'\n", + "print('Sharding spec:', dtensor.fetch_layout(c).sharding_specs)\n", + "print(\"components:\")\n", + "for component_tensor in dtensor.unpack(c):\n", + " print(component_tensor.device, component_tensor.numpy())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c-1NazCVmLWZ" + }, + "source": [ + "### DTensor as output\n", + "\n", + "What about Python functions that do not take operands, but returns a Tensor result that can be sharded? Examples of such functions are:\n", + "\n", + " - `tf.ones`, `tf.zeros`, `tf.random.stateless_normal`\n", + "\n", + "For these Python functions, DTensor provides `dtensor.call_with_layout` which eagerly executes a Python function with DTensor, and ensures that the returned Tensor is a DTensor with the requested `Layout`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J0jo_8NPtJiO" + }, + "outputs": [], + "source": [ + "help(dtensor.call_with_layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V-YdLvfytM7g" + }, + "source": [ + "The eagerly executed Python function usually only contain a single non-trivial TensorFlow Op.\n", + "\n", + "To use a Python function that emits multiple TensorFlow Ops with `dtensor.call_with_layout`, the function should be converted to a `tf.function`. Calling a `tf.function` is a single TensorFlow Op. When the `tf.function` is called, DTensor can perform layout propagation when it analyzes the computing graph of the `tf.function`, before any of the intermediate tensors are materialized." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DLrksgFjqRLS" + }, + "source": [ + "#### APIs that emit a single TensorFlow Op\n", + "\n", + "If a function emits a single TensorFlow Op, you can directly apply `dtensor.call_with_layout` to the function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "G1CuKYSFtFeM" + }, + "outputs": [], + "source": [ + "help(tf.ones)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2m_EAwy-ozOh" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "ones = dtensor.call_with_layout(tf.ones, dtensor.Layout(['x', 'y'], mesh), shape=(6, 4))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bx-7Xo8Cpb8S" + }, + "source": [ + "#### APIs that emit multiple TensorFlow Ops\n", + "\n", + "If the API emits multiple TensorFlow Ops, convert the function into a single Op through `tf.function`. For example, `tf.random.stateleess_normal`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H8BQSTRFtCih" + }, + "outputs": [], + "source": [ + "help(tf.random.stateless_normal)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TvP81eYopSPm" + }, + "outputs": [], + "source": [ + "ones = dtensor.call_with_layout(\n", + " tf.function(tf.random.stateless_normal),\n", + " dtensor.Layout(['x', 'y'], mesh),\n", + " shape=(6, 4),\n", + " seed=(1, 1))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qKoojp9ZyWzW" + }, + "source": [ + "Wrapping a Python function that emits a single TensorFlow Op with `tf.function` is allowed. The only caveat is paying the associated cost and complexity of creating a `tf.function` from a Python function." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbAtKrSkpOaq" + }, + "outputs": [], + "source": [ + "ones = dtensor.call_with_layout(\n", + " tf.function(tf.ones),\n", + " dtensor.Layout(['x', 'y'], mesh),\n", + " shape=(6, 4))\n", + "print(ones)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D-m1816JP3CE" + }, + "source": [ + "### From `tf.Variable` to `dtensor.DVariable`\n", + "\n", + "In Tensorflow, `tf.Variable` is the holder for a mutable `Tensor` value.\n", + "With DTensor, the corresponding variable semantics is provided by `dtensor.DVariable`.\n", + "\n", + "The reason a new type `DVariable` was introduced for DTensor variable is because DVariables have an additional requirement that the layout cannot change from its initial value." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "awRPuR26P0Sc" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"x\", 6)], devices=DEVICES)\n", + "layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)\n", + "\n", + "v = dtensor.DVariable(\n", + " initial_value=dtensor.call_with_layout(\n", + " tf.function(tf.random.stateless_normal),\n", + " layout=layout,\n", + " shape=tf.TensorShape([64, 32]),\n", + " seed=[1, 1],\n", + " dtype=tf.float32))\n", + "\n", + "print(v.handle)\n", + "assert layout == dtensor.fetch_layout(v)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pb9jn473prC_" + }, + "source": [ + "Other than the requirement on matching the `layout`, a `DVariable` behaves the same as a `tf.Variable`. 
For example, you can add a DVariable to a DTensor,\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "adxFw9wJpqQQ" + }, + "outputs": [], + "source": [ + "a = dtensor.call_with_layout(tf.ones, layout=layout, shape=(64, 32))\n", + "b = v + a # add DVariable and DTensor\n", + "print(b)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QxBdNHWSu-kV" + }, + "source": [ + "You can also assign a DTensor to a DVariable:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oYwfiyw5P94U" + }, + "outputs": [], + "source": [ + "v.assign(a) # assign a DTensor to a DVariable\n", + "print(a)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4fvSk_VUvGnj" + }, + "source": [ + "Attempting to mutate the layout of a `DVariable`, by assigning a DTensor with an incompatible layout produces an error:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3pckUugYP_r-" + }, + "outputs": [], + "source": [ + "# variable's layout is immutable.\n", + "another_mesh = dtensor.create_mesh([(\"x\", 3), (\"y\", 2)], devices=DEVICES)\n", + "b = dtensor.call_with_layout(tf.ones,\n", + " layout=dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], another_mesh),\n", + " shape=(64, 32))\n", + "try:\n", + " v.assign(b)\n", + "except:\n", + " print(\"exception raised\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3LadIcwRvR6f" + }, + "source": [ + "## What's next?\n", + "\n", + "In this colab, you learned about DTensor, an extension to TensorFlow for distributed computing. To try out these concepts in a tutorial, check out [Distributed training with DTensor](../tutorials/distribute/dtensor_ml_tutorial.ipynb)." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "dtensor_overview.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/estimator.ipynb b/site/en/guide/estimator.ipynb index e0ae0a3792f..05e8fb4012a 100644 --- a/site/en/guide/estimator.ipynb +++ b/site/en/guide/estimator.ipynb @@ -68,7 +68,7 @@ "id": "rILQuAiiRlI7" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://www.tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { @@ -869,7 +869,6 @@ "A_lvUsSLZzVg" ], "name": "estimator.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/extension_type.ipynb b/site/en/guide/extension_type.ipynb index bb1f7e5b29d..7e8edeea7c9 100644 --- a/site/en/guide/extension_type.ipynb +++ b/site/en/guide/extension_type.ipynb @@ -87,7 +87,7 @@ "source": [ "## Extension types\n", "\n", - "User-defined types can make projects more readable, modular, maintainable. 
However, most TensorFlow APIs have very limited support for user-defined Python types. This includes both high-level APIs (such as [Keras](https://www.tensorflow.org/guide/keras/overview), [tf.function](https://www.tensorflow.org/guide/function), [tf.SavedModel](https://www.tensorflow.org/guide/saved_model)) and lower-level APIs (such as `tf.while_loop` and `tf.concat`). TensorFlow **extension types** can be used to create user-defined object-oriented types that work seamlessly with TensorFlow's APIs. To create an extension type, simply define a Python class with `tf.experimental.ExtensionType` as its base, and use [type annotations](https://www.python.org/dev/peps/pep-0484/) to specify the type for each field." + "User-defined types can make projects more readable, modular, maintainable. However, most TensorFlow APIs have very limited support for user-defined Python types. This includes both high-level APIs (such as [Keras](https://www.tensorflow.org/guide/keras/overview), [tf.function](https://www.tensorflow.org/guide/function), [`tf.SavedModel`](https://www.tensorflow.org/guide/saved_model)) and lower-level APIs (such as `tf.while_loop` and `tf.concat`). TensorFlow **extension types** can be used to create user-defined object-oriented types that work seamlessly with TensorFlow's APIs. To create an extension type, simply define a Python class with `tf.experimental.ExtensionType` as its base, and use [type annotations](https://www.python.org/dev/peps/pep-0484/) to specify the type for each field." ] }, { @@ -121,7 +121,7 @@ "id": "FiaNXPa7pNK-" }, "source": [ - "The `tf.experimental.ExtensionType` base class works similarly to [`typing.NamedTuple`](https://docs.python.org/3/library/typing.html#typing.NamedTuple) and [`@dataclasses.dataclass`](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass) from the standard Python library. In particular, it automatically adds a constructor and special methods (such as `__repr__` and `__eq__`) based on the field type annotations." + "The `tf.experimental.ExtensionType` base class works similarly to [`typing.NamedTuple`](https://docs.python.org/3/library/typing.html#typing.NamedTuple) and [`@dataclasses.dataclass`](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass) from the standard Python library. In particular, it automatically adds a constructor and special methods (such as `__repr__` and `__eq__`) based on the field type annotations." ] }, { @@ -132,9 +132,9 @@ "source": [ "Typically, extension types tend to fall into one of two categories:\n", "\n", - "* ***Data structures***, which group together a collection of related values, and can provide useful operations based on those values. Data structures may be fairly general (such as the `TensorGraph` example above); or they may be highly customized to a specific model.\n", + "* ***Data structures***, which group together a collection of related values, and can provide useful operations based on those values. Data structures may be fairly general (such as the `TensorGraph` example above); or they may be highly customized to a specific model.\n", "\n", - "* ***Tensor-like types***, which specialize or extend the concept of \"Tensor.\" Types in this category have a `rank`, a `shape`, and usually a `dtype`; and it makes sense to use them with Tensor operations (such as `tf.stack`, `tf.add`, or `tf.matmul`). `MaskedTensor` and `CSRSparseMatrix` are examples of tensor-like types." 
+ "* ***Tensor-like types***, which specialize or extend the concept of \"Tensor.\" Types in this category have a `rank`, a `shape`, and usually a `dtype`; and it makes sense to use them with Tensor operations (such as `tf.stack`, `tf.add`, or `tf.matmul`). `MaskedTensor` and `CSRSparseMatrix` are examples of tensor-like types." ] }, { @@ -148,15 +148,15 @@ "Extension types are supported by the following TensorFlow APIs:\n", "\n", "* **Keras**: Extension types can be used as inputs and outputs for Keras `Models` and `Layers`.\n", - "* **tf.data.Dataset**: Extension types can be included in `Datasets`, and returned by dataset `Iterators`.\n", - "* **Tensorflow hub**: Extension types can be used as inputs and outputs for `tf.hub` modules.\n", + "* **`tf.data.Dataset`**: Extension types can be included in `Datasets`, and returned by dataset `Iterators`.\n", + "* **TensorFlow Hub**: Extension types can be used as inputs and outputs for `tf.hub` modules.\n", "* **SavedModel**: Extension types can be used as inputs and outputs for `SavedModel` functions.\n", - "* **tf.function**: Extension types can be used as arguments and return values for functions wrapped with the `@tf.function` decorator.\n", - "* **while loops**: Extension types can be used as loop variables in `tf.while_loop`, and can be used as arguments and return values for the while-loop's body.\n", - "* **conditionals**: Extension types can be conditionally selected using `tf.cond` and `tf.case`.\n", - "* **py_function**: Extension types can be used as arguments and return values for the `func` argument to `tf.py_function`.\n", - "* **Tensor ops**: Extension types can be extended to support most TensorFlow ops that accept Tensor inputs (e.g., `tf.matmul`, `tf.gather`, and `tf.reduce_sum`). See the \"*Dispatch*\" section below for more information.\n", - "* **distribution strategy**: Extension types can be used as per-replica values.\n", + "* **`tf.function`**: Extension types can be used as arguments and return values for functions wrapped with the `@tf.function` decorator.\n", + "* **While loops**: Extension types can be used as loop variables in `tf.while_loop`, and can be used as arguments and return values for the while-loop's body.\n", + "* **Conditionals**: Extension types can be conditionally selected using `tf.cond` and `tf.case`.\n", + "* **`tf.py_function`**: Extension types can be used as arguments and return values for the `func` argument to `tf.py_function`.\n", + "* **Tensor ops**: Extension types can be extended to support most TensorFlow ops that accept Tensor inputs (such as `tf.matmul`, `tf.gather`, and `tf.reduce_sum`). Go to the \"*Dispatch*\" section below for more information.\n", + "* **Distribution strategy**: Extension types can be used as per-replica values.\n", "\n", "For more details, see the section on \"TensorFlow APIs that support ExtensionTypes\" below.\n" ] @@ -178,7 +178,7 @@ "source": [ "### Field types\n", "\n", - "All fields (aka instance variables) must be declared, and a type annotation must be provided for each field. The following type annotations are supported:\n", + "All fields—instance variables—must be declared, and a type annotation must be provided for each field. 
The following type annotations are supported:\n", "\n", "Type | Example\n", "---- | -------\n", @@ -186,15 +186,15 @@ "Python floats | `f: float`\n", "Python strings | `s: str`\n", "Python booleans | `b: bool`\n", - "Python None | `n: None`\n", + "Python `None` | `n: None`\n", "[Tensor shapes](https://www.tensorflow.org/api_docs/python/tf/TensorShape) | `shape: tf.TensorShape`\n", - "[Tensor dtypes](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) | `dtype: tf.DType`\n", + "[Tensor `dtype`s](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) | `dtype: tf.DType`\n", "[Tensors](https://www.tensorflow.org/api_docs/python/tf/Tensor) | `t: tf.Tensor`\n", "[Extension types](https://www.tensorflow.org/api_docs/python/tf/experimental/ExtensionType) | `mt: MyMaskedTensor`\n", - "[Ragged Tensors](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor) | `rt: tf.RaggedTensor`\n", - "[Sparse Tensors](https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor) | `st: tf.SparseTensor`\n", - "[Indexed Slices](https://www.tensorflow.org/api_docs/python/tf/IndexedSlices) | `s: tf.IndexedSlices`\n", - "[Optional Tensors](https://www.tensorflow.org/api_docs/python/tf/experimental/Optional) | `o: tf.experimental.Optional`\n", + "[Ragged tensors](https://www.tensorflow.org/api_docs/python/tf/RaggedTensor) | `rt: tf.RaggedTensor`\n", + "[Sparse tensors](https://www.tensorflow.org/api_docs/python/tf/sparse/SparseTensor) | `st: tf.SparseTensor`\n", + "[Indexed slices](https://www.tensorflow.org/api_docs/python/tf/IndexedSlices) | `s: tf.IndexedSlices`\n", + "[Optional tensors](https://www.tensorflow.org/api_docs/python/tf/experimental/Optional) | `o: tf.experimental.Optional`\n", "[Type unions](https://docs.python.org/3/library/typing.html#typing.Union) | `int_or_float: typing.Union[int, float]`\n", "[Tuples](https://docs.python.org/3/library/typing.html#typing.Tuple) | `params: typing.Tuple[int, float, tf.Tensor, int]`\n", "[Var-length tuples](https://docs.python.org/3/library/typing.html#typing.Tuple) | `lengths: typing.Tuple[int, ...]`\n", @@ -210,8 +210,8 @@ "source": [ "### Mutability\n", "\n", - "Extension types are required to be immutable. This ensures that they can be properly tracked by TensorFlow's graph-tracing mechanisms.\n", - "If you find yourself wanting to mutate an extension type value, consider instead defining methods that transform values. For example, rather than defining a `set_mask` method to mutate a `MaskedTensor`, you could define a `replace_mask` method that returns a new `MaskedTensor`:" + "Extension types are required to be immutable. This ensures that they can be properly tracked by TensorFlow's graph-tracing mechanisms.\n", + "If you find yourself wanting to mutate an extension type value, consider instead defining methods that transform values. For example, rather than defining a `set_mask` method to mutate a `MaskedTensor`, you could define a `replace_mask` method that returns a new `MaskedTensor`:" ] }, { @@ -249,7 +249,7 @@ "* A nested `TypeSpec`.\n", "* Tensor API dispatch support.\n", "\n", - "See the \"Customizing ExtensionTypes\" section below for more information on customizing this functionality." + "Go to the \"Customizing `ExtensionType`s\" section below for more information on customizing this functionality." ] }, { @@ -259,7 +259,7 @@ }, "source": [ "### Constructor\n", - "The constructor added by `ExtensionType` takes each field as a named argument (in the order they were listed in the class definition). 
This constructor will type-check each parameter, and convert them where necessary. In particular, `Tensor` fields are converted using `tf.convert_to_tensor`; `Tuple` fields are converted to `tuple`s; and `Mapping` fields are converted to immutable dicts." + "The constructor added by `ExtensionType` takes each field as a named argument (in the order they were listed in the class definition). This constructor will type-check each parameter, and convert them where necessary. In particular, `Tensor` fields are converted using `tf.convert_to_tensor`; `Tuple` fields are converted to `tuple`s; and `Mapping` fields are converted to immutable dicts." ] }, { @@ -279,7 +279,7 @@ " mask=[[True, True, False], [True, False, True]])\n", "\n", "# Fields are type-checked and converted to the declared types.\n", - "# E.g., mt.values is converted to a Tensor.\n", + "# For example, `mt.values` is converted to a Tensor.\n", "print(mt.values)" ] }, @@ -372,7 +372,7 @@ "source": [ "### Equality operators\n", "\n", - "`ExtensionType` adds default equality operators (`__eq__` and `__ne__`) that consider two values equal if they have the same type and all their fields are equal. Tensor fields are considered equal if they have the same shape and are elementwise equal for all elements." + "`ExtensionType` adds default equality operators (`__eq__` and `__ne__`) that consider two values equal if they have the same type and all their fields are equal. Tensor fields are considered equal if they have the same shape and are elementwise equal for all elements." ] }, { @@ -407,9 +407,9 @@ "source": [ "### Validation method\n", "\n", - "`ExtensionType` adds a `__validate__` method, which can be overriden to perform validation checks on fields. It is run after the constructor is called, and after fields have been type-checked and converted to their declared types, so it can assume that all fields have their declared types.\n", + "`ExtensionType` adds a `__validate__` method, which can be overridden to perform validation checks on fields. It is run after the constructor is called, and after fields have been type-checked and converted to their declared types, so it can assume that all fields have their declared types.\n", "\n", - "he following example updates `MaskedTensor` to validate the `shape`s and `dtype`s of its fields:" + "The following example updates `MaskedTensor` to validate the `shape`s and `dtype`s of its fields:" ] }, { @@ -438,7 +438,7 @@ "outputs": [], "source": [ "try:\n", - " MaskedTensor([1, 2, 3], [0, 1, 0]) # wrong dtype for mask.\n", + " MaskedTensor([1, 2, 3], [0, 1, 0]) # Wrong `dtype` for mask.\n", "except AssertionError as e:\n", " print(f\"Got expected AssertionError: {e}\")" ] @@ -531,7 +531,7 @@ "\n", "Each `ExtensionType` class has a corresponding `TypeSpec` class, which is created automatically and stored as `.Spec`.\n", "\n", - "This class captures all the information from a value *except* for the values of any nested tensors. In particular, the `TypeSpec` for a value is created by replacing any nested Tensor, ExtensionType, or CompositeTensor with its `TypeSpec`.\n" + "This class captures all the information from a value *except* for the values of any nested tensors. 
In particular, the `TypeSpec` for a value is created by replacing any nested Tensor, ExtensionType, or CompositeTensor with its `TypeSpec`.\n" ] }, { @@ -548,7 +548,7 @@ "\n", "anne = Player(\"Anne\", {\"height\": 8.3, \"speed\": 28.1})\n", "anne_spec = tf.type_spec_from_value(anne)\n", - "print(anne_spec.name) # Records dtype and shape, but not the string value.\n", + "print(anne_spec.name) # Records `dtype` and `shape`, but not the string value.\n", "print(anne_spec.attributes) # Records keys and TensorSpecs for values." ] }, @@ -652,13 +652,13 @@ "id": "gX613uRk0qLz" }, "source": [ - "## Customizing ExtensionTypes\n", + "## Customizing `ExtensionType`s\n", "\n", "In addition to simply declaring fields and their types, extension types may:\n", "\n", "* Override the default printable representation (`__repr__`).\n", "* Define methods.\n", - "* Define classmethods and staticmethods.\n", + "* Define `classmethod`s and `staticmethod`s.\n", "* Define properties.\n", "* Override the default constructor (`__init__`).\n", "* Override the default equality operator (`__eq__`).\n", @@ -675,7 +675,7 @@ "source": [ "### Overriding the default printable representation\n", "\n", - "You can override this default string conversion operator for extension types. The following example updates the `MaskedTensor` class to generate a more readable string representation when values are printed in Eager mode." + "You can override this default string conversion operator for extension types. The following example updates the `MaskedTensor` class to generate a more readable string representation when values are printed in Eager mode." ] }, { @@ -719,7 +719,7 @@ "source": [ "### Defining methods\n", "\n", - "Extension types may define methods, just like any normal Python class. For example, the `MaskedTensor` type could define a `with_default` method that returns a copy of `self` with masked values replaced by a given `default` value. Methods may optionally be annotated with the `@tf.function` decorator." + "Extension types may define methods, just like any normal Python class. For example, the `MaskedTensor` type could define a `with_default` method that returns a copy of `self` with masked values replaced by a given `default` value. Methods may optionally be annotated with the `@tf.function` decorator." ] }, { @@ -746,9 +746,9 @@ "id": "Qwd_gGKp9RP0" }, "source": [ - "### Defining classmethods and staticmethods\n", + "### Defining `classmethod`s and `staticmethod`s\n", "\n", - "Extension types may define methods using the `@classmethod` and `@staticmethod` decorators. For example, the `MaskedTensor` type could define a factory method that masks any element with a given value:" + "Extension types may define methods using the `@classmethod` and `@staticmethod` decorators. For example, the `MaskedTensor` type could define a factory method that masks any element with a given value:" ] }, { @@ -768,7 +768,7 @@ "\n", " @staticmethod\n", " def from_tensor_and_value_to_mask(values, value_to_mask):\n", - " return MaskedTensor(values, values == value_to_mask)\n", + " return MaskedTensor(values, values != value_to_mask)\n", "\n", "x = tf.constant([[1, 0, 2], [3, 0, 0]])\n", "MaskedTensor.from_tensor_and_value_to_mask(x, 0)" @@ -781,7 +781,7 @@ }, "source": [ "### Defining properties\n", - "Extension types may define properties using the `@property` decorator, just like any normal Python class. 
For example, the `MaskedTensor` type could define a `dtype` property that's a shorthand for the dtype of the values:" + "Extension types may define properties using the `@property` decorator, just like any normal Python class. For example, the `MaskedTensor` type could define a `dtype` property that's a shorthand for the `dtype` of the values:" ] }, { @@ -811,7 +811,7 @@ "source": [ "### Overriding the default constructor\n", "\n", - "You can override the default constructor for extension types. Custom constructors must set a value for every declared field; and after the custom constructor returns, all fields will be type-checked, and values will be converted as described above." + "You can override the default constructor for extension types. Custom constructors must set a value for every declared field; and after the custom constructor returns, all fields will be type-checked, and values will be converted as described above." ] }, { @@ -838,7 +838,7 @@ "id": "qyQxMlwLFQt7" }, "source": [ - "Alternatively, you might consider leaving the default constructor as-is, but adding one or more factory methods. E.g.:" + "Alternatively, you might consider leaving the default constructor as-is, but adding one or more factory methods. For example:" ] }, { @@ -868,7 +868,7 @@ "source": [ "### Overriding the default equality operator (`__eq__`)\n", "\n", - "You can override the default `__eq__` operator for extension types. The follow example updates `MaskedTensor` to ignore masked elements when comparing for equality." + "You can override the default `__eq__` operator for extension types. The following example updates `MaskedTensor` to ignore masked elements when comparing for equality." ] }, { @@ -913,7 +913,7 @@ "source": [ "### Using forward references\n", "\n", - "If the type for a field has not been defined yet, you may use a string containing the name of the type instead. In the following example, the string `\"Node\"` is used to annotate the `children` field because the `Node` type hasn't been (fully) defined yet.\n" + "If the type for a field has not been defined yet, you may use a string containing the name of the type instead. In the following example, the string `\"Node\"` is used to annotate the `children` field because the `Node` type hasn't been (fully) defined yet.\n" ] }, { @@ -939,7 +939,7 @@ "source": [ "### Defining subclasses\n", "\n", - "Extension types may be subclassed using the standard Python syntax. Extension type subclasses may add new fields, methods, and properties; and may override the constructor, the printable representation, and the equality operator. The following example defines a basic `TensorGraph` class that uses three `Tensor` fields to encode a set of edges between nodes. It then defines a subclass that adds a `Tensor` field to record a \"feature value\" for each node. The subclass also defines a method to propagage the feature values along the edges." + "Extension types may be subclassed using the standard Python syntax. Extension type subclasses may add new fields, methods, and properties; and may override the constructor, the printable representation, and the equality operator. The following example defines a basic `TensorGraph` class that uses three `Tensor` fields to encode a set of edges between nodes. It then defines a subclass that adds a `Tensor` field to record a \"feature value\" for each node. The subclass also defines a method to propagate the feature values along the edges." 
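The customization hooks above compose naturally. As a hedged, illustrative sketch (the `LabeledValues` class and its fields are invented for this edit and do not appear in the notebook), a single extension type can declare fields, expose a `@property`, and provide a `@classmethod` factory:

```python
import tensorflow as tf

class LabeledValues(tf.experimental.ExtensionType):
  """Hypothetical extension type used only to illustrate customization."""
  values: tf.Tensor
  label: str

  @property
  def dtype(self):
    # Shorthand for the dtype of the underlying values.
    return self.values.dtype

  @classmethod
  def zeros(cls, shape, label):
    # Factory method: build an instance whose values are all zero.
    return cls(values=tf.zeros(shape), label=label)

lv = LabeledValues.zeros([2, 3], "demo")
print(lv.label, lv.dtype)  # demo <dtype: 'float32'>
```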
] }, { @@ -981,7 +981,7 @@ "source": [ "### Defining private fields\n", "\n", - "An extension type's fields may be marked private by prefixing them with an underscore (following standard Python conventions). This does not impact the way that TensorFlow treats the fields in any way; but simply serves as a signal to any users of the extension type that those fields are private.\n" + "An extension type's fields may be marked private by prefixing them with an underscore (following standard Python conventions). This does not impact the way that TensorFlow treats the fields in any way; but simply serves as a signal to any users of the extension type that those fields are private.\n" ] }, { @@ -990,15 +990,15 @@ "id": "oMdH7ORqh8Pl" }, "source": [ - "### Customizing the ExtensionType's `TypeSpec`\n", + "### Customizing the `ExtensionType`'s `TypeSpec`\n", "\n", - "Each `ExtensionType` class has a corresponding `TypeSpec` class, which is created automatically and stored as `.Spec`. For more information, see the section \"Nested TypeSpec\" above.\n", + "Each `ExtensionType` class has a corresponding `TypeSpec` class, which is created automatically and stored as `.Spec`. For more information, see the section \"Nested TypeSpec\" above.\n", "\n", - "To customize the `TypeSpec`, simply define your own nested class named `Spec`, and `ExtensionType` will use that as the basis for the automatically constructed `TypeSpec`. You can customize the `Spec` class by:\n", + "To customize the `TypeSpec`, simply define your own nested class named `Spec`, and `ExtensionType` will use that as the basis for the automatically constructed `TypeSpec`. You can customize the `Spec` class by:\n", "\n", "* Overriding the default printable representation.\n", "* Overriding the default constructor.\n", - "* Defining methods, classmethods, staticmethods, and properties.\n", + "* Defining methods, `classmethod`s, `staticmethod`s, and properties.\n", "\n", "The following example customizes the `MaskedTensor.Spec` class to make it easier to use:" ] @@ -1053,10 +1053,10 @@ "source": [ "## Tensor API dispatch\n", "\n", - "Extension types can be \"tensor-like\", in the sense that they specialize or extend the interface defined by the `tf.Tensor` type. Examples of tensor-like extension types include `RaggedTensor`, `SparseTensor`, and `MaskedTensor`. ***Dispatch decorators*** can be used to override the default behavior of TensorFlow operations when applied to tensor-like extension types. TensorFlow currently defines three dispatch decorators:\n", + "Extension types can be \"tensor-like\", in the sense that they specialize or extend the interface defined by the `tf.Tensor` type. Examples of tensor-like extension types include `RaggedTensor`, `SparseTensor`, and `MaskedTensor`. ***Dispatch decorators*** can be used to override the default behavior of TensorFlow operations when applied to tensor-like extension types. TensorFlow currently defines three dispatch decorators:\n", "\n", "* `@tf.experimental.dispatch_for_api(tf_api)`\n", - "* `@tf.experimental.dispatch_for_unary_elementwise_api(x_type)`\n", + "* `@tf.experimental.dispatch_for_unary_elementwise_apis(x_type)`\n", "* `@tf.experimental.dispatch_for_binary_elementwise_apis(x_type, y_type)`" ] }, @@ -1068,7 +1068,7 @@ "source": [ "### Dispatch for a single API\n", "\n", - "The `tf.experimental.dispatch_for_api` decorator overrides the default behavior of a specified TensorFlow operation when it is called with the specified signature. 
For example, you can use this decorator to specify how `tf.stack` should process `MaskedTensor` values:" + "The `tf.experimental.dispatch_for_api` decorator overrides the default behavior of a specified TensorFlow operation when it is called with the specified signature. For example, you can use this decorator to specify how `tf.stack` should process `MaskedTensor` values:" ] }, { @@ -1159,9 +1159,9 @@ "source": [ "### Dispatch for all unary elementwise APIs\n", "\n", - "The `tf.experimental.dispatch_for_unary_elementwise_apis` decorator overrides the default behavior of ***all*** unary elementwise ops (such as `tf.math.cos`) whenever the value for the first argument (typically named `x`) matches the type annotation `x_type`. The decorated function should take two arguments:\n", + "The `tf.experimental.dispatch_for_unary_elementwise_apis` decorator overrides the default behavior of ***all*** unary elementwise ops (such as `tf.math.cos`) whenever the value for the first argument (typically named `x`) matches the type annotation `x_type`. The decorated function should take two arguments:\n", "\n", - "* `api_func`: A function that takes a single parameter and performs the elementwise operation (e.g., `tf.abs`).\n", + "* `api_func`: A function that takes a single parameter and performs the elementwise operation (for example, `tf.abs`).\n", "* `x`: The first argument to the elementwise operation.\n", "\n", "The following example updates all unary elementwise operations to handle the `MaskedTensor` type:" @@ -1255,7 +1255,7 @@ "id": "txTGg9pzG0Ux" }, "source": [ - "For a list of the elementwise APIs that are overridden, see the API documentation for `tf.experimental.dispatch_for_unary_elementwise_apis` and `tf.experimental.dispatch_for_binary_elementwise_apis`." + "For a list of the elementwise APIs that are overridden, go to the API documentation for `tf.experimental.dispatch_for_unary_elementwise_apis` and `tf.experimental.dispatch_for_binary_elementwise_apis`." ] }, { @@ -1264,12 +1264,12 @@ "id": "UseRtohYKiE5" }, "source": [ - "## Batchable ExtensionTypes\n", + "## Batchable `ExtensionType`s\n", "\n", - "An `ExtensionType` is *batchable* if a single instance can be used to represent a batch of values. Typically, this is accomplished by adding batch dimensions to all nested `Tensor`s. The following TensorFlow APIs require that any extension type inputs be batchable:\n", + "An `ExtensionType` is *batchable* if a single instance can be used to represent a batch of values. Typically, this is accomplished by adding batch dimensions to all nested `Tensor`s. The following TensorFlow APIs require that any extension type inputs be batchable:\n", "\n", "* `tf.data.Dataset` (`batch`, `unbatch`, `from_tensor_slices`)\n", - "* `tf.Keras` (`fit`, `evaluate`, `predict`)\n", + "* `tf.keras` (`fit`, `evaluate`, `predict`)\n", "* `tf.map_fn`" ] }, @@ -1279,10 +1279,10 @@ "id": "hWPauKGj_yRz" }, "source": [ - "By default, `BatchableExtensionType` creates batched values by batching any nested `Tensor`s, `CompositeTensor`s, and `ExtensionType`s. If this is not appropriate for your class, then you will need to use `tf.experimental.ExtensionTypeBatchEncoder` to override this default behavior. 
For example, it would not be appropriate to create a batch of `tf.SparseTensor` values by simply stacking individual sparse tensors' `values`, `indices`, and `dense_shape` fields -- in most cases, you can't stack these tensors, since they have incompatible shapes; and even if you could, the result would not be a valid `SparseTensor`.\n", + "By default, `BatchableExtensionType` creates batched values by batching any nested `Tensor`s, `CompositeTensor`s, and `ExtensionType`s. If this is not appropriate for your class, then you will need to use `tf.experimental.ExtensionTypeBatchEncoder` to override this default behavior. For example, it would not be appropriate to create a batch of `tf.SparseTensor` values by simply stacking individual sparse tensors' `values`, `indices`, and `dense_shape` fields -- in most cases, you can't stack these tensors, since they have incompatible shapes; and even if you could, the result would not be a valid `SparseTensor`.\n", "\n", "\n", - "**Note**: `BatchableExtensionType`s do *not* automatically define dispatchers for `tf.stack`, `tf.concat`, `tf.slice`, etc. If your class needs to be supported by these APIs, then use the dispatch decorators described above." + "**Note**: `BatchableExtensionType`s do *not* automatically define dispatchers for `tf.stack`, `tf.concat`, `tf.slice`, etc. If your class needs to be supported by these APIs, then use the dispatch decorators described above." ] }, { @@ -1291,7 +1291,7 @@ "id": "xkOJ8ke8GH7s" }, "source": [ - "### BatchableExtensionType example: Network\n", + "### `BatchableExtensionType` example: `Network`\n", "As an example, consider a simple `Network` class used for load balancing, which tracks how much work is left to do at each node, and how much bandwidth is available to move work between nodes:" ] }, @@ -1317,7 +1317,7 @@ "id": "PaOzUev6g3wT" }, "source": [ - "To make this type batchable, change the base type to `BatchableExtensionType`, and adjust the shape of each field to include optional batch dimensions. The following example also adds a `shape` field to keept track of the batch shape. This `shape` field is not required by `tf.data.Dataset` or `tf.map_fn`, but it *is* required by `tf.Keras`." + "To make this type batchable, change the base type to `BatchableExtensionType`, and adjust the shape of each field to include optional batch dimensions. The following example also adds a `shape` field to keep track of the batch shape. This `shape` field is not required by `tf.data.Dataset` or `tf.map_fn`, but it *is* required by `tf.keras`." ] }, { @@ -1329,7 +1329,7 @@ "outputs": [], "source": [ "class Network(tf.experimental.BatchableExtensionType):\n", - " shape: tf.TensorShape # batch shape. A single network has shape=[].\n", + " shape: tf.TensorShape # batch shape. A single network has shape=[].\n", " work: tf.Tensor # work[*shape, n] = work left to do at node n\n", " bandwidth: tf.Tensor # bandwidth[*shape, n1, n2] = bandwidth from n1->n2\n", "\n", @@ -1426,7 +1426,7 @@ "id": "f_HLsTT02Xul" }, "source": [ - "## TensorFlow APIs that support ExtensionTypes" + "## TensorFlow APIs that support `ExtensionType`s" ] }, { @@ -1437,7 +1437,7 @@ "source": [ "### @tf.function\n", "\n", - "[tf.function](https://www.tensorflow.org/guide/function) is a decorator that precomputes TensorFlow graphs for Python functions, which can substantially improve the performance of your TensorFlow code. Extension type values can be used transparently with `@tf.function`-decorated functions." 
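As a minimal sketch of that last point (the `Pair` type below is made up for this example and is not defined in the notebook), an extension type can be passed into and returned from a `tf.function`-decorated function with no extra plumbing:

```python
import tensorflow as tf

class Pair(tf.experimental.ExtensionType):
  x: tf.Tensor
  y: tf.Tensor

@tf.function
def swap(p):
  # Extension types work as both arguments and return values.
  return Pair(x=p.y, y=p.x)

print(swap(Pair(x=[1, 2], y=[3, 4])))
```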
+ "[`tf.function`](https://www.tensorflow.org/guide/function) is a decorator that precomputes TensorFlow graphs for Python functions, which can substantially improve the performance of your TensorFlow code. Extension type values can be used transparently with `@tf.function`-decorated functions." ] }, { @@ -1532,7 +1532,7 @@ }, "outputs": [], "source": [ - "# Example: using tf.cond to select between two MaskedTensors. Note that the\n", + "# Example: using tf.cond to select between two MaskedTensors. Note that the\n", "# two MaskedTensors don't need to have the same shape.\n", "a = MaskedTensor([1., 2, 3], [True, False, True])\n", "b = MaskedTensor([22., 33, 108, 55], [True, True, True, False])\n", @@ -1563,7 +1563,7 @@ "source": [ "### Autograph control flow\n", "\n", - "Extension types are also supported by control flow statements in tf.function (using autograph). In the following example, the `if` statement and `for` statements are automatically converted to `tf.cond` and `tf.while_loop` operations, which support extension types." + "Extension types are also supported by control flow statements in `tf.function` (using autograph). In the following example, the `if` statement and `for` statements are automatically converted to `tf.cond` and `tf.while_loop` operations, which support extension types." ] }, { @@ -1596,10 +1596,10 @@ "source": [ "### Keras\n", "\n", - "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. Extension types may be passed as inputs to a Keras model, passed between Keras layers, and returned by Keras models. Keras currently puts two requirements on extension types:\n", + "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. Extension types may be passed as inputs to a Keras model, passed between Keras layers, and returned by Keras models. Keras currently puts two requirements on extension types:\n", "\n", - "* They must be batchable (see \"Batchable ExtensionTypes\" above).\n", - "* The must have a field or property named `shape`. `shape[0]` is assumed to be the batch dimension.\n", + "* They must be batchable (go to \"Batchable `ExtensionType`s\" above).\n", + "* They must have a field or property named `shape`. `shape[0]` is assumed to be the batch dimension.\n", "\n", "The following two subsections give examples showing how extension types can be used with Keras.\n" ] @@ -1612,7 +1612,7 @@ "source": [ "#### Keras example: `Network`\n", "\n", - "For the first example, consider the `Network` class defined in the \"Batchable ExtensionTypes\" section above, which can be used for load balancing work between nodes. Its definition is repeated here:" + "For the first example, consider the `Network` class defined in the \"Batchable `ExtensionType`s\" section above, which can be used for load balancing work between nodes. Its definition is repeated here:" ] }, { @@ -1624,7 +1624,7 @@ "outputs": [], "source": [ "class Network(tf.experimental.BatchableExtensionType):\n", - " shape: tf.TensorShape # batch shape. A single network has shape=[].\n", + " shape: tf.TensorShape # batch shape. 
A single network has shape=[].\n", " work: tf.Tensor # work[*shape, n] = work left to do at node n\n", " bandwidth: tf.Tensor # bandwidth[*shape, n1, n2] = bandwidth from n1->n2\n", "\n", @@ -1647,7 +1647,7 @@ }, "outputs": [], "source": [ - "single_network = Network( # A single network w/ 4 nodes.\n", + "single_network = Network( # A single network with 4 nodes.\n", " work=[8.0, 5, 12, 2],\n", " bandwidth=[[0.0, 1, 2, 2], [1, 0, 0, 2], [2, 0, 0, 1], [2, 2, 1, 0]])\n", "\n", @@ -1679,7 +1679,7 @@ " Shifts work from more busy nodes to less busy nodes, constrained by bandwidth.\n", " \"\"\"\n", " def call(self, inputs):\n", - " # This function is defined above, in \"Batchable ExtensionTypes\" section.\n", + " # This function is defined above in the \"Batchable `ExtensionType`s\" section.\n", " return balance_work_greedy(inputs)" ] }, @@ -1689,7 +1689,7 @@ "id": "VWwFJNb1E03q" }, "source": [ - "You can then use this layers to create a simple model. To feed an `ExtensionType` into a model, you can use a `tf.keras.layer.Input` layer with `type_spec` set to the extension type's `TypeSpec`. If the Keras model will be used to process batches, then the `type_spec` must include the batch dimension." + "You can then use these layers to create a simple model. To feed an `ExtensionType` into a model, you can use a `tf.keras.layer.Input` layer with `type_spec` set to the extension type's `TypeSpec`. If the Keras model will be used to process batches, then the `type_spec` must include the batch dimension." ] }, { @@ -1748,7 +1748,7 @@ "source": [ "#### Keras example: MaskedTensor\n", "\n", - "In this example, `MaskedTensor` is extended to support `Keras`. `shape` is defined as a property that is calculated from the `values` field. Keras requires thatyou add this property to both the extension type and its `TypeSpec`. `MaskedTensor` also defines a `__name__` variable, which will be required for `SavedModel` serialization (below)." + "In this example, `MaskedTensor` is extended to support `Keras`. `shape` is defined as a property that is calculated from the `values` field. Keras requires that you add this property to both the extension type and its `TypeSpec`. `MaskedTensor` also defines a `__name__` variable, which will be required for `SavedModel` serialization (below)." ] }, { @@ -1794,7 +1794,7 @@ "id": "oer8BVc8H7_V" }, "source": [ - "Next, the dispatch decorators are used to override the default behavior of several TensorFlow APIs. Since these APIs are used by standard Keras layers (such as the `Dense` layer), overriding these will allow us to use those layers with `MaskedTensor`. For the purposes of this example, `matmul` for masked tensors is defined to treat the masked values as zeros (i.e., to not include them in the product)." + "Next, the dispatch decorators are used to override the default behavior of several TensorFlow APIs. Since these APIs are used by standard Keras layers (such as the `Dense` layer), overriding these will allow us to use those layers with `MaskedTensor`. For the purposes of this example, `matmul` for masked tensors is defined to treat the masked values as zeros (that is, to not include them in the product)." 
] }, { @@ -1822,13 +1822,17 @@ " transpose_a=False, transpose_b=False,\n", " adjoint_a=False, adjoint_b=False,\n", " a_is_sparse=False, b_is_sparse=False,\n", - " output_type=None):\n", + " output_type=None,\n", + " grad_a=False, grad_b=False,\n", + " name=None,\n", + " ):\n", " if isinstance(a, MaskedTensor):\n", " a = a.with_default(0)\n", " if isinstance(b, MaskedTensor):\n", " b = b.with_default(0)\n", " return tf.matmul(a, b, transpose_a, transpose_b, adjoint_a,\n", - " adjoint_b, a_is_sparse, b_is_sparse, output_type)" + " adjoint_b, a_is_sparse, b_is_sparse,\n", + " output_type)" ] }, { @@ -1881,7 +1885,7 @@ "\n", "A [SavedModel](https://www.tensorflow.org/guide/saved_model) is a serialized TensorFlow program, including both weights and computation. It can be built from a Keras model or from a custom model. In either case, extension types can be used transparently with the functions and methods defined by a SavedModel.\n", "\n", - "SavedModel can save models, layers, and functions that process extension types, as long as the extension types have a `__name__` field. This name is used to register the extension type, so it can be located when the model is loaded." + "SavedModel can save models, layers, and functions that process extension types, as long as the extension types have a `__name__` field. This name is used to register the extension type, so it can be located when the model is loaded." ] }, { @@ -1954,9 +1958,9 @@ "id": "o6beljh576ee" }, "source": [ - "#### Loading a SavedModel when the ExtensionType is unavailable\n", + "#### Loading a SavedModel when the `ExtensionType` is unavailable\n", "\n", - "If you load a `SavedModel` that uses an `ExtensionType`, but that `ExtensionType` is not available (i.e., has not been imported), then you will see a warning and TensorFlow will fall back to using an \"anonymous extension type\" object. This object will have the same fields as the original type, but will lack any further customization you have added for the type, such as custom methods or properties." + "If you load a `SavedModel` that uses an `ExtensionType`, but that `ExtensionType` is not available (that is, it has not been imported), then you will get a warning and TensorFlow will fall back to using an \"anonymous extension type\" object. This object will have the same fields as the original type, but will lack any further customization you have added for the type, such as custom methods or properties." ] }, { @@ -1965,9 +1969,9 @@ "id": "ec9PcUkJ9bFK" }, "source": [ - "#### Using ExtensionTypes with TensorFlow serving\n", + "#### Using `ExtensionType`s with TensorFlow Serving\n", "\n", - "Currently, [TensorFlow serving](https://www.tensorflow.org/tfx/guide/serving) (and other consumers of the SavedModel \"signatures\" dictionary) require that all inputs and outputs be raw tensors. If you wish to use TensorFlow serving with a model that uses extension types, then you can add wrapper methods that compose or decompose extension type values from tensors. E.g.:" + "Currently, [TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) (and other consumers of the SavedModel \"signatures\" dictionary) require that all inputs and outputs be raw tensors. If you wish to use TensorFlow Serving with a model that uses extension types, then you can add wrapper methods that compose or decompose extension type values from tensors. 
For example:" ] }, { @@ -2012,9 +2016,9 @@ "id": "4dwBadWQ5G9_" }, "source": [ - "### Datasets\n", + "### `Dataset`s\n", "\n", - "[tf.data](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components." + "[`tf.data`](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components." ] }, { @@ -2023,7 +2027,7 @@ "id": "GcIR19FuwRJV" }, "source": [ - "#### Building Datasets with extension types\n", + "#### Building `Dataset`s with extension types\n", "\n", "Datasets can be built from extension type values using `Dataset.from_tensors`, `Dataset.from_tensor_slices`, or `Dataset.from_generator`:" ] @@ -2078,9 +2082,9 @@ "id": "wfEm4NInyqtj" }, "source": [ - "#### Batching and unbatching Datasets with extension types\n", + "#### Batching and unbatching `Dataset`s with extension types\n", "\n", - "Datasets with extension types can be batchand and unbatched using `Dataset.batch` adn `Dataset.unbatch`." + "Datasets with extension types can be batchand and unbatched using `Dataset.batch` and `Dataset.unbatch`." ] }, { diff --git a/site/en/guide/function.ipynb b/site/en/guide/function.ipynb index e39c65234b1..f4677f21eb8 100644 --- a/site/en/guide/function.ipynb +++ b/site/en/guide/function.ipynb @@ -61,7 +61,7 @@ "id": "J122XQYG7W6w" }, "source": [ - "In TensorFlow 2, [eager execution](eager.ipynb) is turned on by default. The user interface is intuitive and flexible (running one-off operations is much easier and faster), but this can come at the expense of performance and deployability.\n", + "In TensorFlow 2, [eager execution](basics.ipynb) is turned on by default. The user interface is intuitive and flexible (running one-off operations is much easier and faster), but this can come at the expense of performance and deployability.\n", "\n", "You can use `tf.function` to make graphs out of your programs. It is a transformation tool that creates Python-independent dataflow graphs out of your Python code. This will help you create performant and portable models, and it is required to use `SavedModel`.\n", "\n", @@ -146,7 +146,7 @@ "source": [ "### Usage\n", "\n", - "A `Function` you define (for example by applying the `@tf.function` decorator) is just like a core TensorFlow operation: You can execute it eagerly; you can compute gradients; and so on." + "A `tf.function` that you define (for example by applying the `@tf.function` decorator) is just like a core TensorFlow operation: You can execute it eagerly; you can compute gradients; and so on." ] }, { @@ -157,7 +157,7 @@ }, "outputs": [], "source": [ - "@tf.function # The decorator converts `add` into a `Function`.\n", + "@tf.function # The decorator converts `add` into a `PolymorphicFunction`.\n", "def add(a, b):\n", " return a + b\n", "\n", @@ -184,7 +184,7 @@ "id": "ocWZvqrmHnmX" }, "source": [ - "You can use `Function`s inside other `Function`s." + "You can use `tf.function`s inside other `tf.function`s." ] }, { @@ -208,7 +208,7 @@ "id": "piBhz7gYsHqU" }, "source": [ - "`Function`s can be faster than eager code, especially for graphs with many small ops. 
But for graphs with a few expensive ops (like convolutions), you may not see much speedup.\n" + "`tf.function`s can be faster than eager code, especially for graphs with many small ops. But for graphs with a few expensive ops (like convolutions), you may not see much speedup.\n" ] }, { @@ -242,7 +242,7 @@ "source": [ "### Tracing\n", "\n", - "This section exposes how `Function` works under the hood, including implementation details *which may change in the future*. However, once you understand why and when tracing happens, it's much easier to use `tf.function` effectively!" + "This section exposes how `tf.function` works under the hood, including implementation details *which may change in the future*. However, once you understand why and when tracing happens, it's much easier to use `tf.function` effectively!" ] }, { @@ -253,17 +253,17 @@ "source": [ "#### What is \"tracing\"?\n", "\n", - "A `Function` runs your program in a [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs#what_are_graphs). However, a `tf.Graph` cannot represent all the things that you'd write in an eager TensorFlow program. For instance, Python supports polymorphism, but `tf.Graph` requires its inputs to have a specified data type and dimension. Or you may perform side tasks like reading command-line arguments, raising an error, or working with a more complex Python object; none of these things can run in a `tf.Graph`.\n", + "A `tf.function` runs your program in a [TensorFlow Graph](https://www.tensorflow.org/guide/intro_to_graphs#what_are_graphs). However, a `tf.Graph` cannot represent all the things that you'd write in an eager TensorFlow program. For instance, Python supports polymorphism, but `tf.Graph` requires its inputs to have a specified data type and dimension. Or you may perform side tasks like reading command-line arguments, raising an error, or working with a more complex Python object; none of these things can run in a `tf.Graph`.\n", "\n", - "`Function` bridges this gap by separating your code in two stages:\n", + "`tf.function` bridges this gap by separating your code in two stages:\n", "\n", - " 1) In the first stage, referred to as \"**tracing**\", `Function` creates a new `tf.Graph`. Python code runs normally, but all TensorFlow operations (like adding two Tensors) are *deferred*: they are captured by the `tf.Graph` and not run.\n", + " 1) In the first stage, referred to as \"**tracing**\", `tf.function` creates a new `tf.Graph`. Python code runs normally, but all TensorFlow operations (like adding two Tensors) are *deferred*: they are captured by the `tf.Graph` and not run.\n", "\n", " 2) In the second stage, a `tf.Graph` which contains everything that was deferred in the first stage is run. This stage is much faster than the tracing stage.\n", "\n", - "Depending on its inputs, `Function` will not always run the first stage when it is called. See [\"Rules of tracing\"](#rules_of_tracing) below to get a better sense of how it makes that determination. Skipping the first stage and only executing the second stage is what gives you TensorFlow's high performance.\n", + "Depending on its inputs, `tf.function` will not always run the first stage when it is called. See [\"Rules of tracing\"](#rules_of_tracing) below to get a better sense of how it makes that determination. 
Skipping the first stage and only executing the second stage is what gives you TensorFlow's high performance.\n", "\n", - "When `Function` does decide to trace, the tracing stage is immediately followed by the second stage, so calling the `Function` both creates and runs the `tf.Graph`. Later you will see how you can run only the tracing stage with [`get_concrete_function`](#obtaining_concrete_functions)." + "When `tf.function` does decide to trace, the tracing stage is immediately followed by the second stage, so calling the `tf.function` both creates and runs the `tf.Graph`. Later you will see how you can run only the tracing stage with [`get_concrete_function`](#obtaining_concrete_functions)." ] }, { @@ -272,7 +272,7 @@ "id": "K7scSzLx662f" }, "source": [ - "When you pass arguments of different types into a `Function`, both stages are run:\n" + "When you pass arguments of different types into a `tf.function`, both stages are run:\n" ] }, { @@ -302,7 +302,7 @@ "id": "QPfouGUQrcNb" }, "source": [ - "Note that if you repeatedly call a `Function` with the same argument type, TensorFlow will skip the tracing stage and reuse a previously traced graph, as the generated graph would be identical." + "Note that if you repeatedly call a `tf.function` with the same argument type, TensorFlow will skip the tracing stage and reuse a previously traced graph, as the generated graph would be identical." ] }, { @@ -346,10 +346,11 @@ "So far, you've seen that `tf.function` creates a cached, dynamic dispatch layer over TensorFlow's graph tracing logic. To be more specific about the terminology:\n", "\n", "- A `tf.Graph` is the raw, language-agnostic, portable representation of a TensorFlow computation.\n", - "- A `ConcreteFunction` wraps a `tf.Graph`.\n", - "- A `Function` manages a cache of `ConcreteFunction`s and picks the right one for your inputs.\n", - "- `tf.function` wraps a Python function, returning a `Function` object.\n", - "- **Tracing** creates a `tf.Graph` and wraps it in a `ConcreteFunction`, also known as a **trace.**\n" + "- Tracing is the process through which new `tf.Graph`s are generated from Python code.\n", + "- An instance of `tf.Graph` is specialized to the specific input types it was traced with. Differing types require retracing.\n", + "- Each traced `tf.Graph` has a corresponding `ConcreteFunction`.\n", + "- A `tf.function` manages a cache of `ConcreteFunction`s and picks the right one for your inputs.\n", + "- `tf.function` wraps the Python function that will be traced, returning a `tf.types.experimental.PolymorphicFunction` object.\n" ] }, { @@ -360,18 +361,22 @@ "source": [ "#### Rules of tracing\n", "\n", - "When called, a `Function` matches the call arguments to existing `ConcreteFunction`s using `tf.types.experimental.TraceType` of each argument. If a matching `ConcreteFunction` is found, the call is dispatched to it. If no match is found, a new `ConcreteFunction` is traced. \n", + "When called, a `tf.function` first evaluates the type of each input argument using the `tf.types.experimental.TraceType` of each argument. This is used to construct a `tf.types.experimental.FunctionType` describing the signature of the desired `ConcreteFunction`. We compare this `FunctionType` to the `FunctionType`s of existing `ConcreteFunction`s. If a matching `ConcreteFunction` is found, the call is dispatched to it. If no match is found, a new `ConcreteFunction` is traced for the desired `FunctionType`.\n", "\n", - "If multiple matches are found, the most specific signature is chosen. 
Matching is done by [subtyping](https://en.wikipedia.org/wiki/Subtyping), much like normal function calls in C++ or Java, for instance. For example, `TensorShape([1, 2])` is a subtype of `TensorShape([None, None])` and so a call to the tf.function with `TensorShape([1, 2])` can be dispatched to the `ConcreteFunction` produced with `TensorShape([None, None])` but if a `ConcreteFunction` with `TensorShape([1, None])` also exists then it will prioritized since it is more specific.\n", + "If multiple matches are found, the most specific signature is chosen. Matching is done by [subtyping](https://en.wikipedia.org/wiki/Subtyping), much like normal function calls in C++ or Java, for instance. For example, `TensorShape([1, 2])` is a subtype of `TensorShape([None, None])` and so a call to the tf.function with `TensorShape([1, 2])` can be dispatched to the `ConcreteFunction` produced with `TensorShape([None, None])` but if a `ConcreteFunction` with `TensorShape([1, None])` also exists then it will be prioritized since it is more specific.\n", "\n", "The `TraceType` is determined from input arguments as follows:\n", "* For `Tensor`, the type is parameterized by the `Tensor`'s `dtype` and `shape`; ranked shapes are a subtype of unranked shapes; fixed dimensions are a subtype of unknown dimensions\n", "* For `Variable`, the type is similar to `Tensor`, but also includes a unique resource ID of the variable, necessary to correctly wire control dependencies\n", "* For Python primitive values, the type corresponds to the **value** itself. For example, the `TraceType` of the value `3` is `LiteralTraceType<3>`, not `int`.\n", "* For Python ordered containers such as `list` and `tuple`, etc., the type is parameterized by the types of their elements; for example, the type of `[1, 2]` is `ListTraceType, LiteralTraceType<2>>` and the type for `[2, 1]` is `ListTraceType, LiteralTraceType<1>>` which is different.\n", - "* For Python mappings such as `dict`, the type is also a mapping from the same keys but to the types of values instead the actual values. For example, the type of `{1: 2, 3: 4}`, is `MappingTraceType<>>, >>>`. However, unlike ordered containers, `{1: 2, 3: 4}` and `{3: 4, 1: 2}` have equivalent types.\n", - "* For Python objects which implement the `__tf_tracing_type__` method, the type is whatever that method returns\n", - "* For any other Python objects, the type is a generic `TraceType` which uses the object's Python equality and hashing for matching. (Note: It relies on [weakref](https://docs.python.org/3/library/weakref.html) to the object and hence only works as long as the object is in scope/not deleted.)\n" + "* For Python mappings such as `dict`, the type is also a mapping from the same keys but to the types of values instead of the actual values. For example, the type of `{1: 2, 3: 4}`, is `MappingTraceType<>>, >>>`. However, unlike ordered containers, `{1: 2, 3: 4}` and `{3: 4, 1: 2}` have equivalent types.\n", + "* For Python objects which implement the `__tf_tracing_type__` method, the type is whatever that method returns.\n", + "* For any other Python objects, the type is a generic `TraceType`, and the matching procedure is:\n", + " * First it checks if the object is the same object used in the previous trace (using Python `id()` or `is`). 
Note that this will still match if the object has changed, so if you use Python objects as `tf.function` arguments it's best to use *immutable* ones.\n", + " * Next it checks if the object is equal to the object used in the previous trace (using Python `==`).\n", + " \n", + " Note that this procedure only keeps a [weakref](https://docs.python.org/3/library/weakref.html) to the object and hence only works as long as the object is in scope/not deleted.\n" ] }, { @@ -380,7 +385,7 @@ "id": "GNNN4lgRzpIs" }, "source": [ - "Note: `TraceType` is based on the `Function` input parameters so changes to global and [free variables](https://docs.python.org/3/reference/executionmodel.html#binding-of-names) alone will not create a new trace. See [this section](#depending_on_python_global_and_free_variables) for recommended practices when dealing with Python global and free variables." + "Note: `TraceType` is based on the `tf.function` input parameters so changes to global and [free variables](https://docs.python.org/3/reference/executionmodel.html#binding-of-names) alone will not create a new trace. See [this section](#depending_on_python_global_and_free_variables) for recommended practices when dealing with Python global and free variables." ] }, { @@ -391,7 +396,7 @@ "source": [ "### Controlling retracing\n", "\n", - "Retracing, which is when your `Function` creates more than one trace, helps ensures that TensorFlow generates correct graphs for each set of inputs. However, tracing is an expensive operation! If your `Function` retraces a new graph for every call, you'll find that your code executes more slowly than if you didn't use `tf.function`.\n", + "Retracing, which is when your `tf.function` creates more than one trace, helps ensure that TensorFlow generates correct graphs for each set of inputs. However, tracing is an expensive operation! If your `tf.function` retraces a new graph for every call, you'll find that your code executes more slowly than if you didn't use `tf.function`.\n", "\n", "To control the tracing behavior, you can use the following techniques:" ] @@ -402,7 +407,9 @@ "id": "EUtycWJa34TT" }, "source": [ - "#### Pass a fixed `input_signature` to `tf.function`" + "#### Pass a fixed `input_signature` to `tf.function`\n", + "\n", + "This forces `tf.function` to constrain itself to only one `tf.types.experimental.FunctionType` composed of the types enumerated by the `input_signature`. Calls that cannot be dispatched to this `FunctionType` will throw an error." ] }, { @@ -420,11 +427,11 @@ "\n", "print(next_collatz(tf.constant([1, 2])))\n", "# You specified a 1-D tensor in the input signature, so this should fail.\n", - "with assert_raises(ValueError):\n", + "with assert_raises(TypeError):\n", " next_collatz(tf.constant([[1, 2], [3, 4]]))\n", "\n", "# You specified an int32 dtype in the input signature, so this should fail.\n", - "with assert_raises(ValueError):\n", + "with assert_raises(TypeError):\n", " next_collatz(tf.constant([1.0, 2.0]))\n" ] }, @@ -436,7 +443,7 @@ "source": [ "#### Use unknown dimensions for flexibility\n", "\n", - " Since TensorFlow matches tensors based on their shape, using a `None` dimension as a wildcard will allow `Function`s to reuse traces for variably-sized input. Variably-sized input can occur if you have sequences of different length, or images of different sizes for each batch (See the [Transformer](../tutorials/text/transformer.ipynb) and [Deep Dream](../tutorials/generative/deepdream.ipynb) tutorials for example)." 
+ " Since TensorFlow matches tensors based on their shape, using a `None` dimension as a wildcard will allow `tf.function`s to reuse traces for variably-sized input. Variably-sized input can occur if you have sequences of different length, or images of different sizes for each batch. You can check out the [Transformer](https://www.tensorflow.org/text/tutorials/transformer) and [Deep Dream](../tutorials/generative/deepdream.ipynb) tutorials for examples." ] }, { @@ -457,6 +464,41 @@ "print(g(tf.constant([1, 2, 3, 4, 5])))\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "37cc12f93cbd" + }, + "source": [ + "#### Use `reduce_retracing` for automatic flexibility\n", + "\n", + "When `reduce_retracing` is enabled, `tf.function` automatically identifies supertypes of the input types it is observing and chooses to trace more generalized graphs automatically. It is less efficient than setting the `input_signature` directly but useful when many types need to be supported." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0403fae03a1f" + }, + "outputs": [], + "source": [ + "@tf.function(reduce_retracing=True)\n", + "def g(x):\n", + " print('Tracing with', x)\n", + " return x\n", + "\n", + "# Traces once.\n", + "print(g(tf.constant([1, 2, 3])))\n", + "\n", + "# Traces again, but more generalized this time.\n", + "print(g(tf.constant([1, 2, 3, 4, 5])))\n", + "\n", + "# No more tracing!\n", + "print(g(tf.constant([1, 2, 3, 4, 5, 6, 7])))\n", + "print(g(tf.constant([1, 2, 3, 4, 5, 6, 7, 8, 9])))" + ] + }, { "cell_type": "markdown", "metadata": { @@ -504,7 +546,7 @@ "id": "4pJqkDR_Q2wz" }, "source": [ - "If you need to force retracing, create a new `Function`. Separate `Function` objects are guaranteed not to share traces." + "If you need to force retracing, create a new `tf.function`. Separate `tf.function` objects are guaranteed not to share traces." ] }, { @@ -531,9 +573,9 @@ "source": [ "#### Use the tracing protocol\n", "\n", - "Where possible, you should prefer converting the Python type into a [`tf.experimental.ExtensionType`](www.tensorflow.org/api_docs/python/tf/experimental/ExtensionType) instead. Moreover, the `TraceType` of an `ExtensionType` is the `tf.TypeSpec` associated with it. Therefore, if needed, you can simply [override](https://www.tensorflow.org/guide/extension_type#customizing_the_extensiontypes_typespec) the default `tf.TypeSpec` to take control of an `ExtensionType`'s `Tracing Protocol`.\n", + "Where possible, you should prefer converting the Python type into a `tf.experimental.ExtensionType` instead. Moreover, the `TraceType` of an `ExtensionType` is the `tf.TypeSpec` associated with it. Therefore, if needed, you can simply override the default `tf.TypeSpec` to take control of an `ExtensionType`'s `Tracing Protocol`. Refer to the _Customizing the ExtensionType's TypeSpec_ section in the [Extension types](extension_type.ipynb) guide for details.\n", "\n", - "Otherwise, for direct control over when `Function` should retrace in regards to a particular Python type, you can implement the `Tracing Protocol` for it yourself." + "Otherwise, for direct control over when `tf.function` should retrace in regards to a particular Python type, you can implement the `Tracing Protocol` for it yourself." 
] }, { @@ -558,38 +600,45 @@ " flavor = tf.constant([3, 4])\n", "\n", "# As described in the above rules, a generic TraceType for `Apple` and `Mango`\n", - "# is generated (and a corresponding ConcreteFunction is traced) but it fails to \n", - "# match the second function call since the first pair of Apple() and Mango() \n", + "# is generated (and a corresponding ConcreteFunction is traced) but it fails to\n", + "# match the second function call since the first pair of Apple() and Mango()\n", "# have gone out out of scope by then and deleted.\n", "get_mixed_flavor(Apple(), Mango()) # Traces a new concrete function\n", "get_mixed_flavor(Apple(), Mango()) # Traces a new concrete function again\n", "\n", - "# However, we, as the designers of the `Fruit` class, know that each subclass\n", - "# has a fixed flavor and we can reuse an existing traced concrete function if\n", - "# it was the same subclass. Avoiding such unnecessary tracing of concrete\n", - "# functions can have significant performance benefits.\n", + "# However, each subclass of the `Fruit` class has a fixed flavor, and you\n", + "# can reuse an existing traced concrete function if it was the same\n", + "# subclass. Avoiding such unnecessary tracing of concrete functions\n", + "# can have significant performance benefits.\n", "\n", "class FruitTraceType(tf.types.experimental.TraceType):\n", - " def __init__(self, fruit_type):\n", - " self.fruit_type = fruit_type\n", + " def __init__(self, fruit):\n", + " self.fruit_type = type(fruit)\n", + " self.fruit_value = fruit\n", "\n", " def is_subtype_of(self, other):\n", + " # True if self subtypes `other` and `other`'s type matches FruitTraceType.\n", " return (type(other) is FruitTraceType and\n", " self.fruit_type is other.fruit_type)\n", "\n", " def most_specific_common_supertype(self, others):\n", + " # `self` is the specific common supertype if all input types match it.\n", " return self if all(self == other for other in others) else None\n", "\n", + " def placeholder_value(self, placeholder_context=None):\n", + " # Use the fruit itself instead of the type for correct tracing.\n", + " return self.fruit_value\n", + "\n", " def __eq__(self, other):\n", " return type(other) is FruitTraceType and self.fruit_type == other.fruit_type\n", - " \n", + "\n", " def __hash__(self):\n", " return hash(self.fruit_type)\n", "\n", "class FruitWithTraceType:\n", "\n", " def __tf_tracing_type__(self, context):\n", - " return FruitTraceType(type(self))\n", + " return FruitTraceType(self)\n", "\n", "class AppleWithTraceType(FruitWithTraceType):\n", " flavor = tf.constant([1, 2])\n", @@ -597,7 +646,7 @@ "class MangoWithTraceType(FruitWithTraceType):\n", " flavor = tf.constant([3, 4])\n", "\n", - "# Now if we try calling it again:\n", + "# Now if you try calling it again:\n", "get_mixed_flavor(AppleWithTraceType(), MangoWithTraceType()) # Traces a new concrete function\n", "get_mixed_flavor(AppleWithTraceType(), MangoWithTraceType()) # Re-uses the traced concrete function" ] @@ -678,8 +727,7 @@ }, "outputs": [], "source": [ - "print(double_strings.structured_input_signature)\n", - "print(double_strings.structured_outputs)" + "print(double_strings.function_type)" ] }, { @@ -750,7 +798,7 @@ "source": [ "### Obtaining graphs\n", "\n", - "Each concrete function is a callable wrapper around a `tf.Graph`. Although retrieving the actual `tf.Graph` object is not something you'll normally need to do, you can obtain it easily from any concrete function." 
+ "Although retrieving the actual `tf.Graph` object is not something you'll normally need to do, you can obtain it easily from any concrete function." ] }, { @@ -766,6 +814,36 @@ " print(f'{node.input} -> {node.name}')\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "2d49c486ccd4" + }, + "source": [ + "In reality, `tf.Graph`s are not directly callable. We actually use an `tf.types.experimental.AtomicFunction` to perform the computations described by the `tf.Graph`. You can access the `AtomicFunction` describing the traced `tf.Graph` and call it directly instead of the `ConcreteFunction`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4c3879aa0be0" + }, + "outputs": [], + "source": [ + "atomic_fn = double_strings.inference_fn\n", + "atomic_fn(tf.constant(\"a\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c3bd1036c18c" + }, + "source": [ + "This has the advantage of having lower Python overhead for high-performance scenarios. But it should only be used for forward inference (no gradient support), and captured tensor values (if any) would need to be explicitly supplied." + ] + }, { "cell_type": "markdown", "metadata": { @@ -822,7 +900,7 @@ "id": "KxwJ8znPI0Cg" }, "source": [ - "If you're curious you can inspect the code autograph generates." + "If you're curious you can inspect the code AutoGraph generates." ] }, { @@ -961,9 +1039,9 @@ "id": "JeD2U-yrbfVb" }, "source": [ - "When wrapping Python/NumPy data in a Dataset, be mindful of `tf.data.Dataset.from_generator` versus ` tf.data.Dataset.from_tensors`. The former will keep the data in Python and fetch it via `tf.py_function` which can have performance implications, whereas the latter will bundle a copy of the data as one large `tf.constant()` node in the graph, which can have memory implications.\n", + "When wrapping Python/NumPy data in a Dataset, be mindful of `tf.data.Dataset.from_generator` versus ` tf.data.Dataset.from_tensor_slices`. The former will keep the data in Python and fetch it via `tf.py_function` which can have performance implications, whereas the latter will bundle a copy of the data as one large `tf.constant()` node in the graph, which can have memory implications.\n", "\n", - "Reading data from files via `TFRecordDataset`, `CsvDataset`, etc. is the most effective way to consume data, as then TensorFlow itself can manage the asynchronous loading and prefetching of data, without having to involve Python. To learn more, see the [`tf.data`: Build TensorFlow input pipelines](../../guide/data) guide." + "Reading data from files via `TFRecordDataset`, `CsvDataset`, etc. is the most effective way to consume data, as then TensorFlow itself can manage the asynchronous loading and prefetching of data, without having to involve Python. To learn more, see the [`tf.data`: Build TensorFlow input pipelines](data.ipynb) guide." ] }, { @@ -1018,7 +1096,7 @@ "source": [ "## Limitations\n", "\n", - "TensorFlow `Function` has a few limitations by design that you should be aware of when converting a Python function to a `Function`." + "`tf.function` has a few limitations by design that you should be aware of when converting a Python function to a `tf.function`." ] }, { @@ -1029,7 +1107,7 @@ "source": [ "### Executing Python side effects\n", "\n", - "Side effects, like printing, appending to lists, and mutating globals, can behave unexpectedly inside a `Function`, sometimes executing twice or not all. 
They only happen the first time you call a `Function` with a set of inputs. Afterwards, the traced `tf.Graph` is reexecuted, without executing the Python code.\n", + "Side effects, like printing, appending to lists, and mutating globals, can behave unexpectedly inside a `tf.function`, sometimes executing twice or not all. They only happen the first time you call a `tf.function` with a set of inputs. Afterwards, the traced `tf.Graph` is reexecuted, without executing the Python code.\n", "\n", "The general rule of thumb is to avoid relying on Python side effects in your logic and only use them to debug your traces. Otherwise, TensorFlow APIs like `tf.data`, `tf.print`, `tf.summary`, `tf.Variable.assign`, and `tf.TensorArray` are the best way to ensure your code will be executed by the TensorFlow runtime with each call." ] @@ -1058,7 +1136,66 @@ "id": "e1I0dPiqTV8H" }, "source": [ - "If you would like to execute Python code during each invocation of a `Function`, `tf.py_function` is an exit hatch. The drawback of `tf.py_function` is that it's not portable or particularly performant, cannot be saved with SavedModel, and does not work well in distributed (multi-GPU, TPU) setups. Also, since `tf.py_function` has to be wired into the graph, it casts all inputs/outputs to tensors." + "If you would like to execute Python code during each invocation of a `tf.function`, `tf. py_function` is an exit hatch. The drawbacks of `tf.py_function` are that it's not portable or particularly performant, cannot be saved with `SavedModel`, and does not work well in distributed (multi-GPU, TPU) setups. Also, since `tf.py_function` has to be wired into the graph, it casts all inputs/outputs to tensors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZbI7XA_e6yA2" + }, + "outputs": [], + "source": [ + "@tf.py_function(Tout=tf.float32)\n", + "def py_plus(x, y):\n", + " print('Executing eagerly.')\n", + " return x + y\n", + "\n", + "@tf.function\n", + "def tf_wrapper(x, y):\n", + " print('Tracing.')\n", + " return py_plus(x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h5ttN_sI7TdQ" + }, + "source": [ + "The `tf.function` will trace the first time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mAK4XINl7Ldy" + }, + "outputs": [], + "source": [ + "tf_wrapper(tf.constant(1.0), tf.constant(2.0)).numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Atxvrd_o7dSy" + }, + "source": [ + "But the `tf.py_function` inside executes eagerly every time:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vv7qTiTU7bjy" + }, + "outputs": [], + "source": [ + "tf_wrapper(tf.constant(1.0), tf.constant(2.0)).numpy()" ] }, { @@ -1100,7 +1237,7 @@ "id": "5eZTFRv_k_nR" }, "source": [ - "Sometimes unexpected behaviors are very hard to notice. In the example below, the `counter` is intended to safeguard the increment of a variable. However because it is a python integer and not a TensorFlow object, it's value is captured during the first trace. When the `tf.function` is used, the `assign_add` will be recorded unconditionally in the underlying graph. Therefore `v` will increase by 1, every time the `tf.function` is called. This issue is common among users that try to migrate their Grpah-mode Tensorflow code to Tensorflow 2 using `tf.function` decorators, when python side-effects (the `counter` in the example) are used to determine what ops to run (`assign_add` in the example). 
Usually, users realize this only after seeing suspicious numerical results, or significantly lower performance than expected (e.g. if the guarded operation is very costly)." + "Sometimes unexpected behaviors are very hard to notice. In the example below, the `counter` is intended to safeguard the increment of a variable. However because it is a python integer and not a TensorFlow object, it's value is captured during the first trace. When the `tf.function` is used, the `assign_add` will be recorded unconditionally in the underlying graph. Therefore `v` will increase by 1, every time the `tf.function` is called. This issue is common among users that try to migrate their Graph-mode Tensorflow code to Tensorflow 2 using `tf.function` decorators, when python side-effects (the `counter` in the example) are used to determine what ops to run (`assign_add` in the example). Usually, users realize this only after seeing suspicious numerical results, or significantly lower performance than expected (e.g. if the guarded operation is very costly)." ] }, { @@ -1173,7 +1310,7 @@ "id": "pbFG5CX4LwQA" }, "source": [ - "In summary, as a rule of thumb, you should avoid mutating python objects such as integers or containers like lists that live outside the `Function`. Instead, use arguments and TF objects. For example, the section [\"Accumulating values in a loop\"](#accumulating_values_in_a_loop) has one example of how list-like operations can be implemented.\n", + "In summary, as a rule of thumb, you should avoid mutating python objects such as integers or containers like lists that live outside the `tf.function`. Instead, use arguments and TF objects. For example, the section [\"Accumulating values in a loop\"](#accumulating_values_in_a_loop) has one example of how list-like operations can be implemented.\n", "\n", "You can, in some cases, capture and manipulate state if it is a [`tf.Variable`](https://www.tensorflow.org/guide/variable). This is how the weights of Keras models are updated with repeated calls to the same `ConcreteFunction`." ] @@ -1367,7 +1504,7 @@ "source": [ "### Recursive tf.functions are not supported\n", "\n", - "Recursive `Function`s are not supported and could cause infinite loops. For example," + "Recursive `tf.function`s are not supported and could cause infinite loops. For example," ] }, { @@ -1395,7 +1532,7 @@ "id": "LyRyooKGUxNV" }, "source": [ - "Even if a recursive `Function` seems to work, the python function will be traced multiple times and could have performance implication. For example," + "Even if a recursive `tf.function` seems to work, the Python function will be traced multiple times and could have performance implications. For example," ] }, { @@ -1425,7 +1562,7 @@ "source": [ "## Known Issues\n", "\n", - "If your `Function` is not evaluating correctly, the error may be explained by these known issues which are planned to be fixed in the future." + "If your `tf.function` is not evaluating correctly, the error may be explained by these known issues which are planned to be fixed in the future." ] }, { @@ -1436,7 +1573,7 @@ "source": [ "### Depending on Python global and free variables\n", "\n", - "`Function` creates a new `ConcreteFunction` when called with a new value of a Python argument. However, it does not do that for the Python closure, globals, or nonlocals of that `Function`. If their value changes in between calls to the `Function`, the `Function` will still use the values they had when it was traced. 
This is different from how regular Python functions work.\n", + "`tf.function` creates a new `ConcreteFunction` when called with a new value of a Python argument. However, it does not do that for the Python closure, globals, or nonlocals of that `tf.function`. If their value changes in between calls to the `tf.function`, the `tf.function` will still use the values they had when it was traced. This is different from how regular Python functions work.\n", "\n", "For that reason, you should follow a functional programming style that uses arguments instead of closing over outer names." ] @@ -1482,7 +1619,7 @@ "id": "ZoPg5w1Pjqnb" }, "source": [ - "Another way to update a global value, is to make it a `tf.Variable` and use the `Variable.assign` method instead.\n" + "Another way to update a global value is to make it a `tf.Variable` and use the `Variable.assign` method instead.\n" ] }, { @@ -1520,7 +1657,7 @@ "id": "hvwe9gTIWfx6" }, "source": [ - "#### Depending on Python objects" + "### Depending on Python objects" ] }, { @@ -1529,7 +1666,11 @@ "id": "BJkZS-SwPvOQ" }, "source": [ - "The recommendation to pass Python objects as arguments into `tf.function` has a number of known issues, that are expected to be fixed in the future. In general, you can rely on consistent tracing if you use a Python primitive or `tf.nest`-compatible structure as an argument or pass in a *different* instance of an object into a `Function`. However, `Function` will *not* create a new trace when you pass **the same object and only change its attributes**." + "Passing custom Python objects as arguments to `tf.function` is supported but has certain limitations.\n", + "\n", + "For maximum feature coverage, consider transforming the objects into [Extension types](extension_type.ipynb) before passing them to `tf.function`. You can also use Python primitives and `tf.nest`-compatible structures.\n", + "\n", + "However, as covered in the [rules of tracing](#rules_of_tracing), when a custom `TraceType` is not provided by the custom Python class, `tf.function` is forced to use instance-based equality which means it will **not create a new trace** when you pass the **same object with modified attributes**." 
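+    "\n",
+    "As a minimal sketch (using a hypothetical `Pet` class, not one of the classes defined elsewhere in this guide), an extension type re-uses its trace across instances because `tf.experimental.ExtensionType` objects are traced by their `TypeSpec` rather than by object identity:\n",
+    "\n",
+    "```python\n",
+    "class Pet(tf.experimental.ExtensionType):\n",
+    "  name: str\n",
+    "  weight: tf.Tensor\n",
+    "\n",
+    "@tf.function\n",
+    "def feed(pet):\n",
+    "  return pet.weight + 1.0\n",
+    "\n",
+    "feed(Pet(name='Rex', weight=tf.constant(3.0)))  # Traces a new concrete function.\n",
+    "feed(Pet(name='Rex', weight=tf.constant(5.0)))  # Re-uses it: same `TypeSpec`.\n",
+    "```"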
] }, { @@ -1574,11 +1715,11 @@ "id": "Ytcgg2qFWaBF" }, "source": [ - "Using the same `Function` to evaluate the updated instance of the model will be buggy since the updated model has the [same cache key](#rules_of_tracing) as the original model.\n", + "Using the same `tf.function` to evaluate the modified instance of the model will be buggy since it still has the [same instance-based TraceType](#rules_of_tracing) as the original model.\n", "\n", - "For that reason, you're recommended to write your `Function` to avoid depending on mutable object attributes or create new objects.\n", + "For that reason, you're recommended to write your `tf.function` to avoid depending on mutable object attributes or implement the [Tracing Protocol](#use_the_tracing_protocol) for the objects to inform `tf.function` about such attributes.\n", "\n", - "If that is not possible, one workaround is to make new `Function`s each time you modify your object to force retracing:" + "If that is not possible, one workaround is to make new `tf.function`s each time you modify your object to force retracing:" ] }, { @@ -1594,8 +1735,8 @@ "\n", "new_model = SimpleModel()\n", "evaluate_no_bias = tf.function(evaluate).get_concrete_function(new_model, x)\n", - "# Don't pass in `new_model`, `Function` already captured its state during tracing.\n", - "print(evaluate_no_bias(x)) " + "# Don't pass in `new_model`. `tf.function` already captured its state during tracing.\n", + "print(evaluate_no_bias(x))" ] }, { @@ -1608,7 +1749,7 @@ "source": [ "print(\"Adding bias!\")\n", "new_model.bias += 5.0\n", - "# Create new Function and ConcreteFunction since you modified new_model.\n", + "# Create new `tf.function` and `ConcreteFunction` since you modified `new_model`.\n", "evaluate_with_bias = tf.function(evaluate).get_concrete_function(new_model, x)\n", "print(evaluate_with_bias(x)) # Don't pass in `new_model`." ] @@ -1665,7 +1806,7 @@ "source": [ "### Creating tf.Variables\n", "\n", - "`Function` only supports singleton `tf.Variable`s created once on the first call, and reused across subsequent function calls. The code snippet below would create a new `tf.Variable` in every function call, which results in a `ValueError` exception.\n", + "`tf.function` only supports singleton `tf.Variable`s created once on the first call, and reused across subsequent function calls. The code snippet below would create a new `tf.Variable` in every function call, which results in a `ValueError` exception.\n", "\n", "Example:" ] @@ -1726,7 +1867,7 @@ }, "source": [ "#### Using with multiple Keras optimizers\n", - "You may encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when using more than one Keras optimizer with a `tf.function`. This error occurs because optimizers internally create `tf.Variables` when they apply gradients for the first time." + "You may encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when using more than one Keras optimizer with a `tf.function`. This error occurs because optimizers internally create `tf.Variable`s when they apply gradients for the first time." 
] }, { @@ -1739,7 +1880,7 @@ "source": [ "opt1 = tf.keras.optimizers.Adam(learning_rate = 1e-2)\n", "opt2 = tf.keras.optimizers.Adam(learning_rate = 1e-3)\n", - " \n", + "\n", "@tf.function\n", "def train_step(w, x, y, optimizer):\n", " with tf.GradientTape() as tape:\n", @@ -1763,7 +1904,46 @@ "id": "7Q8BRPCThTjB" }, "source": [ - "If you need to change the optimizer during training, a workaround is to create a new `Function` for each optimizer, calling the [`ConcreteFunction`](#obtaining_concrete_functions) directly." + "If you need to change a stateful object between calls, it's simplest to define a `tf.Module` subclass, and create instances to hold those objects:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3P59ocmIslHz" + }, + "outputs": [], + "source": [ + "class TrainStep(tf.Module):\n", + " def __init__(self, optimizer):\n", + " self.optimizer = optimizer\n", + "\n", + " @tf.function\n", + " def __call__(self, w, x, y):\n", + " with tf.GradientTape() as tape:\n", + " L = tf.reduce_sum(tf.square(w*x - y))\n", + " gradients = tape.gradient(L, [w])\n", + " self.optimizer.apply_gradients(zip(gradients, [w]))\n", + "\n", + "\n", + "opt1 = tf.keras.optimizers.Adam(learning_rate = 1e-2)\n", + "opt2 = tf.keras.optimizers.Adam(learning_rate = 1e-3)\n", + "\n", + "train_o1 = TrainStep(opt1)\n", + "train_o2 = TrainStep(opt2)\n", + "\n", + "train_o1(w, x, y)\n", + "train_o2(w, x, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dUHUi881smHF" + }, + "source": [ + "You could also do this manually by creating multiple instances of the `@tf.function` wrapper, one for each optimizer:" ] }, { @@ -1788,14 +1968,14 @@ "x = tf.constant([-1.])\n", "y = tf.constant([2.])\n", "\n", - "# Make a new Function and ConcreteFunction for each optimizer.\n", - "train_step_1 = tf.function(train_step).get_concrete_function(w, x, y, opt1)\n", - "train_step_2 = tf.function(train_step).get_concrete_function(w, x, y, opt2)\n", + "# Make a new tf.function and ConcreteFunction for each optimizer.\n", + "train_step_1 = tf.function(train_step)\n", + "train_step_2 = tf.function(train_step)\n", "for i in range(10):\n", " if i % 2 == 0:\n", - " train_step_1(w, x, y) # `opt1` is not used as a parameter. \n", + " train_step_1(w, x, y, opt1)\n", " else:\n", - " train_step_2(w, x, y) # `opt2` is not used as a parameter." + " train_step_2(w, x, y, opt2)" ] }, { @@ -1806,9 +1986,9 @@ "source": [ "#### Using with multiple Keras models\n", "\n", - "You may also encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when passing different model instances to the same `Function`.\n", + "You may also encounter `ValueError: tf.function only supports singleton tf.Variables created on the first call.` when passing different model instances to the same `tf.function`.\n", "\n", - "This error occurs because Keras models (which [do not have their input shape defined](https://www.tensorflow.org/guide/keras/custom_layers_and_models#best_practice_deferring_weight_creation_until_the_shape_of_the_inputs_is_known)) and Keras layers create `tf.Variables`s when they are first called. You may be attempting to initialize those variables inside a `Function`, which has already been called. 
To avoid this error, try calling `model.build(input_shape)` to initialize all the weights before training the model.\n" + "This error occurs because Keras models (which [do not have their input shape defined](https://www.tensorflow.org/guide/keras/custom_layers_and_models#best_practice_deferring_weight_creation_until_the_shape_of_the_inputs_is_known)) and Keras layers create `tf.Variable`s when they are first called. You may be attempting to initialize those variables inside a `tf.function`, which has already been called. To avoid this error, try calling `model.build(input_shape)` to initialize all the weights before training the model.\n" ] }, { @@ -1819,14 +1999,15 @@ "source": [ "## Further reading\n", "\n", - "To learn about how to export and load a `Function`, see the [SavedModel guide](../../guide/saved_model). To learn more about graph optimizations that are performed after tracing, see the [Grappler guide](../../guide/graph_optimization). To learn how to optimize your data pipeline and profile your model, see the [Profiler guide](../../guide/profiler.md)." + "To learn about how to export and load a `tf.function`, see the [SavedModel guide](../../guide/saved_model). To learn more about graph optimizations that are performed after tracing, see the [Grappler guide](../../guide/graph_optimization). To learn how to optimize your data pipeline and profile your model, see the [Profiler guide](../../guide/profiler.md)." ] } ], "metadata": { "colab": { - "collapsed_sections": [], "name": "function.ipynb", + "private_outputs": true, + "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/gpu_performance_analysis.md b/site/en/guide/gpu_performance_analysis.md index ecbb03ad3ad..2832686d8f1 100644 --- a/site/en/guide/gpu_performance_analysis.md +++ b/site/en/guide/gpu_performance_analysis.md @@ -169,8 +169,8 @@ the trace viewer, you should look at the model code between steps and check if disabling callbacks/metrics improves performance. Some details of these ops are also on the trace viewer (both device and host side).The recommendation in this scenario is to amortize the overhead of these ops by executing them after a -fixed number of steps instead of every step. When using the `compile` method in -the `tf.keras` API, setting the `experimental_steps_per_execution` flag does +fixed number of steps instead of every step. When using the `Model.compile` method in +the `tf.keras` API, setting the `steps_per_execution` flag does this automatically. For custom training loops, use `tf.while_loop`. #### 2. Achieve higher device utilization diff --git a/site/en/guide/graph_optimization.ipynb b/site/en/guide/graph_optimization.ipynb index 50eedda621c..063d8817489 100644 --- a/site/en/guide/graph_optimization.ipynb +++ b/site/en/guide/graph_optimization.ipynb @@ -90,7 +90,7 @@ "* *Constant folding optimizer -* Statically infers the value of tensors when possible by folding constant nodes in the graph and materializes the result using constants.\n", "* *Arithmetic optimizer -* Simplifies arithmetic operations by eliminating common subexpressions and simplifying arithmetic statements. 
\n", "* *Layout optimizer -* Optimizes tensor layouts to execute data format dependent operations such as convolutions more efficiently.\n", - "* *Remapper optimizer -* Remaps subgraphs onto more efficient implementations by replacing commonly occuring subgraphs with optimized fused monolithic kernels.\n", + "* *Remapper optimizer -* Remaps subgraphs onto more efficient implementations by replacing commonly occurring subgraphs with optimized fused monolithic kernels.\n", "* *Memory optimizer -* Analyzes the graph to inspect the peak memory usage for each operation and inserts CPU-GPU memory copy operations for swapping GPU memory to CPU to reduce the peak memory usage.\n", "* *Dependency optimizer -* Removes or rearranges control dependencies to shorten the critical path for a model step or enables other\n", "optimizations. Also removes nodes that are effectively no-ops such as Identity.\n", @@ -101,7 +101,7 @@ "* *Loop optimizer -* Optimizes the graph control flow by hoisting loop-invariant subgraphs out of loops and by removing redundant stack operations in loops. Also optimizes loops with statically known trip counts and removes statically known dead branches in conditionals.\n", "* *Scoped allocator optimizer -* Introduces scoped allocators to reduce data movement and to consolidate some operations.\n", "* *Pin to host optimizer -* Swaps small operations onto the CPU. This optimizer is turned OFF by default. \n", - "* *Auto mixed precision optimizer -* Converts data types to float16 where applicable to improve performance. Currently applies only to GPUs.\n", + "* *Auto mixed precision optimizer -* Converts data types to float16 where applicable to improve performance. Currently applies to GPUs and the latest Intel Xeon CPUs.\n", "* *Debug stripper -* Strips nodes related to debugging operations such as `tf.debugging.Assert`, `tf.debugging.check_numerics`, and `tf.print` from the graph. This optimizer is turned OFF by default." ] }, @@ -166,7 +166,7 @@ "source": [ "## Compare execution performance with and without Grappler\n", "\n", - "TensorFlow 2 and beyond executes [eagerly](../eager.md) by default. Use `tf.function` to switch the default execution to Graph mode. Grappler runs automatically in the background to apply the graph optimizations above and improve execution performance. \n" + "TensorFlow 2 and beyond executes eagerly by default. Use `tf.function` to switch the default execution to Graph mode. Grappler runs automatically in the background to apply the graph optimizations above and improve execution performance. 
\n" ] }, { diff --git a/site/en/guide/images/new_type_promotion/type_promotion_lattice.png b/site/en/guide/images/new_type_promotion/type_promotion_lattice.png new file mode 100644 index 00000000000..501698965a2 Binary files /dev/null and b/site/en/guide/images/new_type_promotion/type_promotion_lattice.png differ diff --git a/site/en/guide/images/new_type_promotion/type_promotion_table.png b/site/en/guide/images/new_type_promotion/type_promotion_table.png new file mode 100644 index 00000000000..62bb465212a Binary files /dev/null and b/site/en/guide/images/new_type_promotion/type_promotion_table.png differ diff --git a/site/en/guide/images/tensor/shape2.png b/site/en/guide/images/tensor/shape2.png index 3609ff2c263..a316359c8fc 100644 Binary files a/site/en/guide/images/tensor/shape2.png and b/site/en/guide/images/tensor/shape2.png differ diff --git a/site/en/guide/intro_to_graphs.ipynb b/site/en/guide/intro_to_graphs.ipynb index 19b5c5f432e..4fe442632ba 100644 --- a/site/en/guide/intro_to_graphs.ipynb +++ b/site/en/guide/intro_to_graphs.ipynb @@ -76,7 +76,7 @@ "\n", "Note: For those of you who are only familiar with TensorFlow 1.x, this guide demonstrates a very different view of graphs.\n", "\n", - "**This is a big-picture overview that covers how `tf.function` allows you to switch from eager execution to graph execution.** For a more complete specification of `tf.function`, go to the [`tf.function` guide](function.ipynb).\n" + "**This is a big-picture overview that covers how `tf.function` allows you to switch from eager execution to graph execution.** For a more complete specification of `tf.function`, go to the [Better performance with `tf.function`](./function.ipynb) guide.\n" ] }, { @@ -87,13 +87,13 @@ "source": [ "### What are graphs?\n", "\n", - "In the previous three guides, you ran TensorFlow **eagerly**. This means TensorFlow operations are executed by Python, operation by operation, and returning results back to Python.\n", + "In the previous three guides, you ran TensorFlow **eagerly**. This means TensorFlow operations are executed by Python, operation by operation, and return results back to Python.\n", "\n", "While eager execution has several unique advantages, graph execution enables portability outside Python and tends to offer better performance. **Graph execution** means that tensor computations are executed as a *TensorFlow graph*, sometimes referred to as a `tf.Graph` or simply a \"graph.\"\n", "\n", "**Graphs are data structures that contain a set of `tf.Operation` objects, which represent units of computation; and `tf.Tensor` objects, which represent the units of data that flow between operations.** They are defined in a `tf.Graph` context. Since these graphs are data structures, they can be saved, run, and restored all without the original Python code.\n", "\n", - "This is what a TensorFlow graph representing a two-layer neural network looks like when visualized in TensorBoard.\n" + "This is what a TensorFlow graph representing a two-layer neural network looks like when visualized in TensorBoard:" ] }, { @@ -113,7 +113,7 @@ "source": [ "### The benefits of graphs\n", "\n", - "With a graph, you have a great deal of flexibility. You can use your TensorFlow graph in environments that don't have a Python interpreter, like mobile applications, embedded devices, and backend servers. TensorFlow uses graphs as the format for [saved models](saved_model) when it exports them from Python.\n", + "With a graph, you have a great deal of flexibility. 
You can use your TensorFlow graph in environments that don't have a Python interpreter, like mobile applications, embedded devices, and backend servers. TensorFlow uses graphs as the format for [saved models](./saved_model.ipynb) when it exports them from Python.\n", "\n", "Graphs are also easily optimized, allowing the compiler to do transformations like:\n", "\n", @@ -144,6 +144,15 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "0d1689fa928f" + }, + "source": [ + "Import some necessary libraries:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -165,7 +174,7 @@ "source": [ "## Taking advantage of graphs\n", "\n", - "You create and run a graph in TensorFlow by using `tf.function`, either as a direct call or as a decorator. `tf.function` takes a regular function as input and returns a `Function`. **A `Function` is a Python callable that builds TensorFlow graphs from the Python function. You use a `Function` in the same way as its Python equivalent.**\n" + "You create and run a graph in TensorFlow by using `tf.function`, either as a direct call or as a decorator. `tf.function` takes a regular function as input and returns a `tf.types.experimental.PolymorphicFunction`. **A `PolymorphicFunction` is a Python callable that builds TensorFlow graphs from the Python function. You use a `tf.function` in the same way as its Python equivalent.**\n" ] }, { @@ -182,7 +191,8 @@ " x = x + b\n", " return x\n", "\n", - "# `a_function_that_uses_a_graph` is a TensorFlow `Function`.\n", + "# The Python type of `a_function_that_uses_a_graph` will now be a\n", + "# `PolymorphicFunction`.\n", "a_function_that_uses_a_graph = tf.function(a_regular_function)\n", "\n", "# Make some tensors.\n", @@ -191,7 +201,7 @@ "b1 = tf.constant(4.0)\n", "\n", "orig_value = a_regular_function(x1, y1, b1).numpy()\n", - "# Call a `Function` like a Python function.\n", + "# Call a `tf.function` like a Python function.\n", "tf_function_value = a_function_that_uses_a_graph(x1, y1, b1).numpy()\n", "assert(orig_value == tf_function_value)" ] @@ -202,7 +212,7 @@ "id": "PNvuAYpdrTOf" }, "source": [ - "On the outside, a `Function` looks like a regular function you write using TensorFlow operations. [Underneath](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/def_function.py), however, it is *very different*. A `Function` **encapsulates [several `tf.Graph`s behind one API](#polymorphism_one_function_many_graphs).** That is how `Function` is able to give you the [benefits of graph execution](#the_benefits_of_graphs), like speed and deployability." + "On the outside, a `tf.function` looks like a regular function you write using TensorFlow operations. [Underneath](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/eager/polymorphic_function/polymorphic_function.py), however, it is *very different*. The underlying `PolymorphicFunction` **encapsulates several `tf.Graph`s behind one API** (learn more in the _Polymorphism_ section). That is how a `tf.function` is able to give you the benefits of graph execution, like speed and deployability (refer to _The benefits of graphs_ above)." 
] }, { @@ -227,7 +237,8 @@ " x = x + b\n", " return x\n", "\n", - "# Use the decorator to make `outer_function` a `Function`.\n", + "# Using the `tf.function` decorator makes `outer_function` into a\n", + "# `PolymorphicFunction`.\n", "@tf.function\n", "def outer_function(x):\n", " y = tf.constant([[2.0], [3.0]])\n", @@ -274,7 +285,8 @@ " else:\n", " return 0\n", "\n", - "# `tf_simple_relu` is a TensorFlow `Function` that wraps `simple_relu`.\n", + "# Using `tf.function` makes `tf_simple_relu` a `PolymorphicFunction` that wraps\n", + "# `simple_relu`.\n", "tf_simple_relu = tf.function(simple_relu)\n", "\n", "print(\"First branch, with graph:\", tf_simple_relu(tf.constant(1)).numpy())\n", @@ -320,7 +332,7 @@ "id": "GZ4Ieg6tBE6l" }, "source": [ - "Most of the time, `tf.function` will work without special considerations. However, there are some caveats, and the [tf.function guide](./function.ipynb) can help here, as well as the [complete AutoGraph reference](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/index.md)" + "Most of the time, `tf.function` will work without special considerations. However, there are some caveats, and the [`tf.function` guide](./function.ipynb) can help here, as well as the [complete AutoGraph reference](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/index.md)." ] }, { @@ -329,13 +341,13 @@ "id": "sIpc_jfjEZEg" }, "source": [ - "### Polymorphism: one `Function`, many graphs\n", + "### Polymorphism: one `tf.function`, many graphs\n", "\n", - "A `tf.Graph` is specialized to a specific type of inputs (for example, tensors with a specific [`dtype`](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) or objects with the same [`id()`](https://docs.python.org/3/library/functions.html#id])).\n", + "A `tf.Graph` is specialized to a specific type of inputs (for example, tensors with a specific [`dtype`](https://www.tensorflow.org/api_docs/python/tf/dtypes/DType) or objects with the same [`id()`](https://docs.python.org/3/library/functions.html#id)).\n", "\n", - "Each time you invoke a `Function` with a set of arguments that can't be handled by any of its existing graphs (such as arguments with new `dtypes` or incompatible shapes), `Function` creates a new `tf.Graph` specialized to those new arguments. The type specification of a `tf.Graph`'s inputs is known as its **input signature** or just a **signature**. For more information regarding when a new `tf.Graph` is generated and how that can be controlled, see the [rules of retracing](https://www.tensorflow.org/guide/function#rules_of_tracing).\n", + "Each time you invoke a `tf.function` with a set of arguments that can't be handled by any of its existing graphs (such as arguments with new `dtypes` or incompatible shapes), it creates a new `tf.Graph` specialized to those new arguments. The type specification of a `tf.Graph`'s inputs is represented by `tf.types.experimental.FunctionType`, also referred to as the **signature**. For more information regarding when a new `tf.Graph` is generated, how that can be controlled, and how `FunctionType` can be useful, go to the _Rules of tracing_ section of the [Better performance with `tf.function`](./function.ipynb) guide.\n", "\n", - "The `Function` stores the `tf.Graph` corresponding to that signature in a `ConcreteFunction`. 
**A `ConcreteFunction` is a wrapper around a `tf.Graph`.**\n" + "The `tf.function` stores the `tf.Graph` corresponding to that signature in a `ConcreteFunction`. **A `ConcreteFunction` can be thought of as a wrapper around a `tf.Graph`.**\n" ] }, { @@ -350,7 +362,7 @@ "def my_relu(x):\n", " return tf.maximum(0., x)\n", "\n", - "# `my_relu` creates new graphs as it observes more signatures.\n", + "# `my_relu` creates new graphs as it observes different input types.\n", "print(my_relu(tf.constant(5.5)))\n", "print(my_relu([1, -1]))\n", "print(my_relu(tf.constant([3., -3.])))" @@ -362,7 +374,7 @@ "id": "1qRtw7R4KL9X" }, "source": [ - "If the `Function` has already been called with that signature, `Function` does not create a new `tf.Graph`." + "If the `tf.function` has already been called with the same input types, it does not create a new `tf.Graph`." ] }, { @@ -374,8 +386,8 @@ "outputs": [], "source": [ "# These two calls do *not* create new graphs.\n", - "print(my_relu(tf.constant(-2.5))) # Signature matches `tf.constant(5.5)`.\n", - "print(my_relu(tf.constant([-1., 1.]))) # Signature matches `tf.constant([3., -3.])`." + "print(my_relu(tf.constant(-2.5))) # Input type matches `tf.constant(5.5)`.\n", + "print(my_relu(tf.constant([-1., 1.]))) # Input type matches `tf.constant([3., -3.])`." ] }, { @@ -384,7 +396,7 @@ "id": "UohRmexhIpvQ" }, "source": [ - "Because it's backed by multiple graphs, a `Function` is **polymorphic**. That enables it to support more input types than a single `tf.Graph` could represent, as well as to optimize each `tf.Graph` for better performance." + "Because it's backed by multiple graphs, a `tf.function` is (as the name \"PolymorphicFunction\" suggests) **polymorphic**. That enables it to support more input types than a single `tf.Graph` could represent, and to optimize each `tf.Graph` for better performance." ] }, { @@ -419,7 +431,7 @@ "source": [ "### Graph execution vs. eager execution\n", "\n", - "The code in a `Function` can be executed both eagerly and as a graph. By default, `Function` executes its code as a graph:\n" + "The code in a `tf.function` can be executed both eagerly and as a graph. By default, `tf.function` executes its code as a graph:\n" ] }, { @@ -467,7 +479,7 @@ "id": "cyZNCRcQorGO" }, "source": [ - "To verify that your `Function`'s graph is doing the same computation as its equivalent Python function, you can make it execute eagerly with `tf.config.run_functions_eagerly(True)`. This is a switch that **turns off `Function`'s ability to create and run graphs**, instead executing the code normally." + "To verify that your `tf.function`'s graph is doing the same computation as its equivalent Python function, you can make it execute eagerly with `tf.config.run_functions_eagerly(True)`. This is a switch that **turns off `tf.function`'s ability to create and run graphs**, instead of executing the code normally." ] }, { @@ -510,7 +522,7 @@ "id": "DKT3YBsqy0x4" }, "source": [ - "However, `Function` can behave differently under graph and eager execution. The Python [`print`](https://docs.python.org/3/library/functions.html#print) function is one example of how these two modes differ. Let's check out what happens when you insert a `print` statement to your function and call it repeatedly.\n" + "However, `tf.function` can behave differently under graph and eager execution. The Python [`print`](https://docs.python.org/3/library/functions.html#print) function is one example of how these two modes differ. 
Let's check out what happens when you insert a `print` statement to your function and call it repeatedly." ] }, { @@ -558,7 +570,7 @@ "source": [ "Is the output surprising? **`get_MSE` only printed once even though it was called *three* times.**\n", "\n", - "To explain, the `print` statement is executed when `Function` runs the original code in order to create the graph in a process known as [\"tracing\"](function.ipynb#tracing). **Tracing captures the TensorFlow operations into a graph, and `print` is not captured in the graph.** That graph is then executed for all three calls **without ever running the Python code again**.\n", + "To explain, the `print` statement is executed when `tf.function` runs the original code in order to create the graph in a process known as \"tracing\" (refer to the _Tracing_ section of the [`tf.function` guide](./function.ipynb). **Tracing captures the TensorFlow operations into a graph, and `print` is not captured in the graph.** That graph is then executed for all three calls **without ever running the Python code again**.\n", "\n", "As a sanity check, let's turn off graph execution to compare:" ] @@ -606,7 +618,7 @@ "id": "PUR7qC_bquCn" }, "source": [ - "`print` is a *Python side effect*, and there are other differences that you should be aware of when converting a function into a `Function`. Learn more in the _Limitations_ section of the [Better performance with tf.function](./function.ipynb#limitations) guide." + "`print` is a *Python side effect*, and there are other differences that you should be aware of when converting a function into a `tf.function`. Learn more in the _Limitations_ section of the [Better performance with `tf.function`](./function.ipynb) guide." ] }, { @@ -628,7 +640,7 @@ "\n", "\n", "\n", - "Graph execution only executes the operations necessary to produce the observable effects, which includes:\n", + "Graph execution only executes the operations necessary to produce the observable effects, which include:\n", "\n", "- The return value of the function\n", "- Documented well-known side-effects such as:\n", @@ -676,7 +688,7 @@ " tf.gather(x, [1]) # unused\n", " return x\n", "\n", - "# Only needed operations are run during graph exection. The error is not raised.\n", + "# Only needed operations are run during graph execution. The error is not raised.\n", "print(unused_return_graph(tf.constant([0.0])))" ] }, @@ -686,16 +698,16 @@ "id": "def6MupG9R0O" }, "source": [ - "###`tf.function` best practices\n", + "### `tf.function` best practices\n", "\n", - "It may take some time to get used to the behavior of `Function`. To get started quickly, first-time users should play around with decorating toy functions with `@tf.function` to get experience with going from eager to graph execution.\n", + "It may take some time to get used to the behavior of `tf.function`. To get started quickly, first-time users should play around with decorating toy functions with `@tf.function` to get experience with going from eager to graph execution.\n", "\n", "*Designing for `tf.function`* may be your best bet for writing graph-compatible TensorFlow programs. Here are some tips:\n", "- Toggle between eager and graph execution early and often with `tf.config.run_functions_eagerly` to pinpoint if/ when the two modes diverge.\n", "- Create `tf.Variable`s\n", - "outside the Python function and modify them on the inside. 
The same goes for objects that use `tf.Variable`, like `keras.layers`, `keras.Model`s and `tf.optimizers`.\n", - "- Avoid writing functions that [depend on outer Python variables](function#depending_on_python_global_and_free_variables), excluding `tf.Variable`s and Keras objects.\n", - "- Prefer to write functions which take tensors and other TensorFlow types as input. You can pass in other object types but [be careful](function#depending_on_python_objects)!\n", + "outside the Python function and modify them on the inside. The same goes for objects that use `tf.Variable`, like `tf.keras.layers`, `tf.keras.Model`s and `tf.keras.optimizers`.\n", + "- Avoid writing functions that depend on outer Python variables, excluding `tf.Variable`s and Keras objects. Learn more in _Depending on Python global and free variables_ of the [`tf.function` guide](./function.ipynb).\n", + "- Prefer to write functions which take tensors and other TensorFlow types as input. You can pass in other object types but be careful! Learn more in _Depending on Python objects_ of the [`tf.function` guide](./function.ipynb).\n", "- Include as much computation as possible under a `tf.function` to maximize the performance gain. For example, decorate a whole training step or the entire training loop.\n" ] }, @@ -742,7 +754,7 @@ }, "outputs": [], "source": [ - "print(\"Eager execution:\", timeit.timeit(lambda: power(x, 100), number=1000))" + "print(\"Eager execution:\", timeit.timeit(lambda: power(x, 100), number=1000), \"seconds\")" ] }, { @@ -754,7 +766,7 @@ "outputs": [], "source": [ "power_as_graph = tf.function(power)\n", - "print(\"Graph execution:\", timeit.timeit(lambda: power_as_graph(x, 100), number=1000))" + "print(\"Graph execution:\", timeit.timeit(lambda: power_as_graph(x, 100), number=1000), \"seconds\")" ] }, { @@ -763,9 +775,9 @@ "id": "Q1Pfo5YwwILi" }, "source": [ - "`tf.function` is commonly used to speed up training loops, and you can learn more about it in [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch#speeding-up_your_training_step_with_tffunction) with Keras.\n", + "`tf.function` is commonly used to speed up training loops, and you can learn more about it in the _Speeding-up your training step with `tf.function`_ section of the [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) with Keras guide.\n", "\n", - "Note: You can also try [`tf.function(jit_compile=True)`](https://www.tensorflow.org/xla#explicit_compilation_with_tffunctionjit_compiletrue) for a more significant performance boost, especially if your code is heavy on TF control flow and uses many small tensors." + "Note: You can also try `tf.function(jit_compile=True)` for a more significant performance boost, especially if your code is heavy on TensorFlow control flow and uses many small tensors. Learn more in the _Explicit compilation with `tf.function(jit_compile=True)`_ section of the [XLA overview](https://www.tensorflow.org/xla)." ] }, { @@ -778,7 +790,7 @@ "\n", "Graphs can speed up your code, but the process of creating them has some overhead. For some functions, the creation of the graph takes more time than the execution of the graph. 
**This investment is usually quickly paid back with the performance boost of subsequent executions, but it's important to be aware that the first few steps of any large model training can be slower due to tracing.**\n", "\n", - "No matter how large your model, you want to avoid tracing frequently. The `tf.function` guide discusses [how to set input specifications and use tensor arguments](function#controlling_retracing) to avoid retracing. If you find you are getting unusually poor performance, it's a good idea to check if you are retracing accidentally." + "No matter how large your model, you want to avoid tracing frequently. In the _Controlling retracing_ section, the [`tf.function` guide](./function.ipynb) discusses how to set input specifications and use tensor arguments to avoid retracing. If you find you are getting unusually poor performance, it's a good idea to check if you are retracing accidentally." ] }, { @@ -787,9 +799,9 @@ "id": "F4InDaTjwmBA" }, "source": [ - "## When is a `Function` tracing?\n", + "## When is a `tf.function` tracing?\n", "\n", - "To figure out when your `Function` is tracing, add a `print` statement to its code. As a rule of thumb, `Function` will execute the `print` statement every time it traces." + "To figure out when your `tf.function` is tracing, add a `print` statement to its code. As a rule of thumb, `tf.function` will execute the `print` statement every time it traces." ] }, { @@ -843,13 +855,12 @@ "source": [ "## Next steps\n", "\n", - "You can learn more about `tf.function` on the API reference page and by following the [Better performance with `tf.function`](function.ipynb) guide." + "You can learn more about `tf.function` on the API reference page and by following the [Better performance with `tf.function`](./function.ipynb) guide." ] } ], "metadata": { "colab": { - "collapsed_sections": [], "name": "intro_to_graphs.ipynb", "toc_visible": true }, diff --git a/site/en/guide/intro_to_modules.ipynb b/site/en/guide/intro_to_modules.ipynb index 7393e1544dc..79bbe89ca56 100644 --- a/site/en/guide/intro_to_modules.ipynb +++ b/site/en/guide/intro_to_modules.ipynb @@ -91,6 +91,7 @@ "outputs": [], "source": [ "import tensorflow as tf\n", + "import keras\n", "from datetime import datetime\n", "\n", "%load_ext tensorboard" @@ -102,10 +103,12 @@ "id": "yt5HEbsYAbw1" }, "source": [ - "## Defining models and layers in TensorFlow\n", + "## TensorFlow Modules\n", "\n", "Most models are made of layers. Layers are functions with a known mathematical structure that can be reused and have trainable variables. In TensorFlow, most high-level implementations of layers and models, such as Keras or [Sonnet](https://github.com/deepmind/sonnet), are built on the same foundational class: `tf.Module`.\n", "\n", + "### Building Modules\n", + "\n", "Here's an example of a very simple `tf.Module` that operates on a scalar tensor:\n" ] }, @@ -337,7 +340,7 @@ "id": "JOLVVBT8J_dl" }, "source": [ - "## Saving weights\n", + "### Saving weights\n", "\n", "You can save a `tf.Module` as both a [checkpoint](./checkpoint.ipynb) and a [SavedModel](./saved_model.ipynb).\n", "\n", @@ -403,7 +406,7 @@ "id": "4eGaNiQWcK4j" }, "source": [ - "During distributed (multi-machine) training they can be sharded, which is why they are numbered (e.g., '00000-of-00001'). In this case, though, there is only have one shard.\n", + "During distributed (multi-machine) training they can be sharded, which is why they are numbered (e.g., '00000-of-00001'). 
In this case, though, there is only one shard.\n", "\n", "When you load models back in, you overwrite the values in your Python object." ] @@ -439,7 +442,7 @@ "id": "pSZebVuWxDXu" }, "source": [ - "## Saving functions\n", + "### Saving functions\n", "\n", "TensorFlow can run models without the original Python objects, as demonstrated by [TensorFlow Serving](https://tensorflow.org/tfx) and [TensorFlow Lite](https://tensorflow.org/lite), even when you download a trained model from [TensorFlow Hub](https://tensorflow.org/hub).\n", "\n", @@ -699,6 +702,25 @@ "In this section, you will examine how Keras uses `tf.Module`. A complete user guide to Keras models can be found in the [Keras guide](https://www.tensorflow.org/guide/keras/sequential_model).\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "ds08u3touwe4t" + }, + "source": [ + "Keras layers and models have a lot more extra features including:\n", + "\n", + "* Optional losses\n", + "* Support for [metrics](https://keras.io/api/layers/base_layer/#add_metric-method)\n", + "* Built-in support for an optional `training` argument to differentiate between training and inference use\n", + "* Saving and restoring python objects instead of just black-box functions\n", + "* `get_config` and `from_config` methods that allow you to accurately store configurations to allow model cloning in Python\n", + "\n", + "These features allow for far more complex models through subclassing, such as a custom GAN or a Variational AutoEncoder (VAE) model. Read about them in the [full guide](./keras/custom_layers_and_models.ipynb) to custom layers and models.\n", + "\n", + "Keras models also come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines." + ] + }, { "cell_type": "markdown", "metadata": { @@ -874,22 +896,6 @@ " print(\"Failed:\", e)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "YnporXiudF1I" - }, - "source": [ - "Keras layers have a lot more extra features including:\n", - "\n", - "* Optional losses\n", - "* Support for metrics\n", - "* Built-in support for an optional `training` argument to differentiate between training and inference use\n", - "* `get_config` and `from_config` methods that allow you to accurately store configurations to allow model cloning in Python\n", - "\n", - "Read about them in the [full guide](./keras/custom_layers_and_models.ipynb) to custom layers and models." - ] - }, { "cell_type": "markdown", "metadata": { @@ -900,7 +906,7 @@ "\n", "You can define your model as nested Keras layers.\n", "\n", - "However, Keras also provides a full-featured model class called `tf.keras.Model`. It inherits from `tf.keras.layers.Layer`, so a Keras model can be used, nested, and saved in the same way as Keras layers. Keras models come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines.\n", + "However, Keras also provides a full-featured model class called `tf.keras.Model`. It inherits from `tf.keras.layers.Layer`, so a Keras model can be used and nested in the same way as Keras layers. 
Keras models come with extra functionality that makes them easy to train, evaluate, load, save, and even train on multiple machines.\n", "\n", "You can define the `SequentialModule` from above with nearly identical code, again converting `__call__` to `call()` and changing the parent:" ] @@ -913,6 +919,7 @@ }, "outputs": [], "source": [ + "@keras.saving.register_keras_serializable()\n", "class MySequentialModel(tf.keras.Model):\n", " def __init__(self, name=None, **kwargs):\n", " super().__init__(**kwargs)\n", @@ -938,7 +945,7 @@ "source": [ "All the same features are available, including tracking variables and submodules.\n", "\n", - "Note: To emphasize the note above, a raw `tf.Module` nested inside a Keras layer or model will not get its variables collected for training or saving. Instead, nest Keras layers inside of Keras layers." + "Note: A raw `tf.Module` nested inside a Keras layer or model will not get its variables collected for training or saving. Instead, nest Keras layers inside of Keras layers." ] }, { @@ -1022,11 +1029,9 @@ "id": "qI9aXLnaHEFF" }, "source": [ - "## Saving Keras models\n", - "\n", - "Keras models can be checkpointed, and that will look the same as `tf.Module`.\n", + "### Saving Keras models\n", "\n", - "Keras models can also be saved with `tf.saved_model.save()`, as they are modules. However, Keras models have convenience methods and other functionality:" + "Keras models have their own specialized zip archive saving format, marked by the `.keras` extension. When calling `tf.keras.Model.save`, add a `.keras` extension to the filename. For example:" ] }, { @@ -1037,7 +1042,7 @@ }, "outputs": [], "source": [ - "my_sequential_model.save(\"exname_of_file\")" + "my_sequential_model.save(\"exname_of_file.keras\")" ] }, { @@ -1057,7 +1062,7 @@ }, "outputs": [], "source": [ - "reconstructed_model = tf.keras.models.load_model(\"exname_of_file\")" + "reconstructed_model = tf.keras.models.load_model(\"exname_of_file.keras\")" ] }, { @@ -1066,7 +1071,7 @@ "id": "EA7P_MNvpviZ" }, "source": [ - "Keras `SavedModels` also save metric, loss, and optimizer states.\n", + "Keras zip archives — `.keras` files — also save metric, loss, and optimizer states.\n", "\n", "This reconstructed model can be used and will produce the same result when called on the same data:" ] @@ -1082,6 +1087,17 @@ "reconstructed_model(tf.constant([[2.0, 2.0, 2.0]]))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "seLIUG2354s" + }, + "source": [ + "### Checkpointing Keras models\n", + "\n", + "Keras models can also be checkpointed, and that will look the same as `tf.Module`." + ] + }, { "cell_type": "markdown", "metadata": { diff --git a/site/en/guide/jax2tf.ipynb b/site/en/guide/jax2tf.ipynb new file mode 100644 index 00000000000..613c622658d --- /dev/null +++ b/site/en/guide/jax2tf.ipynb @@ -0,0 +1,851 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ckM5wJMsNTYL" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Authors." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "NKvERjPVNWxu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bqePLdDjNhNk" + }, + "source": [ + "# Import a JAX model using JAX2TF" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gw3w46yhNiK_" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IyrsY3uTOmPY" + }, + "source": [ + "This notebook provides a complete, runnable example of creating a model using [JAX](https://jax.readthedocs.io/en/latest/) and bringing it into TensorFlow to continue training. This is made possible by [JAX2TF](https://github.com/google/jax/tree/main/jax/experimental/jax2tf), a lightweight API that provides a pathway from the JAX ecosystem to the TensorFlow ecosystem. \n", + "\n", + "JAX is a high-performance array computing library. To create the model, this notebook uses [Flax](https://flax.readthedocs.io/en/latest/), a neural network library for JAX. To train it, it uses [Optax](https://optax.readthedocs.io), an optimization library for JAX.\n", + "\n", + "If you're a researcher using JAX, JAX2TF gives you a path to production using TensorFlow's proven tools.\n", + "\n", + "There are many ways this can be useful, here are just a few:\n", + "\n", + "* Inference: Taking a model written for JAX and deploying it either on a server using TF Serving, on-device using TFLite, or on the web using TensorFlow.js. \n", + "\n", + "* Fine-tuning: Taking a model that was trained using JAX, you can bring its components to TF using JAX2TF, and continue training it in TensorFlow with your existing training data and setup.\n", + "\n", + "* Fusion: Combining parts of models that were trained using JAX with those trained using TensorFlow, for maximum flexibility.\n", + "\n", + "The key to enabling this kind of interoperation between JAX and TensorFlow is `jax2tf.convert`, which takes in model components created on top of JAX (your loss function, prediction function, etc) and creates equivalent representations of them as TensorFlow functions, which can then be exported as a TensorFlow SavedModel." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G6rtu96yOepm" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9yqxfHzr0LPF" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import numpy as np\n", + "import jax\n", + "import jax.numpy as jnp\n", + "import flax\n", + "import optax\n", + "import os\n", + "from matplotlib import pyplot as plt\n", + "from jax.experimental import jax2tf\n", + "from threading import Lock # Only used in the visualization utility.\n", + "from functools import partial" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SDnTaZO0r872" + }, + "outputs": [], + "source": [ + "# Needed for TensorFlow and JAX to coexist in GPU memory.\n", + "os.environ['XLA_PYTHON_CLIENT_PREALLOCATE'] = \"false\"\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " try:\n", + " for gpu in gpus:\n", + " tf.config.experimental.set_memory_growth(gpu, True)\n", + " except RuntimeError as e:\n", + " # Memory growth must be set before GPUs have been initialized.\n", + " print(e)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "BXOjCNJxDLil" + }, + "outputs": [], + "source": [ + "#@title Visualization utilities\n", + "\n", + "plt.rcParams[\"figure.figsize\"] = (20,8)\n", + "\n", + "# The utility for displaying training and validation curves.\n", + "def display_train_curves(loss, avg_loss, eval_loss, eval_accuracy, epochs, steps_per_epochs, ignore_first_n=10):\n", + "\n", + " ignore_first_n_epochs = int(ignore_first_n/steps_per_epochs)\n", + "\n", + " # The losses.\n", + " ax = plt.subplot(121)\n", + " if loss is not None:\n", + " x = np.arange(len(loss)) / steps_per_epochs #* epochs\n", + " ax.plot(x, loss)\n", + " ax.plot(range(1, epochs+1), avg_loss, \"-o\", linewidth=3)\n", + " ax.plot(range(1, epochs+1), eval_loss, \"-o\", linewidth=3)\n", + " ax.set_title('Loss')\n", + " ax.set_ylabel('loss')\n", + " ax.set_xlabel('epoch')\n", + " if loss is not None:\n", + " ax.set_ylim(0, np.max(loss[ignore_first_n:]))\n", + " ax.legend(['train', 'avg train', 'eval'])\n", + " else:\n", + " ymin = np.min(avg_loss[ignore_first_n_epochs:])\n", + " ymax = np.max(avg_loss[ignore_first_n_epochs:])\n", + " ax.set_ylim(ymin-(ymax-ymin)/10, ymax+(ymax-ymin)/10)\n", + " ax.legend(['avg train', 'eval'])\n", + "\n", + " # The accuracy.\n", + " ax = plt.subplot(122)\n", + " ax.set_title('Eval Accuracy')\n", + " ax.set_ylabel('accuracy')\n", + " ax.set_xlabel('epoch')\n", + " ymin = np.min(eval_accuracy[ignore_first_n_epochs:])\n", + " ymax = np.max(eval_accuracy[ignore_first_n_epochs:])\n", + " ax.set_ylim(ymin-(ymax-ymin)/10, ymax+(ymax-ymin)/10)\n", + " ax.plot(range(1, epochs+1), eval_accuracy, \"-o\", linewidth=3)\n", + "\n", + "class Progress:\n", + " \"\"\"Text mode progress bar.\n", + " Usage:\n", + " p = Progress(30)\n", + " p.step()\n", + " p.step()\n", + " p.step(reset=True) # to restart form 0%\n", + " The progress bar displays a new header at each restart.\"\"\"\n", + " def __init__(self, maxi, size=100, msg=\"\"):\n", + " \"\"\"\n", + " :param maxi: the number of steps required to reach 100%\n", + " :param size: the number of characters taken on the screen by the progress bar\n", + " :param msg: the message displayed in the header of the progress bar\n", + " \"\"\"\n", + " self.maxi = maxi\n", + " self.p = self.__start_progress(maxi)() # `()`: to get the iterator from the 
generator.\n", + " self.header_printed = False\n", + " self.msg = msg\n", + " self.size = size\n", + " self.lock = Lock()\n", + "\n", + " def step(self, reset=False):\n", + " with self.lock:\n", + " if reset:\n", + " self.__init__(self.maxi, self.size, self.msg)\n", + " if not self.header_printed:\n", + " self.__print_header()\n", + " next(self.p)\n", + "\n", + " def __print_header(self):\n", + " print()\n", + " format_string = \"0%{: ^\" + str(self.size - 6) + \"}100%\"\n", + " print(format_string.format(self.msg))\n", + " self.header_printed = True\n", + "\n", + " def __start_progress(self, maxi):\n", + " def print_progress():\n", + " # Bresenham's algorithm. Yields the number of dots printed.\n", + " # This will always print 100 dots in max invocations.\n", + " dx = maxi\n", + " dy = self.size\n", + " d = dy - dx\n", + " for x in range(maxi):\n", + " k = 0\n", + " while d >= 0:\n", + " print('=', end=\"\", flush=True)\n", + " k += 1\n", + " d -= dx\n", + " d += dy\n", + " yield k\n", + " # Keep yielding the last result if there are too many steps.\n", + " while True:\n", + " yield k\n", + "\n", + " return print_progress" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6xgS_8nDDIu8" + }, + "source": [ + "## Download and prepare the MNIST dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nbN7rmuF0VFB" + }, + "outputs": [], + "source": [ + "(x_train, train_labels), (x_test, test_labels) = tf.keras.datasets.mnist.load_data()\n", + "\n", + "train_data = tf.data.Dataset.from_tensor_slices((x_train, train_labels))\n", + "train_data = train_data.map(lambda x,y: (tf.expand_dims(tf.cast(x, tf.float32)/255.0, axis=-1),\n", + " tf.one_hot(y, depth=10)))\n", + "\n", + "BATCH_SIZE = 256\n", + "train_data = train_data.batch(BATCH_SIZE, drop_remainder=True)\n", + "train_data = train_data.cache()\n", + "train_data = train_data.shuffle(5000, reshuffle_each_iteration=True)\n", + "\n", + "test_data = tf.data.Dataset.from_tensor_slices((x_test, test_labels))\n", + "test_data = test_data.map(lambda x,y: (tf.expand_dims(tf.cast(x, tf.float32)/255.0, axis=-1),\n", + " tf.one_hot(y, depth=10)))\n", + "test_data = test_data.batch(10000)\n", + "test_data = test_data.cache()\n", + "\n", + "(one_batch, one_batch_labels) = next(iter(train_data)) # just one batch\n", + "(all_test_data, all_test_labels) = next(iter(test_data)) # all in one batch since batch size is 10000" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LuZTo7SM3W_n" + }, + "source": [ + "## Configure training\n", + "This notebook will create and train a simple model for demonstration purposes." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3vbKB4yZ3aTL" + }, + "outputs": [], + "source": [ + "# Training hyperparameters.\n", + "JAX_EPOCHS = 3\n", + "TF_EPOCHS = 7\n", + "STEPS_PER_EPOCH = len(train_labels)//BATCH_SIZE\n", + "LEARNING_RATE = 0.01\n", + "LEARNING_RATE_EXP_DECAY = 0.6\n", + "\n", + "# The learning rate schedule for JAX (with Optax).\n", + "jlr_decay = optax.exponential_decay(LEARNING_RATE, transition_steps=STEPS_PER_EPOCH, decay_rate=LEARNING_RATE_EXP_DECAY, staircase=True)\n", + "\n", + "# The learning rate schedule for TensorFlow.\n", + "tflr_decay = tf.keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=LEARNING_RATE, decay_steps=STEPS_PER_EPOCH, decay_rate=LEARNING_RATE_EXP_DECAY, staircase=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Od3sMwQxtC34" + }, + "source": [ + "## Create the model using Flax" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-ybqQF2zd2QX" + }, + "outputs": [], + "source": [ + "class ConvModel(flax.linen.Module):\n", + "\n", + " @flax.linen.compact\n", + " def __call__(self, x, train):\n", + " x = flax.linen.Conv(features=12, kernel_size=(3,3), padding=\"SAME\", use_bias=False)(x)\n", + " x = flax.linen.BatchNorm(use_running_average=not train, use_scale=False, use_bias=True)(x)\n", + " x = x.reshape((x.shape[0], -1)) # flatten\n", + " x = flax.linen.Dense(features=200, use_bias=True)(x)\n", + " x = flax.linen.BatchNorm(use_running_average=not train, use_scale=False, use_bias=True)(x)\n", + " x = flax.linen.Dropout(rate=0.3, deterministic=not train)(x)\n", + " x = flax.linen.relu(x)\n", + " x = flax.linen.Dense(features=10)(x)\n", + " #x = flax.linen.log_softmax(x)\n", + " return x\n", + "\n", + " # JAX differentiation requires a function `f(params, other_state, data, labels)` -> `loss` (as a single number).\n", + " # `jax.grad` will differentiate it against the first argument.\n", + " # The user must split trainable and non-trainable variables into `params` and `other_state`.\n", + " # Must pass a different RNG key each time for the dropout mask to be different.\n", + " def loss(self, params, other_state, rng, data, labels, train):\n", + " logits, batch_stats = self.apply({'params': params, **other_state},\n", + " data,\n", + " mutable=['batch_stats'],\n", + " rngs={'dropout': rng},\n", + " train=train)\n", + " # The loss averaged across the batch dimension.\n", + " loss = optax.softmax_cross_entropy(logits, labels).mean()\n", + " return loss, batch_stats\n", + "\n", + " def predict(self, state, data):\n", + " logits = self.apply(state, data, train=False) # predict and accuracy disable dropout and use accumulated batch norm stats (train=False)\n", + " probabilities = flax.linen.log_softmax(logits)\n", + " return probabilities\n", + "\n", + " def accuracy(self, state, data, labels):\n", + " probabilities = self.predict(state, data)\n", + " predictions = jnp.argmax(probabilities, axis=-1)\n", + " dense_labels = jnp.argmax(labels, axis=-1)\n", + " accuracy = jnp.equal(predictions, dense_labels).mean()\n", + " return accuracy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Cr0FRNFtHN4" + }, + "source": [ + "## Write the training step function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tmDwApcpgZzw" + }, + "outputs": [], + "source": [ + "# The training step.\n", + "@partial(jax.jit, static_argnums=[0]) # this forces jax.jit to recompile for every new model\n", + 
"def train_step(model, state, optimizer_state, rng, data, labels):\n", + "\n", + " other_state, params = state.pop('params') # differentiate only against 'params' which represents trainable variables\n", + " (loss, batch_stats), grads = jax.value_and_grad(model.loss, has_aux=True)(params, other_state, rng, data, labels, train=True)\n", + "\n", + " updates, optimizer_state = optimizer.update(grads, optimizer_state)\n", + " params = optax.apply_updates(params, updates)\n", + " new_state = state.copy(add_or_replace={**batch_stats, 'params': params})\n", + "\n", + " rng, _ = jax.random.split(rng)\n", + "\n", + " return new_state, optimizer_state, rng, loss" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zr16g6NzV4O9" + }, + "source": [ + "## Write the training loop" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zbl5w-KUV7Qw" + }, + "outputs": [], + "source": [ + "def train(model, state, optimizer_state, train_data, epochs, losses, avg_losses, eval_losses, eval_accuracies):\n", + " p = Progress(STEPS_PER_EPOCH)\n", + " rng = jax.random.PRNGKey(0)\n", + " for epoch in range(epochs):\n", + "\n", + " # This is where the learning rate schedule state is stored in the optimizer state.\n", + " optimizer_step = optimizer_state[1].count\n", + "\n", + " # Run an epoch of training.\n", + " for step, (data, labels) in enumerate(train_data):\n", + " p.step(reset=(step==0))\n", + " state, optimizer_state, rng, loss = train_step(model, state, optimizer_state, rng, data.numpy(), labels.numpy())\n", + " losses.append(loss)\n", + " avg_loss = np.mean(losses[-step:])\n", + " avg_losses.append(avg_loss)\n", + "\n", + " # Run one epoch of evals (10,000 test images in a single batch).\n", + " other_state, params = state.pop('params')\n", + " # Gotcha: must discard modified batch_stats here\n", + " eval_loss, _ = model.loss(params, other_state, rng, all_test_data.numpy(), all_test_labels.numpy(), train=False)\n", + " eval_losses.append(eval_loss)\n", + " eval_accuracy = model.accuracy(state, all_test_data.numpy(), all_test_labels.numpy())\n", + " eval_accuracies.append(eval_accuracy)\n", + "\n", + " print(\"\\nEpoch\", epoch, \"train loss:\", avg_loss, \"eval loss:\", eval_loss, \"eval accuracy\", eval_accuracy, \"lr:\", jlr_decay(optimizer_step))\n", + "\n", + " return state, optimizer_state" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DGB3W5g0Wt1H" + }, + "source": [ + "## Create the model and the optimizer (with Optax)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mW5mkmCWtN8W" + }, + "outputs": [], + "source": [ + "# The model.\n", + "model = ConvModel()\n", + "state = model.init({'params':jax.random.PRNGKey(0), 'dropout':jax.random.PRNGKey(0)}, one_batch, train=True) # Flax allows a separate RNG for \"dropout\"\n", + "\n", + "# The optimizer.\n", + "optimizer = optax.adam(learning_rate=jlr_decay) # Gotcha: it does not seem to be possible to pass just a callable as LR, must be an Optax Schedule\n", + "optimizer_state = optimizer.init(state['params'])\n", + "\n", + "losses=[]\n", + "avg_losses=[]\n", + "eval_losses=[]\n", + "eval_accuracies=[]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FJdsKghBNF" + }, + "source": [ + "## Train the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nmcofTTBZSIb" + }, + "outputs": [], + "source": [ + "new_state, new_optimizer_state = train(model, state, optimizer_state, train_data, 
JAX_EPOCHS+TF_EPOCHS, losses, avg_losses, eval_losses, eval_accuracies)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n_20vgvDXB5r" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=1*STEPS_PER_EPOCH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0lT3cdENCBzL" + }, + "source": [ + "## Partially train the model\n", + "\n", + "You will continue training the model in TensorFlow shortly." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KT-xqj5N7C6L" + }, + "outputs": [], + "source": [ + "model = ConvModel()\n", + "state = model.init({'params':jax.random.PRNGKey(0), 'dropout':jax.random.PRNGKey(0)}, one_batch, train=True) # Flax allows a separate RNG for \"dropout\"\n", + "\n", + "# The optimizer.\n", + "optimizer = optax.adam(learning_rate=jlr_decay) # LR must be an Optax LR Schedule\n", + "optimizer_state = optimizer.init(state['params'])\n", + "\n", + "losses, avg_losses, eval_losses, eval_accuracies = [], [], [], []" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oa362HMDbzDE" + }, + "outputs": [], + "source": [ + "state, optimizer_state = train(model, state, optimizer_state, train_data, JAX_EPOCHS, losses, avg_losses, eval_losses, eval_accuracies)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0IyZtUPPCt0y" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=1*STEPS_PER_EPOCH)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uNtlSaOCCumB" + }, + "source": [ + "## Save just enough for inference\n", + "\n", + "If your goal is to deploy your JAX model (so you can run inference using `model.predict()`), simply exporting it to [SavedModel](https://www.tensorflow.org/guide/saved_model) is sufficient. This section demonstrates how to accomplish that." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O653B3-5H8FL" + }, + "outputs": [], + "source": [ + "# Test data with a different batch size to test polymorphic shapes.\n", + "x, y = next(iter(train_data.unbatch().batch(13)))\n", + "\n", + "m = tf.Module()\n", + "# Wrap the JAX state in `tf.Variable` (needed when calling the converted JAX function).\n", + "state_vars = tf.nest.map_structure(tf.Variable, state)\n", + "# Keep the wrapped state as a flat list (needed in TensorFlow fine-tuning).\n", + "m.vars = tf.nest.flatten(state_vars)\n", + "# Convert the desired JAX function (`model.predict`).\n", + "predict_fn = jax2tf.convert(model.predict, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\"])\n", + "# Wrap the converted function in `tf.function` with the correct `tf.TensorSpec` (necessary for dynamic shapes to work).\n", + "@tf.function(autograph=False, input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32)])\n", + "def predict(data):\n", + " return predict_fn(state_vars, data)\n", + "m.predict = predict\n", + "tf.saved_model.save(m, \"./\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8HFx67zStgvo" + }, + "outputs": [], + "source": [ + "# Test the converted function.\n", + "print(\"Converted function predictions:\", np.argmax(m.predict(x).numpy(), axis=-1))\n", + "# Reload the model.\n", + "reloaded_model = tf.saved_model.load(\"./\")\n", + "# Test the reloaded converted function (the result should be the same).\n", + "print(\"Reloaded function predictions:\", np.argmax(reloaded_model.predict(x).numpy(), axis=-1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eEk8wv4HJu94" + }, + "source": [ + "## Save everything\n", + "If your goal is a comprehensive export (useful if you're planning on bringing the model into TensorFlow for fine-tuning, fusion, etc.), this section demonstrates how to save the model so you can access methods including:\n", + "\n", + " - model.predict\n", + " - model.accuracy\n", + " - model.loss (including train=True/False bool, RNG for dropout and BatchNorm state updates)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9mty52pmvDDp" + }, + "outputs": [], + "source": [ + "from collections import abc\n", + "\n", + "def _fix_frozen(d):\n", + " \"\"\"Changes any mappings (e.g. 
frozendict) back to dict.\"\"\"\n", + " if isinstance(d, list):\n", + " return [_fix_frozen(v) for v in d]\n", + " elif isinstance(d, tuple):\n", + " return tuple(_fix_frozen(v) for v in d)\n", + " elif not isinstance(d, abc.Mapping):\n", + " return d\n", + " d = dict(d)\n", + " for k, v in d.items():\n", + " d[k] = _fix_frozen(v)\n", + " return d" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3HEsKNXbCwXw" + }, + "outputs": [], + "source": [ + "class TFModel(tf.Module):\n", + " def __init__(self, state, model):\n", + " super().__init__()\n", + "\n", + " # Special care needed for the train=True/False parameter in the loss\n", + " @jax.jit\n", + " def loss_with_train_bool(state, rng, data, labels, train):\n", + " other_state, params = state.pop('params')\n", + " loss, batch_stats = jax.lax.cond(train,\n", + " lambda state, data, labels: model.loss(params, other_state, rng, data, labels, train=True),\n", + " lambda state, data, labels: model.loss(params, other_state, rng, data, labels, train=False),\n", + " state, data, labels)\n", + " # must use JAX to split the RNG, therefore, must do it in a @jax.jit function\n", + " new_rng, _ = jax.random.split(rng)\n", + " return loss, batch_stats, new_rng\n", + "\n", + " self.state_vars = tf.nest.map_structure(tf.Variable, state)\n", + " self.vars = tf.nest.flatten(self.state_vars)\n", + " self.jax_rng = tf.Variable(jax.random.PRNGKey(0))\n", + "\n", + " self.loss_fn = jax2tf.convert(loss_with_train_bool, polymorphic_shapes=[\"...\", \"...\", \"(b, 28, 28, 1)\", \"(b, 10)\", \"...\"])\n", + " self.accuracy_fn = jax2tf.convert(model.accuracy, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\", \"(b, 10)\"])\n", + " self.predict_fn = jax2tf.convert(model.predict, polymorphic_shapes=[\"...\", \"(b, 28, 28, 1)\"])\n", + "\n", + " # Must specify TensorSpec manually for variable batch size to work\n", + " @tf.function(autograph=False, input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32)])\n", + " def predict(self, data):\n", + " # Make sure the TfModel.predict function implicitly use self.state_vars and not the JAX state directly\n", + " # otherwise, all model weights would be embedded in the TF graph as constants.\n", + " return self.predict_fn(self.state_vars, data)\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def train_loss(self, data, labels):\n", + " loss, batch_stats, new_rng = self.loss_fn(self.state_vars, self.jax_rng, data, labels, True)\n", + " # update batch norm stats\n", + " flat_vars = tf.nest.flatten(self.state_vars['batch_stats'])\n", + " flat_values = tf.nest.flatten(batch_stats['batch_stats'])\n", + " for var, val in zip(flat_vars, flat_values):\n", + " var.assign(val)\n", + " # update RNG\n", + " self.jax_rng.assign(new_rng)\n", + " return loss\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def eval_loss(self, data, labels):\n", + " loss, batch_stats, new_rng = self.loss_fn(self.state_vars, self.jax_rng, data, labels, False)\n", + " return loss\n", + "\n", + " @tf.function(input_signature=[tf.TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None, 10), dtype=tf.float32)],\n", + " autograph=False)\n", + " def accuracy(self, data, labels):\n", + " 
return self.accuracy_fn(self.state_vars, data, labels)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "znJrAVpcxO9u" + }, + "outputs": [], + "source": [ + "# Instantiate the model.\n", + "tf_model = TFModel(state, model)\n", + "\n", + "# Save the model.\n", + "tf.saved_model.save(tf_model, \"./\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y02DHEwTjNzV" + }, + "source": [ + "## Reload the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "i75yS3v2jPpM" + }, + "outputs": [], + "source": [ + "reloaded_model = tf.saved_model.load(\"./\")\n", + "\n", + "# Test if it works and that the batch size is indeed variable.\n", + "x,y = next(iter(train_data.unbatch().batch(13)))\n", + "print(np.argmax(reloaded_model.predict(x).numpy(), axis=-1))\n", + "x,y = next(iter(train_data.unbatch().batch(20)))\n", + "print(np.argmax(reloaded_model.predict(x).numpy(), axis=-1))\n", + "\n", + "print(reloaded_model.accuracy(one_batch, one_batch_labels))\n", + "print(reloaded_model.accuracy(all_test_data, all_test_labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DiwEAwQmlx1x" + }, + "source": [ + "## Continue training the converted JAX model in TensorFlow" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MubFcO_jl2vE" + }, + "outputs": [], + "source": [ + "optimizer = tf.keras.optimizers.Adam(learning_rate=tflr_decay)\n", + "\n", + "# Set the iteration step for the learning rate to resume from where it left off in JAX.\n", + "optimizer.iterations.assign(len(eval_losses)*STEPS_PER_EPOCH)\n", + "\n", + "p = Progress(STEPS_PER_EPOCH)\n", + "\n", + "for epoch in range(JAX_EPOCHS, JAX_EPOCHS+TF_EPOCHS):\n", + "\n", + " # This is where the learning rate schedule state is stored in the optimizer state.\n", + " optimizer_step = optimizer.iterations\n", + "\n", + " for step, (data, labels) in enumerate(train_data):\n", + " p.step(reset=(step==0))\n", + " with tf.GradientTape() as tape:\n", + " #loss = reloaded_model.loss(data, labels, True)\n", + " loss = reloaded_model.train_loss(data, labels)\n", + " grads = tape.gradient(loss, reloaded_model.vars)\n", + " optimizer.apply_gradients(zip(grads, reloaded_model.vars))\n", + " losses.append(loss)\n", + " avg_loss = np.mean(losses[-step:])\n", + " avg_losses.append(avg_loss)\n", + "\n", + " eval_loss = reloaded_model.eval_loss(all_test_data.numpy(), all_test_labels.numpy()).numpy()\n", + " eval_losses.append(eval_loss)\n", + " eval_accuracy = reloaded_model.accuracy(all_test_data.numpy(), all_test_labels.numpy()).numpy()\n", + " eval_accuracies.append(eval_accuracy)\n", + "\n", + " print(\"\\nEpoch\", epoch, \"train loss:\", avg_loss, \"eval loss:\", eval_loss, \"eval accuracy\", eval_accuracy, \"lr:\", tflr_decay(optimizer.iterations).numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "50V1FSmI6UTk" + }, + "outputs": [], + "source": [ + "display_train_curves(losses, avg_losses, eval_losses, eval_accuracies, len(eval_losses), STEPS_PER_EPOCH, ignore_first_n=2*STEPS_PER_EPOCH)\n", + "\n", + "# The loss takes a hit when the training restarts, but does not go back to random levels.\n", + "# This is likely caused by the optimizer momentum being reinitialized." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L7lSziW0K0ny" + }, + "source": [ + "## Next steps\n", + "You can learn more about [JAX](https://jax.readthedocs.io/en/latest/index.html) and [Flax](https://flax.readthedocs.io/en/latest) on their documentation websites which contain detailed guides and examples. If you're new to JAX, be sure to explore the [JAX 101 tutorials](https://jax.readthedocs.io/en/latest/jax-101/index.html), and check out the [Flax quickstart](https://flax.readthedocs.io/en/latest/getting_started.html). To learn more about converting JAX models to TensorFlow format, check out the [jax2tf](https://github.com/google/jax/tree/main/jax/experimental/jax2tf) utility on GitHub. If you're interested in converting JAX models to run in the browser with TensorFlow.js, visit [JAX on the Web with TensorFlow.js](https://blog.tensorflow.org/2022/08/jax-on-web-with-tensorflowjs.html). If you'd like to prepare JAX models to run in TensorFLow Lite, visit the [JAX Model Conversion For TFLite](https://www.tensorflow.org/lite/examples/jax_conversion/overview) guide." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "jax2tf.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/keras.md b/site/en/guide/keras.md new file mode 100644 index 00000000000..3dee7be3aa1 --- /dev/null +++ b/site/en/guide/keras.md @@ -0,0 +1,135 @@ +# Keras: The high-level API for TensorFlow + +Keras is the high-level API of the TensorFlow platform. It provides an +approachable, highly-productive interface for solving machine learning (ML) +problems, with a focus on modern deep learning. Keras covers every step of the +machine learning workflow, from data processing to hyperparameter tuning to +deployment. It was developed with a focus on enabling fast experimentation. + +With Keras, you have full access to the scalability and cross-platform +capabilities of TensorFlow. You can run Keras on a TPU Pod or large clusters of +GPUs, and you can export Keras models to run in the browser or on mobile +devices. You can also serve Keras models via a web API. + +Keras is designed to reduce cognitive load by achieving the following goals: + +* Offer simple, consistent interfaces. +* Minimize the number of actions required for common use cases. +* Provide clear, actionable error messages. +* Follow the principle of progressive disclosure of complexity: It's easy to get + started, and you can complete advanced workflows by learning as you go. +* Help you write concise, readable code. + +## Who should use Keras + +The short answer is that every TensorFlow user should use the Keras APIs by +default. Whether you're an engineer, a researcher, or an ML practitioner, you +should start with Keras. + +There are a few use cases (for example, building tools on top of TensorFlow or +developing your own high-performance platform) that require the low-level +[TensorFlow Core APIs](https://www.tensorflow.org/guide/core). But if your use +case doesn't fall into one +of the +[Core API applications](https://www.tensorflow.org/guide/core#core_api_applications), +you should prefer Keras. + +## Keras API components + +The core data structures of Keras are [layers](https://keras.io/api/layers/) and +[models](https://keras.io/api/models/). A layer is a simple input/output +transformation, and a model is a directed acyclic graph (DAG) of layers. 
+ +### Layers + +The `tf.keras.layers.Layer` class is the fundamental abstraction in Keras. A +`Layer` encapsulates a state (weights) and some computation (defined in the +`tf.keras.layers.Layer.call` method). + +Weights created by layers can be trainable or non-trainable. Layers are +recursively composable: If you assign a layer instance as an attribute of +another layer, the outer layer will start tracking the weights created by the +inner layer. + +You can also use layers to handle data preprocessing tasks like normalization +and text vectorization. Preprocessing layers can be included directly into a +model, either during or after training, which makes the model portable. + +### Models + +A model is an object that groups layers together and that can be trained on +data. + +The simplest type of model is the +[`Sequential` model](https://www.tensorflow.org/guide/keras/sequential_model), +which is a linear stack of layers. For more complex architectures, you can +either use the +[Keras functional API](https://www.tensorflow.org/guide/keras/functional_api), +which lets you build arbitrary graphs of layers, or +[use subclassing to write models from scratch](https://www.tensorflow.org/guide/keras/making_new_layers_and_models_via_subclassing). + +The `tf.keras.Model` class features built-in training and evaluation methods: + +* `tf.keras.Model.fit`: Trains the model for a fixed number of epochs. +* `tf.keras.Model.predict`: Generates output predictions for the input samples. +* `tf.keras.Model.evaluate`: Returns the loss and metrics values for the model; + configured via the `tf.keras.Model.compile` method. + +These methods give you access to the following built-in training features: + +* [Callbacks](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks). + You can leverage built-in callbacks for early stopping, model checkpointing, + and [TensorBoard](https://www.tensorflow.org/tensorboard) monitoring. You can + also + [implement custom callbacks](https://www.tensorflow.org/guide/keras/writing_your_own_callbacks). +* [Distributed training](https://www.tensorflow.org/guide/keras/distributed_training). + You can easily scale up your training to multiple GPUs, TPUs, or devices. +* Step fusing. With the `steps_per_execution` argument in + `tf.keras.Model.compile`, you can process multiple batches in a single + `tf.function` call, which greatly improves device utilization on TPUs. + +For a detailed overview of how to use `fit`, see the +[training and evaluation guide](https://www.tensorflow.org/guide/keras/training_with_built_in_methods). +To learn how to customize the built-in training and evaluation loops, see +[Customizing what happens in `fit()`](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit). + +### Other APIs and tools + +Keras provides many other APIs and tools for deep learning, including: + +* [Optimizers](https://keras.io/api/optimizers/) +* [Metrics](https://keras.io/api/metrics/) +* [Losses](https://keras.io/api/losses/) +* [Data loading utilities](https://keras.io/api/data_loading/) + +For a full list of available APIs, see the +[Keras API reference](https://keras.io/api/). To learn more about other Keras +projects and initiatives, see +[The Keras ecosystem](https://keras.io/getting_started/ecosystem/). 
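The following snippet is not part of this guide; it is a brief sketch, using random data and an arbitrary two-layer architecture, of how the concepts above fit together: layers stacked into a `Sequential` model, `compile` configuring training, and the built-in `fit`, `evaluate`, and `predict` methods.

```python
import numpy as np
import tensorflow as tf

# A model is a graph of layers; `Sequential` is the simplest case: a linear stack.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation="relu", input_shape=(8,)),
    tf.keras.layers.Dense(1),
])

# `compile` configures the optimizer, loss, and metrics used by the built-in
# training and evaluation methods.
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# Purely illustrative random data.
x = np.random.rand(256, 8).astype("float32")
y = np.random.rand(256, 1).astype("float32")

model.fit(x, y, epochs=2, batch_size=32)      # Built-in training loop.
loss, mae = model.evaluate(x, y, verbose=0)   # Loss and metric values.
predictions = model.predict(x[:3])            # Output predictions.
```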
+ +## Next steps + +To get started using Keras with TensorFlow, check out the following topics: + +* [The Sequential model](https://www.tensorflow.org/guide/keras/sequential_model) +* [The Functional API](https://www.tensorflow.org/guide/keras/functional) +* [Training & evaluation with the built-in methods](https://www.tensorflow.org/guide/keras/training_with_built_in_methods) +* [Making new layers and models via subclassing](https://www.tensorflow.org/guide/keras/custom_layers_and_models) +* [Serialization and saving](https://www.tensorflow.org/guide/keras/save_and_serialize) +* [Working with preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) +* [Customizing what happens in fit()](https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit) +* [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) +* [Working with RNNs](https://www.tensorflow.org/guide/keras/rnn) +* [Understanding masking & padding](https://www.tensorflow.org/guide/keras/masking_and_padding) +* [Writing your own callbacks](https://www.tensorflow.org/guide/keras/custom_callback) +* [Transfer learning & fine-tuning](https://www.tensorflow.org/guide/keras/transfer_learning) +* [Multi-GPU and distributed training](https://www.tensorflow.org/guide/keras/distributed_training) + +To learn more about Keras, see the following topics at +[keras.io](http://keras.io): + +* [About Keras](https://keras.io/about/) +* [Introduction to Keras for Engineers](https://keras.io/getting_started/intro_to_keras_for_engineers/) +* [Introduction to Keras for Researchers](https://keras.io/getting_started/intro_to_keras_for_researchers/) +* [Keras API reference](https://keras.io/api/) +* [The Keras ecosystem](https://keras.io/getting_started/ecosystem/) \ No newline at end of file diff --git a/site/en/guide/migrate/canned_estimators.ipynb b/site/en/guide/migrate/canned_estimators.ipynb index 66d688b7676..68859511a84 100644 --- a/site/en/guide/migrate/canned_estimators.ipynb +++ b/site/en/guide/migrate/canned_estimators.ipynb @@ -37,7 +37,7 @@ "id": "77z2OchJTk0l" }, "source": [ - "# Migration Examples: Canned Estimators\n", + "# Migration examples: Canned Estimators\n", "\n", "\n", "
\n", @@ -67,15 +67,15 @@ "source": [ "Canned (or Premade) Estimators have traditionally been used in TensorFlow 1 as quick and easy ways to train models for a variety of typical use cases. TensorFlow 2 provides straightforward approximate substitutes for a number of them by way of Keras models. For those canned estimators that do not have built-in TensorFlow 2 substitutes, you can still build your own replacement fairly easily.\n", "\n", - "This guide walks through a few examples of direct equivalents and custom substitutions to demonstrate how TensorFlow 1's `tf.estimator`-derived models can be migrated to TF2 with Keras.\n", + "This guide will walk you through a few examples of direct equivalents and custom substitutions to demonstrate how TensorFlow 1's `tf.estimator`-derived models can be migrated to TensorFlow 2 with Keras.\n", "\n", "Namely, this guide includes examples for migrating:\n", "* From `tf.estimator`'s `LinearEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to Keras `tf.compat.v1.keras.models.LinearModel` in TensorFlow 2\n", "* From `tf.estimator`'s `DNNEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to a custom Keras DNN ModelKeras in TensorFlow 2\n", "* From `tf.estimator`'s `DNNLinearCombinedEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tf.compat.v1.keras.models.WideDeepModel` in TensorFlow 2\n", - "* From `tf.estimator`'s `BoostedTreesEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tf.compat.v1.keras.models.WideDeepModel` in TensorFlow 2\n", + "* From `tf.estimator`'s `BoostedTreesEstimator`, `Classifier` or `Regressor` in TensorFlow 1 to `tfdf.keras.GradientBoostedTreesModel` in TensorFlow 2\n", "\n", - "A common precursor to the training of a model is feature preprocessing, which is done for TensorFlow 1 Estimator models with `tf.feature_column`. For more information on feature preprocessing in TensorFlow 2, see [this guide on migrating feature columns](migrating_feature_columns.ipynb)." + "A common precursor to the training of a model is feature preprocessing, which is done for TensorFlow 1 Estimator models with `tf.feature_column`. For more information on feature preprocessing in TensorFlow 2, see [this guide on migrating from feature columns to the Keras preprocessing layers API](migrating_feature_columns.ipynb)." ] }, { @@ -108,11 +108,11 @@ }, "outputs": [], "source": [ - "import keras\n", "import pandas as pd\n", "import tensorflow as tf\n", "import tensorflow.compat.v1 as tf1\n", - "import tensorflow_decision_forests as tfdf\n" + "import tensorflow_decision_forests as tfdf\n", + "from tensorflow import keras\n" ] }, { @@ -183,7 +183,7 @@ "id": "bYSgoezeMrpI" }, "source": [ - "and create a method to instantiate a simplistic sample optimizer to use with our various TensorFlow 1 Estimator and TensorFlow 2 Keras models." + "and create a method to instantiate a simplistic sample optimizer to use with various TensorFlow 1 Estimator and TensorFlow 2 Keras models." 
] }, { @@ -196,7 +196,7 @@ "source": [ "def create_sample_optimizer(tf_version):\n", " if tf_version == 'tf1':\n", - " optimizer = lambda: tf.keras.optimizers.Ftrl(\n", + " optimizer = lambda: tf.keras.optimizers.legacy.Ftrl(\n", " l1_regularization_strength=0.001,\n", " learning_rate=tf1.train.exponential_decay(\n", " learning_rate=0.1,\n", @@ -204,7 +204,7 @@ " decay_steps=10000,\n", " decay_rate=0.9))\n", " elif tf_version == 'tf2':\n", - " optimizer = tf.keras.optimizers.Ftrl(\n", + " optimizer = tf.keras.optimizers.legacy.Ftrl(\n", " l1_regularization_strength=0.001,\n", " learning_rate=tf.keras.optimizers.schedules.ExponentialDecay(\n", " initial_learning_rate=0.1, decay_steps=10000, decay_rate=0.9))\n", @@ -226,7 +226,7 @@ "id": "_O7fyhCnpvED" }, "source": [ - "### TF1: Using LinearEstimator" + "### TensorFlow 1: Using LinearEstimator" ] }, { @@ -270,7 +270,7 @@ "id": "KEmzBjfnsxwT" }, "source": [ - "### TF2: Using Keras LinearModel" + "### TensorFlow 2: Using Keras LinearModel" ] }, { @@ -311,7 +311,7 @@ "id": "YKl6XZ7Bp1t5" }, "source": [ - "### TF1: Using DNNEstimator" + "### TensorFlow 1: Using DNNEstimator" ] }, { @@ -320,7 +320,7 @@ "id": "J7wJUmgypln8" }, "source": [ - "In TensorFlow 1, you can use `tf.estimator.DNNEstimator` to create a baseline DNN model for regression and classification problems." + "In TensorFlow 1, you can use `tf.estimator.DNNEstimator` to create a baseline deep neural network (DNN) model for regression and classification problems." ] }, { @@ -357,7 +357,7 @@ "id": "6xJz6px6pln-" }, "source": [ - "### TF2: Using Keras to Create a Custom DNN Model" + "### TensorFlow 2: Using Keras to create a custom DNN model" ] }, { @@ -368,7 +368,7 @@ "source": [ "In TensorFlow 2, you can create a custom DNN model to substitute for one generated by `tf.estimator.DNNEstimator`, with similar levels of user-specified customization (for instance, as in the previous example, the ability to customize a chosen model optimizer).\n", "\n", - "A similar workflow can be used to replace `tf.estimator.experimental.RNNEstimator` with a Keras RNN Model. Keras provides a number of built-in, customizable choices by way of `tf.keras.layers.RNN`, `tf.keras.layers.LSTM`, and `tf.keras.layers.GRU` - see [here](https://www.tensorflow.org/guide/keras/rnn#built-in_rnn_layers_a_simple_example) for more details." + "A similar workflow can be used to replace `tf.estimator.experimental.RNNEstimator` with a Keras recurrent neural network (RNN) model. Keras provides a number of built-in, customizable choices by way of `tf.keras.layers.RNN`, `tf.keras.layers.LSTM`, and `tf.keras.layers.GRU`. To learn more, check out the _Built-in RNN layers: a simple example_ section of [RNN with Keras guide](https://www.tensorflow.org/guide/keras/rnn)." ] }, { @@ -413,7 +413,7 @@ "id": "GfRaObf5g4TU" }, "source": [ - "### TF1: Using DNNLinearCombinedEstimator" + "### TensorFlow 1: Using DNNLinearCombinedEstimator" ] }, { @@ -464,7 +464,7 @@ "id": "BeMikL5ug4TX" }, "source": [ - "### TF2: Using Keras WideDeepModel" + "### TensorFlow 2: Using Keras WideDeepModel" ] }, { @@ -477,7 +477,7 @@ "\n", "This `WideDeepModel` is constructed on the basis of a constituent `LinearModel` and a custom DNN Model, both of which are discussed in the preceding two examples. 
A custom linear model can also be used in place of the built-in Keras `LinearModel` if desired.\n", "\n", - "If you would like to build your own model instead of a canned estimator, check out [how to build a `keras.Sequential` model](https://www.tensorflow.org/guide/keras/sequential_model). For more information on custom training and optimizers you can also checkout [this guide](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough)." + "If you would like to build your own model instead of using a canned estimator, check out the [Keras Sequential model](https://www.tensorflow.org/guide/keras/sequential_model) guide. For more information on custom training and optimizers, check out the [Custom training: walkthrough](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough) guide." ] }, { @@ -532,7 +532,7 @@ "id": "_3mCQVDSeOKD" }, "source": [ - "### TF1: Using BoostedTreesEstimator" + "### TensorFlow 1: Using BoostedTreesEstimator" ] }, { @@ -578,7 +578,7 @@ "id": "eNuLP6BeeOKF" }, "source": [ - "### TF2: Using TensorFlow Decision Forests" + "### TensorFlow 2: Using TensorFlow Decision Forests" ] }, { @@ -620,7 +620,7 @@ "id": "B1qTdAS-VpXk" }, "source": [ - "Create a TensorFlow dataset. Note that Decision Forests support natively many types of features and do not need pre-processing." + "Create a TensorFlow dataset. Note that Decision Forests natively support many types of features and do not need pre-processing." ] }, { @@ -689,9 +689,9 @@ "id": "Z22UJ5SUqToQ" }, "source": [ - "Gradient Boosted Trees is just one of the many decision forests algorithms avaiable in TensorFlow Decision Forests. For example, Random Forests (available as [tfdf.keras.GradientBoostedTreesModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/RandomForestModel) is very resistant to overfitting) while CART (available as [tfdf.keras.CartModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/CartModel)) is great for model interpretation.\n", + "Gradient Boosted Trees is just one of the many decision forest algorithms available in TensorFlow Decision Forests. For example, Random Forests (available as [tfdf.keras.RandomForestModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/RandomForestModel)) are very resistant to overfitting, while CART (available as [tfdf.keras.CartModel](https://www.tensorflow.org/decision_forests/api_docs/python/tfdf/keras/CartModel)) is great for model interpretation.\n", "\n", - "In the next example, we train and plot a Random Forest model." + "In the next example, train and plot a Random Forest model." ] }, { @@ -718,7 +718,7 @@ "id": "Z0QYolhoZb_k" }, "source": [ - "Finaly, in the next example, we train and evaluate a CART model." + "In the final example, train and evaluate a CART model." 
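As a rough sketch of the kind of code these examples refer to (the actual notebook cells are not shown in this diff), a CART model in TensorFlow Decision Forests is trained and evaluated much like any other Keras model. The synthetic DataFrame below is an assumption made only to keep the snippet self-contained; the guide itself uses the dataset loaded earlier in the notebook.

```python
import numpy as np
import pandas as pd
import tensorflow_decision_forests as tfdf

# Synthetic stand-in data (illustrative assumption, not the guide's dataset).
rng = np.random.default_rng(0)
df = pd.DataFrame({
    'age': rng.integers(1, 80, size=200),
    'fare': rng.uniform(0, 100, size=200),
    'sex': rng.choice(['male', 'female'], size=200),
})
df['survived'] = ((df['sex'] == 'female') | (df['age'] < 10)).astype(int)

train_ds = tfdf.keras.pd_dataframe_to_tf_dataset(df, label='survived')

# CART fits a single decision tree, which keeps the model easy to interpret.
cart_model = tfdf.keras.CartModel()
cart_model.compile(metrics=['accuracy'])
cart_model.fit(train_ds)
print(cart_model.evaluate(train_ds, return_dict=True))
```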
] }, { @@ -742,7 +742,6 @@ "colab": { "collapsed_sections": [], "name": "canned_estimators.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/early_stopping.ipynb b/site/en/guide/migrate/early_stopping.ipynb index fec8144348a..1c1712e975b 100644 --- a/site/en/guide/migrate/early_stopping.ipynb +++ b/site/en/guide/migrate/early_stopping.ipynb @@ -457,7 +457,7 @@ "epochs = 100\n", "patience = 5\n", "wait = 0\n", - "best = 0\n", + "best = float('inf')\n", "\n", "for epoch in range(epochs):\n", " print(\"\\nStart of epoch %d\" % (epoch,))\n", @@ -486,7 +486,7 @@ " # The early stopping strategy: stop the training if `val_loss` does not\n", " # decrease over a certain number of epochs.\n", " wait += 1\n", - " if val_loss > best:\n", + " if val_loss < best:\n", " best = val_loss\n", " wait = 0\n", " if wait >= patience:\n", diff --git a/site/en/guide/migrate/evaluator.ipynb b/site/en/guide/migrate/evaluator.ipynb index 3588838467c..c8f848e4406 100644 --- a/site/en/guide/migrate/evaluator.ipynb +++ b/site/en/guide/migrate/evaluator.ipynb @@ -13,6 +13,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "cellView": "form", "id": "HMUDt0CiUJk9" }, "outputs": [], @@ -121,7 +122,7 @@ "\n", "In TensorFlow 1, you can configure a `tf.estimator` to evaluate the estimator using `tf.estimator.train_and_evaluate`.\n", "\n", - "In this example, start by defining the `tf.estimator.Estimator` and speciyfing training and evaluation specifications:" + "In this example, start by defining the `tf.estimator.Estimator` and specifying training and evaluation specifications:" ] }, { @@ -193,7 +194,7 @@ "source": [ "## TensorFlow 2: Evaluating a Keras model\n", "\n", - "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can evaluate the model with `tf.keras.utils.SidecarEvaluator`. You can also visualize the evaluation metrics in Tensorboard which is not shown in this guide.\n", + "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can evaluate the model with `tf.keras.utils.SidecarEvaluator`. 
You can also visualize the evaluation metrics in TensorBoard which is not shown in this guide.\n", "\n", "To help demonstrate this, let's first start by defining and training the model:\n" ] @@ -279,7 +280,6 @@ "colab": { "collapsed_sections": [], "name": "evaluator.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/fault_tolerance.ipynb b/site/en/guide/migrate/fault_tolerance.ipynb index 1109db36840..fdbd0b972c3 100644 --- a/site/en/guide/migrate/fault_tolerance.ipynb +++ b/site/en/guide/migrate/fault_tolerance.ipynb @@ -13,6 +13,7 @@ "cell_type": "code", "execution_count": null, "metadata": { + "cellView": "form", "id": "HMUDt0CiUJk9" }, "outputs": [], @@ -84,6 +85,26 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "TOVQubuDzdmA" + }, + "source": [ + "Install `tf-nightly`, as the frequency of checkpoint saving at a particular step with the `save_freq` argument in `tf.keras.callbacks.BackupAndRestore` is introduced from TensorFlow 2.10:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pGW0XhXkxY_q" + }, + "outputs": [], + "source": [ + "!pip install tf-nightly" + ] + }, { "cell_type": "code", "execution_count": null, @@ -119,7 +140,7 @@ "id": "TtlucRG_Uro_" }, "source": [ - "## TensorFlow 1: Save checkpoints with tf.estimator.RunConfig\n", + "## TensorFlow 1: Save checkpoints with `tf.estimator.RunConfig`\n", "\n", "In TensorFlow 1, you can configure a `tf.estimator` to save checkpoints every step by configuring `tf.estimator.RunConfig`.\n", "\n", @@ -250,11 +271,11 @@ "id": "T5LtVtmvYx7J" }, "source": [ - "## TensorFlow 2: Back up and restore with a callback and Model.fit\n", + "## TensorFlow 2: Back up and restore with a callback and `Model.fit`\n", "\n", "In TensorFlow 2, if you use the Keras `Model.fit` API for training, you can provide the `tf.keras.callbacks.BackupAndRestore` callback to add the fault tolerance functionality.\n", "\n", - "To help demonstrate this, let's first start by defining a callback class that artificially throws an error during the fifth checkpoint:\n" + "To help demonstrate this, first start by defining a Keras `Callback` class that artificially throws an error during the fourth epoch checkpoint:\n" ] }, { @@ -265,10 +286,13 @@ }, "outputs": [], "source": [ - "class InterruptingCallback(tf.keras.callbacks.Callback):\n", + "class InterruptAtEpoch(tf.keras.callbacks.Callback):\n", " # A callback for artificially interrupting training.\n", + " def __init__(self, interrupting_epoch=3):\n", + " self.interrupting_epoch = interrupting_epoch\n", + "\n", " def on_epoch_end(self, epoch, log=None):\n", - " if epoch == 4:\n", + " if epoch == self.interrupting_epoch:\n", " raise RuntimeError('Interruption')" ] }, @@ -278,7 +302,7 @@ "id": "AhU3VTYZoDh-" }, "source": [ - "Then, define and instantiate a simple Keras model, define the loss function, call `Model.compile`, and set up a `tf.keras.callbacks.BackupAndRestore` callback that will save the checkpoints in a temporary directory:" + "Then, define and instantiate a simple Keras model, define the loss function, call `Model.compile`, and set up a `tf.keras.callbacks.BackupAndRestore` callback that will save the checkpoints in a temporary directory at epoch boundaries:" ] }, { @@ -296,20 +320,14 @@ " tf.keras.layers.Dropout(0.2),\n", " tf.keras.layers.Dense(10)\n", " ])\n", - "\n", "loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)\n", - "\n", "model = create_model()\n", 
"model.compile(optimizer='adam',\n", " loss=loss,\n", - " metrics=['accuracy'],\n", - " steps_per_execution=10)\n", - "\n", + " metrics=['accuracy'])\n", "log_dir = tempfile.mkdtemp()\n", - "\n", "backup_restore_callback = tf.keras.callbacks.BackupAndRestore(\n", - " backup_dir = log_dir\n", - ")" + " backup_dir = log_dir)" ] }, { @@ -318,7 +336,7 @@ "id": "LRRWmZqsvMrq" }, "source": [ - "Now, start training the model with `Model.fit`. During training, checkpoints will be saved thanks to the `backup_restore_callback` defined above, while the `InterruptingCallback` will raise an artificial exception to simulate a failure." + "Start training the model with `Model.fit`. During training, checkpoints will be saved thanks to `tf.keras.callbacks.BackupAndRestore` instantiated above, while the `InterruptAtEpoch` class will raise an artificial exception to simulate a failure after the fourth epoch." ] }, { @@ -333,8 +351,9 @@ " model.fit(x=x_train,\n", " y=y_train,\n", " epochs=10,\n", + " steps_per_epoch=100,\n", " validation_data=(x_test, y_test),\n", - " callbacks=[backup_restore_callback, InterruptingCallback()])\n", + " callbacks=[backup_restore_callback, InterruptAtEpoch()])\n", "except Exception as e:\n", " print(f'{type(e).__name__}:{e}')" ] @@ -364,6 +383,108 @@ "model.fit(x=x_train,\n", " y=y_train,\n", " epochs=10,\n", + " steps_per_epoch=100,\n", + " validation_data=(x_test, y_test),\n", + " callbacks=[backup_restore_callback])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nP2dnpMPxtYj" + }, + "source": [ + "Define another `Callback` class that artificially throws an error during the 140th step:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YardkAaBxr-c" + }, + "outputs": [], + "source": [ + "class InterruptAtStep(tf.keras.callbacks.Callback):\n", + " # A callback for artificially interrupting training.\n", + " def __init__(self, interrupting_step=140):\n", + " self.total_step_count = 0\n", + " self.interrupting_step = interrupting_step\n", + "\n", + " def on_batch_begin(self, batch, logs=None):\n", + " self.total_step_count += 1\n", + "\n", + " def on_batch_end(self, batch, logs=None):\n", + " if self.total_step_count == self.interrupting_step:\n", + " print(\"\\nInterrupting at step count\", self.total_step_count)\n", + " raise RuntimeError('Interruption')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Af3VpehxyTpb" + }, + "source": [ + "Note: This section uses features that are only available in `tf-nightly` until Tensorflow 2.10 is released.\n", + "\n", + "To make sure the checkpoints are saved every 30 steps, set the `save_freq` in the `BackupAndRestore` callback to `30`. The `InterruptAtStep` will raise an artificial exception to simulate a failure at epoch 1 and step 40 (total step count 140). The checkpoint would be last saved at epoch 1 and step 20." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dHHCENDPyUHS" + }, + "outputs": [], + "source": [ + "log_dir_2 = tempfile.mkdtemp()\n", + "\n", + "backup_restore_callback = tf.keras.callbacks.BackupAndRestore(\n", + " backup_dir = log_dir_2, save_freq=30\n", + ")\n", + "model = create_model()\n", + "model.compile(optimizer='adam',\n", + " loss=loss,\n", + " metrics=['accuracy'])\n", + "try:\n", + " model.fit(x=x_train,\n", + " y=y_train,\n", + " epochs=10,\n", + " steps_per_epoch=100,\n", + " validation_data=(x_test, y_test),\n", + " callbacks=[backup_restore_callback, InterruptAtStep()])\n", + "except Exception as e:\n", + " print(f'{type(e).__name__}:{e}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2-ggMFEHynMR" + }, + "source": [ + "Next, instantiate the Keras model, call `Model.compile`, and continue training the model with `Model.fit` from a previously saved checkpoint. Notice that the training starts from epoch 2 and step 21." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vT7Kx30NEqly" + }, + "outputs": [], + "source": [ + "model = create_model()\n", + "model.compile(optimizer='adam',\n", + " loss=loss,\n", + " metrics=['accuracy'],\n", + " steps_per_execution=10)\n", + "model.fit(x=x_train,\n", + " y=y_train,\n", + " epochs=10,\n", + " steps_per_epoch=100,\n", " validation_data=(x_test, y_test),\n", " callbacks=[backup_restore_callback])" ] @@ -467,7 +588,6 @@ "colab": { "collapsed_sections": [], "name": "fault_tolerance.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/images/tensorboard_TF1.png b/site/en/guide/migrate/images/tensorboard_TF1.png index 18b7bbb12cf..294fbbcc5b5 100644 Binary files a/site/en/guide/migrate/images/tensorboard_TF1.png and b/site/en/guide/migrate/images/tensorboard_TF1.png differ diff --git a/site/en/guide/migrate/images/tensorboard_TF2.png b/site/en/guide/migrate/images/tensorboard_TF2.png index 55abb91fe6f..bbad8768210 100644 Binary files a/site/en/guide/migrate/images/tensorboard_TF2.png and b/site/en/guide/migrate/images/tensorboard_TF2.png differ diff --git a/site/en/guide/migrate/metrics_optimizers.ipynb b/site/en/guide/migrate/metrics_optimizers.ipynb index dea0d11dc3a..61afb35aea6 100644 --- a/site/en/guide/migrate/metrics_optimizers.ipynb +++ b/site/en/guide/migrate/metrics_optimizers.ipynb @@ -144,7 +144,7 @@ "\n", "def _model_fn(features, labels, mode):\n", " logits = tf1.layers.Dense(2)(features)\n", - " predictions = tf.argmax(input=logits, axis=1)\n", + " predictions = tf.math.argmax(input=logits, axis=1)\n", " loss = tf1.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)\n", " optimizer = tf1.train.AdagradOptimizer(0.05)\n", " train_op = optimizer.minimize(loss, global_step=tf1.train.get_global_step())\n", @@ -223,7 +223,7 @@ "\n", "inputs = tf.keras.Input((2,))\n", "logits = tf.keras.layers.Dense(2)(inputs)\n", - "predictions = tf.argmax(input=logits, axis=1)\n", + "predictions = tf.math.argmax(input=logits, axis=1)\n", "model = tf.keras.models.Model(inputs, predictions)\n", "optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", "\n", @@ -370,8 +370,7 @@ "metadata": { "colab": { "collapsed_sections": [], - "name": "metrics.ipynb", - "provenance": [], + "name": "metrics_optimizers.ipynb", "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/migrating_feature_columns.ipynb b/site/en/guide/migrate/migrating_feature_columns.ipynb index 
65aa1a786b8..b2dbc5fe7c0 100644 --- a/site/en/guide/migrate/migrating_feature_columns.ipynb +++ b/site/en/guide/migrate/migrating_feature_columns.ipynb @@ -37,7 +37,7 @@ "id": "77z2OchJTk0l" }, "source": [ - "# Migrating feature_columns to TF2's Keras Preprocessing Layers\n", + "# Migrate `tf.feature_column`s to Keras preprocessing layers\n", "\n", "\n", "
\n", @@ -67,11 +67,11 @@ "id": "-5jGPDA2PDPI" }, "source": [ - "Training a model will usually come with some amount of feature preprocessing, particularly when dealing with structured data. When training a `tf.estimator.Estimator` in TF1, this feature preprocessing is usually done with the `tf.feature_column` API. In TF2, this preprocessing can be done directly with Keras layers, called _preprocessing layers_.\n", + "Training a model usually comes with some amount of feature preprocessing, particularly when dealing with structured data. When training a `tf.estimator.Estimator` in TensorFlow 1, you usually perform feature preprocessing with the `tf.feature_column` API. In TensorFlow 2, you can do this directly with Keras preprocessing layers.\n", "\n", - "In this migration guide, you will perform some common feature transformations using both feature columns and preprocessing layers, followed by training a complete model with both APIs.\n", + "This migration guide demonstrates common feature transformations using both feature columns and preprocessing layers, followed by training a complete model with both APIs.\n", "\n", - "First, start with a couple of necessary imports," + "First, start with a couple of necessary imports:" ] }, { @@ -93,7 +93,7 @@ "id": "NVPYTQAWtDwH" }, "source": [ - "and add a utility for calling a feature column for demonstration:" + "Now, add a utility function for calling a feature column for demonstration:" ] }, { @@ -557,7 +557,7 @@ "id": "fd6eluARXndC" }, "source": [ - "In Keras, there is no `combiner` option to `tf.keras.layers.Embedding`, but you can acheive the same effect with `tf.keras.layers.Dense`. The `embedding_column` above is simply linearly combining embedding vectors according to category weight. Though not obvious at first, it is exactly equivalent to representing your categorical inputs as a sparse weight vector of size `(num_tokens)`, and mutiplying them by a `Dense` kernel of shape `(embedding_size, num_tokens)`." + "In Keras, there is no `combiner` option to `tf.keras.layers.Embedding`, but you can achieve the same effect with `tf.keras.layers.Dense`. The `embedding_column` above is simply linearly combining embedding vectors according to category weight. Though not obvious at first, it is exactly equivalent to representing your categorical inputs as a sparse weight vector of size `(num_tokens)`, and multiplying them by a `Dense` kernel of shape `(embedding_size, num_tokens)`." ] }, { @@ -572,7 +572,7 @@ "weights = tf.constant([[0.5, 1.5, 0.7, 1.8, 0.2]])\n", "\n", "# For `combiner='mean'`, normalize your weights to sum to 1. Removing this line\n", - "# would be eqivalent to an `embedding_column` with `combiner='sum'`.\n", + "# would be equivalent to an `embedding_column` with `combiner='sum'`.\n", "weights = weights / tf.reduce_sum(weights, axis=-1, keepdims=True)\n", "\n", "count_layer = tf.keras.layers.CategoryEncoding(\n", @@ -615,7 +615,7 @@ "id": "e_4Xx2c37lqD" }, "source": [ - "Define some common constants for both TF1 and TF2 workflows:" + "Define some common constants for both TensorFlow 1 and TensorFlow 2 workflows:" ] }, { @@ -654,17 +654,17 @@ "source": [ "categorical_col = tf1.feature_column.categorical_column_with_identity(\n", " 'type', num_buckets=one_hot_dims)\n", - "# Convert index to one-hot; e.g. [2] -> [0,0,1].\n", + "# Convert index to one-hot; e.g., [2] -> [0,0,1].\n", "indicator_col = tf1.feature_column.indicator_column(categorical_col)\n", "\n", - "# Convert strings to indices; e.g. 
['small'] -> [1].\n", + "# Convert strings to indices; e.g., ['small'] -> [1].\n", "vocab_col = tf1.feature_column.categorical_column_with_vocabulary_list(\n", " 'size', vocabulary_list=vocab, num_oov_buckets=1)\n", "# Embed the indices.\n", "embedding_col = tf1.feature_column.embedding_column(vocab_col, embedding_dims)\n", "\n", "normalizer_fn = lambda x: (x - weight_mean) / math.sqrt(weight_variance)\n", - "# Normalize the numeric inputs; e.g. [2.0] -> [0.0].\n", + "# Normalize the numeric inputs; e.g., [2.0] -> [0.0].\n", "numeric_col = tf1.feature_column.numeric_column(\n", " 'weight', normalizer_fn=normalizer_fn)\n", "\n", @@ -727,12 +727,12 @@ " 'size': tf.keras.Input(shape=(), dtype='string'),\n", " 'weight': tf.keras.Input(shape=(), dtype='float32'),\n", "}\n", - "# Convert index to one-hot; e.g. [2] -> [0,0,1].\n", + "# Convert index to one-hot; e.g., [2] -> [0,0,1].\n", "type_output = tf.keras.layers.CategoryEncoding(\n", " one_hot_dims, output_mode='one_hot')(inputs['type'])\n", - "# Convert size strings to indices; e.g. ['small'] -> [1].\n", + "# Convert size strings to indices; e.g., ['small'] -> [1].\n", "size_output = tf.keras.layers.StringLookup(vocabulary=vocab)(inputs['size'])\n", - "# Normalize the numeric inputs; e.g. [2.0] -> [0.0].\n", + "# Normalize the numeric inputs; e.g., [2.0] -> [0.0].\n", "weight_output = tf.keras.layers.Normalization(\n", " axis=None, mean=weight_mean, variance=weight_variance)(inputs['weight'])\n", "outputs = {\n", @@ -845,8 +845,8 @@ "outputs": [], "source": [ "inputs = preprocessing_model.input\n", - "outpus = training_model(preprocessing_model(inputs))\n", - "inference_model = tf.keras.Model(inputs, outpus)\n", + "outputs = training_model(preprocessing_model(inputs))\n", + "inference_model = tf.keras.Model(inputs, outputs)\n", "\n", "predict_dataset = tf.data.Dataset.from_tensor_slices(predict_features).batch(1)\n", "inference_model.predict(predict_dataset)" @@ -858,7 +858,7 @@ "id": "O01VQIxCWBxU" }, "source": [ - "This composed model can be saved as a [SavedModel](https://www.tensorflow.org/guide/saved_model) for later use." + "This composed model can be saved as a `.keras` file for later use." ] }, { @@ -869,8 +869,8 @@ }, "outputs": [], "source": [ - "inference_model.save('model')\n", - "restored_model = tf.keras.models.load_model('model')\n", + "inference_model.save('model.keras')\n", + "restored_model = tf.keras.models.load_model('model.keras')\n", "restored_model.predict(predict_dataset)" ] }, @@ -880,7 +880,7 @@ "id": "IXMBwzggwUjI" }, "source": [ - "Note: Preprocessing layers are not trainable, which allows you to apply them *asynchronously* using `tf.data`. This has performence benefits, as you can both [prefetch](https://www.tensorflow.org/guide/data_performance#prefetching) preprocessed batches, and free up any accelerators to focus on the differentiable parts of a model. As this guide shows, seperating preprocessing during training and composing it during inference is a flexible way to leverage these performance gains. However, if your model is small or preprocessing time is negligable, it may be simpler to build preprocessing into a complete model from the start. To do this you can build a single model starting with `tf.keras.Input`, followed by preprocessing layers, followed by trainable layers." + "Note: Preprocessing layers are not trainable, which allows you to apply them *asynchronously* using `tf.data`. 
This has performance benefits, as you can both prefetch preprocessed batches, and free up any accelerators to focus on the differentiable parts of a model (learn more in the _Prefetching_ section of the [Better performance with the `tf.data` API](../data_performance.ipynb) guide). As this guide shows, separating preprocessing during training and composing it during inference is a flexible way to leverage these performance gains. However, if your model is small or preprocessing time is negligible, it may be simpler to build preprocessing into a complete model from the start. To do this you can build a single model starting with `tf.keras.Input`, followed by preprocessing layers, followed by trainable layers." ] }, { @@ -892,76 +892,76 @@ "## Feature column equivalence table\n", "\n", "For reference, here is an approximate correspondence between feature columns and\n", - "preprocessing layers:\n", + "Keras preprocessing layers:
\n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", "
Feature ColumnKeras LayerFeature columnKeras layer
`feature_column.bucketized_column``layers.Discretization``tf.feature_column.bucketized_column``tf.keras.layers.Discretization`
`feature_column.categorical_column_with_hash_bucket``layers.Hashing``tf.feature_column.categorical_column_with_hash_bucket``tf.keras.layers.Hashing`
`feature_column.categorical_column_with_identity``layers.CategoryEncoding``tf.feature_column.categorical_column_with_identity``tf.keras.layers.CategoryEncoding`
`feature_column.categorical_column_with_vocabulary_file``layers.StringLookup` or `layers.IntegerLookup``tf.feature_column.categorical_column_with_vocabulary_file``tf.keras.layers.StringLookup` or `tf.keras.layers.IntegerLookup`
`feature_column.categorical_column_with_vocabulary_list``layers.StringLookup` or `layers.IntegerLookup``tf.feature_column.categorical_column_with_vocabulary_list``tf.keras.layers.StringLookup` or `tf.keras.layers.IntegerLookup`
`feature_column.crossed_column``layers.experimental.preprocessing.HashedCrossing``tf.feature_column.crossed_column``tf.keras.layers.experimental.preprocessing.HashedCrossing`
`feature_column.embedding_column``layers.Embedding``tf.feature_column.embedding_column``tf.keras.layers.Embedding`
`feature_column.indicator_column``tf.feature_column.indicator_column``output_mode='one_hot'` or `output_mode='multi_hot'`*
`feature_column.numeric_column``layers.Normalization``tf.feature_column.numeric_column``tf.keras.layers.Normalization`
`feature_column.sequence_categorical_column_with_hash_bucket``layers.Hashing``tf.feature_column.sequence_categorical_column_with_hash_bucket``tf.keras.layers.Hashing`
`feature_column.sequence_categorical_column_with_identity``layers.CategoryEncoding``tf.feature_column.sequence_categorical_column_with_identity``tf.keras.layers.CategoryEncoding`
`feature_column.sequence_categorical_column_with_vocabulary_file``layers.StringLookup`, `layers.IntegerLookup`, or `layer.TextVectorization`†`tf.feature_column.sequence_categorical_column_with_vocabulary_file``tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, or `tf.keras.layer.TextVectorization`†
`feature_column.sequence_categorical_column_with_vocabulary_list``layers.StringLookup`, `layers.IntegerLookup`, or `layer.TextVectorization`†`tf.feature_column.sequence_categorical_column_with_vocabulary_list``tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, or `tf.keras.layer.TextVectorization`†
`feature_column.sequence_numeric_column``layers.Normalization``tf.feature_column.sequence_numeric_column``tf.keras.layers.Normalization`
`feature_column.weighted_categorical_column``layers.CategoryEncoding``tf.feature_column.weighted_categorical_column``tf.keras.layers.CategoryEncoding`
\n", "\n", - "\\* `output_mode` can be passed to `layers.CategoryEncoding`, `layers.StringLookup`, `layers.IntegerLookup`, and `layers.TextVectorization`.\n", + "\\* The `output_mode` can be passed to `tf.keras.layers.CategoryEncoding`, `tf.keras.layers.StringLookup`, `tf.keras.layers.IntegerLookup`, and `tf.keras.layers.TextVectorization`.\n", "\n", - "† `layers.TextVectorization` can handle freeform text input directly (e.g. entire sentences or paragraphs). This is not one-to-one replacement for categorical sequence handling in TF1, but may offer a convinient replacement for ad-hoc text preprocessing.\n", + "† `tf.keras.layers.TextVectorization` can handle freeform text input directly (for example, entire sentences or paragraphs). This is not one-to-one replacement for categorical sequence handling in TensorFlow 1, but may offer a convenient replacement for ad-hoc text preprocessing.\n", "\n", "Note: Linear estimators, such as `tf.estimator.LinearClassifier`, can handle direct categorical input (integer indices) without an `embedding_column` or `indicator_column`. However, integer indices cannot be passed directly to `tf.keras.layers.Dense` or `tf.keras.experimental.LinearModel`. These inputs should be first encoded with `tf.layers.CategoryEncoding` with `output_mode='count'` (and `sparse=True` if the category sizes are large) before calling into `Dense` or `LinearModel`." ] @@ -972,10 +972,10 @@ "id": "AQCJ6lM3YDq_" }, "source": [ - "## Next Steps\n", + "## Next steps\n", "\n", - " - For more information on keras preprocessing layers, see [the guide to preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers).\n", - " - For a more in-depth example of applying preprocessing layers to structured data, see [the structured data tutorial](https://www.tensorflow.org/tutorials/structured_data/preprocessing_layers)." + " - For more information on Keras preprocessing layers, go to the [Working with preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) guide.\n", + " - For a more in-depth example of applying preprocessing layers to structured data, refer to the [Classify structured data using Keras preprocessing layers](../../tutorials/structured_data/preprocessing_layers.ipynb) tutorial." ] } ], diff --git a/site/en/guide/migrate/migration_debugging.ipynb b/site/en/guide/migrate/migration_debugging.ipynb index 774fca2160b..25cb7f9065f 100644 --- a/site/en/guide/migrate/migration_debugging.ipynb +++ b/site/en/guide/migrate/migration_debugging.ipynb @@ -37,7 +37,7 @@ "id": "77z2OchJTk0l" }, "source": [ - "# Debug TF2 Migrated Training Pipeline\n", + "# Debug a TensorFlow 2 migrated training pipeline\n", "\n", "\n", " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", @@ -67,12 +67,12 @@ "id": "zTwPu-w6M5sz" }, "source": [ - "This notebook demonstrates how to debug training pipeline when migrating to TF2. It consists of following components:\n", + "This notebook demonstrates how to debug a training pipeline when migrating to TensorFlow 2 (TF2). It consists of following components:\n", "1. Suggested steps and code samples for debugging training pipeline\n", "2. Tools for debugging\n", "3. Other related resources\n", "\n", - "One assumption is you have TF1.x code and trained models for comparison, and you want to build a TF2 model that achieves similar validation accuracy.\n", + "One assumption is you have the TensorFlow 1 (TF1.x) code and trained models for comparison, and you want to build a TF2 model that achieves similar validation accuracy.\n", "\n", "This notebook does **NOT** cover debugging performance issues for training/inference speed or memory usage." ] @@ -100,7 +100,7 @@ " disabled\n", " * Align random number generation, check numerical equivalence in inference\n", " * (Optional) Check checkpoints are loaded properly and TF1.x/TF2 models\n", - " generate identitcal output\n", + " generate identical output\n", "\n", " b. On single GPU/TPU device\n", "\n", @@ -124,11 +124,11 @@ "\n", " d. With multi-device strategies (check the intro for [MultiProcessRunner](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/distribute/multi_process_runner.py#L108) at the bottom)\n", "\n", - "4. End-to-end covergence testing on real dataset\n", + "4. End-to-end convergence testing on real dataset\n", "\n", " a. Check training behaviors with TensorBoard\n", "\n", - " * use simple optimizers e.g. SGD and simple distribution strategies e.g.\n", + " * use simple optimizers e.g., SGD and simple distribution strategies e.g.\n", " `tf.distribute.OneDeviceStrategy` first\n", " * training metrics\n", " * evaluation metrics\n", @@ -151,17 +151,6 @@ "## Setup" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sopP--i7-LaF" - }, - "outputs": [], - "source": [ - "!pip uninstall -y -q tensorflow" - ] - }, { "cell_type": "code", "execution_count": null, @@ -170,9 +159,8 @@ }, "outputs": [], "source": [ - "# Install tf-nightly as the DeterministicRandomTestTool is only available in\n", - "# Tensorflow 2.8\n", - "!pip install -q tf-nightly" + "# The `DeterministicRandomTestTool` is only available from Tensorflow 2.8:\n", + "!pip install -q \"tensorflow==2.9.*\"" ] }, { @@ -416,7 +404,7 @@ " decay_steps=params['decay_steps'],\n", " end_learning_rate=params['end_lr'],\n", " power=params['lr_power']) \n", - " self.optimizer = tf.keras.optimizers.SGD(learning_rate_fn)\n", + " self.optimizer = tf.keras.optimizers.legacy.SGD(learning_rate_fn)\n", " self.compiled_loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)\n", " self.logs = {\n", " 'lr': [],\n", @@ -486,7 +474,7 @@ "source": [ "Compare numerical equivalence for first few training steps.\n", "\n", - "You can also check the [Validating correctness & numerical equivalence notebook](./validate_correctness.ipynb) for additonal advice for numerical equivalence." + "You can also check the [Validating correctness & numerical equivalence notebook](./validate_correctness.ipynb) for additional advice for numerical equivalence." 
] }, { @@ -586,7 +574,7 @@ " # adopt different tolerance strategies before and after 10 steps\n", " first_n_step = 10\n", "\n", - " # abosolute difference is limited below 1e-5\n", + " # absolute difference is limited below 1e-5\n", " # set `equal_nan` to be False to detect potential NaN loss issues\n", " abosolute_tolerance = 1e-5\n", " np.testing.assert_allclose(\n", @@ -622,7 +610,7 @@ "\n", "tf.print vs print/logging.info\n", "\n", - "- With configurable arguments, `tf.print` can recursively display show first and last few elements of each dimension for printed tensors. Check the [API docs](https://www.tensorflow.org/api_docs/python/tf/print) for details.\n", + "- With configurable arguments, `tf.print` can recursively display the first and last few elements of each dimension for printed tensors. Check the [API docs](https://www.tensorflow.org/api_docs/python/tf/print) for details.\n", "- For eager execution, both `print` and `tf.print` print the value of the tensor. But `print` may involve device-to-host copy, which can potentially slow down your code. \n", "- For graph mode including usage inside `tf.function`, you need to use `tf.print` to print the actual tensor value. `tf.print` is compiled into an op in the graph, whereas `print` and `logging.info` only log at tracing time, which is often not what you want. \n", "- `tf.print` also supports printing composite tensors like `tf.RaggedTensor` and `tf.sparse.SparseTensor`.\n", diff --git a/site/en/guide/migrate/model_mapping.ipynb b/site/en/guide/migrate/model_mapping.ipynb index edb411a7270..2d4582839c0 100644 --- a/site/en/guide/migrate/model_mapping.ipynb +++ b/site/en/guide/migrate/model_mapping.ipynb @@ -1048,6 +1048,7 @@ "source": [ "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = CompatModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1069,6 +1070,7 @@ "source": [ "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = PartiallyMigratedModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1142,6 +1144,7 @@ "source": [ "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = NearlyFullyNativeModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1215,6 +1218,7 @@ "source": [ "random_tool = v1.keras.utils.DeterministicRandomTestTool(mode='num_random_ops')\n", "with random_tool.scope():\n", + " tf.keras.utils.set_random_seed(42)\n", " layer = FullyNativeModel(10)\n", "\n", " inputs = tf.random.normal(shape=(10, 5, 5, 5))\n", @@ -1249,7 +1253,7 @@ "\n", "The above migration process to native TF2 APIs changed both the variable names (as Keras APIs produce very different weight names), and the object-oriented paths that point to different weights in the model. 
The impact of these changes is that they will have broken both any existing TF1-style name-based checkpoints or TF2-style object-oriented checkpoints.\n", "\n", - "However, in some cases, you might be able to take your original name-based checkpoint and find a mapping of the variables to their new names with approaches like the one detailed in the [Reusing TF1.x checkpoints guide](./reusing_checkpoints.ipynb).\n", + "However, in some cases, you might be able to take your original name-based checkpoint and find a mapping of the variables to their new names with approaches like the one detailed in the [Reusing TF1.x checkpoints guide](./migrating_checkpoints.ipynb).\n", "\n", "Some tips to making this feasible are as follows:\n", "- Variables still all have a `name` argument you can set.\n", diff --git a/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb b/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb index e4c30e2fa5c..8a95cb903d6 100644 --- a/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb +++ b/site/en/guide/migrate/multi_worker_cpu_gpu_training.ipynb @@ -369,7 +369,7 @@ "\n", "with strategy.scope():\n", " model = tf.keras.models.Sequential([tf.keras.layers.Dense(1)])\n", - " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", + " optimizer = tf.keras.optimizers.legacy.Adagrad(learning_rate=0.05)\n", " model.compile(optimizer, \"mse\")\n", "\n", "model.fit(dataset, epochs=5, steps_per_epoch=10)" diff --git a/site/en/guide/migrate/saved_model.ipynb b/site/en/guide/migrate/saved_model.ipynb index f875b3f9a6c..e7e8ce8daa1 100644 --- a/site/en/guide/migrate/saved_model.ipynb +++ b/site/en/guide/migrate/saved_model.ipynb @@ -352,7 +352,11 @@ "source": [ "### Save and export a SavedModel defined with Keras\n", "\n", - "The Keras APIs for saving and exporting—`Mode.save` or `tf.keras.models.save_model`—can export a SavedModel from a `tf.keras.Model`. Check out the [Save and load Keras models](../..guide/keras/save_and_serialize) for more details." + "\n", + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code.\n", + "\n", + "\n", + "The Keras APIs for saving and exporting—`Model.save` or `tf.keras.models.save_model`—can export a SavedModel from a `tf.keras.Model`. Check out the [Save and load Keras models](../..guide/keras/save_and_serialize) for more details." ] }, { @@ -506,6 +510,9 @@ "source": [ "### TensorFlow 2: Load a model saved with Keras\n", "\n", + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code.\n", + "\n", + "\n", "The Keras loading API—`tf.keras.models.load_model`—allows you to reload a saved model back into a Keras Model object. Note that this only allows you to load SavedModels saved with Keras (`Model.save` or `tf.keras.models.save_model`).\n", "\n", "Models saved with `tf.saved_model.save` should be loaded with `tf.saved_model.load`. You can load a Keras model saved with `Model.save` using `tf.saved_model.load` but you will only get the TensorFlow graph. 
Refer to the `tf.keras.models.load_model` API docs and [Save and load Keras models](https://www.tensorflow.org/guide/keras/save_and_serialize#savedmodel_format) guide for details." @@ -748,7 +755,6 @@ "colab": { "collapsed_sections": [], "name": "saved_model.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tensorboard.ipynb b/site/en/guide/migrate/tensorboard.ipynb index c8ec222b621..ea0cd72b47e 100644 --- a/site/en/guide/migrate/tensorboard.ipynb +++ b/site/en/guide/migrate/tensorboard.ipynb @@ -218,10 +218,10 @@ "\n", "def create_model():\n", " return tf.keras.models.Sequential([\n", - " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", - " tf.keras.layers.Dense(512, activation='relu'),\n", - " tf.keras.layers.Dropout(0.2),\n", - " tf.keras.layers.Dense(10, activation='softmax')\n", + " tf.keras.layers.Flatten(input_shape=(28, 28), name='layers_flatten'),\n", + " tf.keras.layers.Dense(512, activation='relu', name='layers_dense'),\n", + " tf.keras.layers.Dropout(0.2, name='layers_dropout'),\n", + " tf.keras.layers.Dense(10, activation='softmax', name='layers_dense_2')\n", " ])\n", "\n", "model = create_model()\n", @@ -279,7 +279,6 @@ "colab": { "collapsed_sections": [], "name": "tensorboard.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tf1_vs_tf2.ipynb b/site/en/guide/migrate/tf1_vs_tf2.ipynb index 80bb11bd530..60791f72680 100644 --- a/site/en/guide/migrate/tf1_vs_tf2.ipynb +++ b/site/en/guide/migrate/tf1_vs_tf2.ipynb @@ -131,7 +131,7 @@ "\n", "### Other API changes\n", "\n", - "* TF2 features significant improvements to the device placement algorithms which renders the usage of `tf.colocate_with` unnecessary. If removing it causes a performance degrade [please file a bug](https://github.com/tensorflow/tensorflow/issues).\n", + "* TF2 features significant improvements to the device placement algorithms which renders the usage of `tf.colocate_with` unnecessary. If removing it causes a performance degradation, [please file a bug](https://github.com/tensorflow/tensorflow/issues).\n", "\n", "* Replace all usage of `tf.v1.ConfigProto` with equivalent functions from `tf.config`." ] @@ -159,7 +159,7 @@ "source": [ "## No more globals\n", "\n", - "TF1.x relied heavily on implicit global namespaces and collections. When you called `tf.Variable`, it would be put into a collection in the default graph, and it would remain there, even if you lost track of the Python variable pointing to it. You could then recover that `tf.Variable`, but only if you knew the name that it had been created with. This was difficult to do if you were not in control of the variable's creation. As a result, all sorts of mechanisms proliferated to\n", + "TF1.x relied heavily on implicit global namespaces and collections. When you call `tf.Variable`, it would be put into a collection in the default graph, and it would remain there, even if you lost track of the Python variable pointing to it. You could then recover that `tf.Variable`, but only if you knew the name that it had been created with. This was difficult to do if you were not in control of the variable's creation. As a result, all sorts of mechanisms proliferated to\n", "attempt to help you find your variables again, and for frameworks to find\n", "user-created variables. 
Some of these include: variable scopes, global collections, helper methods like `tf.get_global_step` and `tf.global_variables_initializer`, optimizers implicitly\n", "computing gradients over all trainable variables, and so on. TF2 eliminates all of these mechanisms ([Variables 2.0 RFC](https://github.com/tensorflow/community/pull/11)) in favor of the default mechanism - you keep track of your variables. If you lose track of a `tf.Variable`, it gets garbage collected.\n", @@ -340,7 +340,7 @@ " print(e) # is out of scope and cannot be used here.\n", "```\n", "\n", - "The most straightfoward solution is ensuring that the variable creation and dataset creation are both outside of the `tf.funciton` call. For example:\n", + "The most straightforward solution is ensuring that the variable creation and dataset creation are both outside of the `tf.function` call. For example:\n", "\n", "```python\n", "class Model(tf.Module):\n", @@ -681,7 +681,7 @@ "source": [ "### `ResourceVariables` instead of `ReferenceVariables`\n", "\n", - "`ResourceVariables` have stronger read-write consistency guarantees than `ReferenceVariables`. This leads to more predictable, easier-to-reason about semantics about whether or not you will observe the result of a previous write when using your variables. This change is extremely unlikely to cause existing code to raise errors or to break silently.\n", + "`ResourceVariables` have stronger read-write consistency guarantees than `ReferenceVariables`. This leads to more predictable, easier-to-reason semantics about whether or not you will observe the result of a previous write when using your variables. This change is extremely unlikely to cause existing code to raise errors or to break silently.\n", "\n", "However, it is ***possible though unlikely*** that these stronger consistency guarantees may increase the memory usage of your specific program. Please file an [issue](https://github.com/tensorflow/tensorflow/issues) if you find this to be the case. Additionally, if you have unit tests relying on exact string comparisons against the operator names in a graph corresponding to variable reads, be aware that enabling resource variables may slightly change the name of these operators.\n", "\n", @@ -1027,28 +1027,12 @@ }, "source": [ "### Hashing tensors and variables\n", - "With TF1.x behaviors you used to be able to directly add variables and tensors to data structures that require hashing, such as `set` and `dict` keys." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "fxi7EgKdBpd5" - }, - "outputs": [], - "source": [ - "tf.compat.v1.disable_tensor_equality()\n", + "With TF1.x behaviors you used to be able to directly add variables and tensors to data structures that require hashing, such as `set` and `dict` keys.\n", + "```python\n", "x = tf.Variable(0.0)\n", - "set([x, tf.constant(2.0)])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LFgjR-PSajVQ" - }, - "source": [ + "set([x, tf.constant(2.0)])\n", + "```\n", + "\n", "However, in TF2 with tensor equality enabled, tensors and variables are made unhashable due to the `==` and `!=` operator semantics changing to value equality checks." 
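As a small illustration of the workaround implied above (an editor's sketch, not original notebook content): keying containers by `ref()` keeps tensors and variables usable with sets and dicts under TF2's value-equality semantics.

```python
import tensorflow as tf

x = tf.Variable(0.0)
c = tf.constant(2.0)

# Hashing the objects directly raises a TypeError under TF2 semantics:
# set([x, c])  # TypeError: variables/tensors are unhashable

# `ref()` returns a hashable, stable reference object instead.
refs = {x.ref(), c.ref()}
print(x.ref() in refs)  # True

# `deref()` recovers the original tensor or variable when needed.
for r in refs:
  print(r.deref())
```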
] }, @@ -1135,7 +1119,6 @@ "colab": { "collapsed_sections": [], "name": "tf1_vs_tf2.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tflite.ipynb b/site/en/guide/migrate/tflite.ipynb index fd3695e5434..0426655ee1a 100644 --- a/site/en/guide/migrate/tflite.ipynb +++ b/site/en/guide/migrate/tflite.ipynb @@ -67,14 +67,14 @@ "id": "meUTrR4I6m1C" }, "source": [ - "[TensorFlow Lite](https://www.tensorflow.org/lite/guide) (TFLite) is a set of tools that helps developers run ML inference on-device (mobile, embedded, and IoT devices). The [TFLite converter](https://www.tensorflow.org/lite/convert) is one such tool that converts exisiting TF models into an optimized TFLite model format that can be efficiently run on-device.\n", + "[TensorFlow Lite](https://www.tensorflow.org/lite/guide) (TFLite) is a set of tools that helps developers run ML inference on-device (mobile, embedded, and IoT devices). The [TFLite converter](https://www.tensorflow.org/lite/convert) is one such tool that converts existing TF models into an optimized TFLite model format that can be efficiently run on-device.\n", "\n", "In this doc, you'll learn what changes you need to make to your TF to TFLite conversion code, followed by a few examples that do the same.\n", "\n", "\n", "## Changes to your TF to TFLite conversion code\n", "\n", - "* If you're using a legacy TF1 model format (Keras file, frozen GraphDef, checkpoints, tf.Session, etc), update it to TF1/TF2 SavedModel and use the TF2 converter API `tf.lite.TFLiteConverter.from_saved_model(...)` to convert it to a TFLite model (refer to Table 1).\n", + "* If you're using a legacy TF1 model format (such as Keras file, frozen GraphDef, checkpoints, tf.Session), update it to TF1/TF2 SavedModel and use the TF2 converter API `tf.lite.TFLiteConverter.from_saved_model(...)` to convert it to a TFLite model (refer to Table 1).\n", "\n", "* Update the converter API flags (refer to Table 2).\n", "* Remove legacy APIs such as `tf.lite.constants`. 
(eg: Replace `tf.lite.constants.INT8` with `tf.int8`)\n", @@ -125,7 +125,7 @@ "source": [ "## Examples\n", "\n", - "You'll now walkthrough some examples to convert legacy TF1 models to TF1/TF2 SavedModels and then convert it to TF2 TFLite models.\n", + "You'll now walk through some examples to convert legacy TF1 models to TF1/TF2 SavedModels and then convert them to TF2 TFLite models.\n", "\n", "### Setup\n", "\n", @@ -400,7 +400,7 @@ "with tf.Graph().as_default() as g:\n", " tf.graph_util.import_graph_def(gdef, name=\"\")\n", "\n", - "# Lookup the input and output tensors.\n", + "# Look up the input and output tensors.\n", "input_tensor = g.get_tensor_by_name('input:0') \n", "output_tensor = g.get_tensor_by_name('MobilenetV1/Predictions/Softmax:0')\n", "\n", @@ -436,7 +436,6 @@ "colab": { "collapsed_sections": [], "name": "tflite.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/migrate/tpu_embedding.ipynb b/site/en/guide/migrate/tpu_embedding.ipynb index 0a5b8882ef2..44105ea984b 100644 --- a/site/en/guide/migrate/tpu_embedding.ipynb +++ b/site/en/guide/migrate/tpu_embedding.ipynb @@ -476,7 +476,10 @@ "source": [ "strategy = tf.distribute.TPUStrategy(cluster_resolver)\n", "with strategy.scope():\n", - " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", + " if hasattr(tf.keras.optimizers, \"legacy\"):\n", + " optimizer = tf.keras.optimizers.legacy.Adagrad(learning_rate=0.05)\n", + " else:\n", + " optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.05)\n", " dense_input = tf.keras.Input(shape=(2,), dtype=tf.float32, batch_size=global_batch_size)\n", " sparse_input = tf.keras.Input(shape=(), dtype=tf.int32, batch_size=global_batch_size)\n", " embedded_input = tfrs.layers.embedding.TPUEmbedding(\n", diff --git a/site/en/guide/migrate/upgrade.ipynb b/site/en/guide/migrate/upgrade.ipynb index b59f5da3f5c..7223a8c8c81 100644 --- a/site/en/guide/migrate/upgrade.ipynb +++ b/site/en/guide/migrate/upgrade.ipynb @@ -95,7 +95,7 @@ "source": [ "## Compatibility modules\n", "\n", - "Certain API symbols can not be upgraded simply by using a string replacement. Those that cannot be automatically upgraded will be mapped to their locations in the `compat.v1` module. This module replaces TF 1.x symbols like `tf.foo` with the equivalent `tf.compat.v1.foo` reference. If you are already using `compat.v1` APIs by importing TF via `import tensorflow.compat.v1 as tf`, the `tf_upgrade_v2` script will attempt to convert these usages to the non-compat APIs where possible. Note that while some `compat.v1` APIs are compatible with TF2.x behaviors, many are not. So, we recommend that you manually proofread replacements and migrate them to new APIs in the `tf.*` namespace instead of `tf.compat.v1` namespace as quickly as possible.\n", + "Certain API symbols can not be upgraded simply by using a string replacement. Those that cannot be automatically upgraded will be mapped to their locations in the `compat.v1` module. This module replaces TF 1.x symbols like `tf.foo` with the equivalent `tf.compat.v1.foo` reference. If you are already using `compat.v1` APIs by importing TF via `import tensorflow.compat.v1 as tf`, the `tf_upgrade_v2` script will attempt to convert these usages to the non-compat APIs where possible. Note that while some `compat.v1` APIs are compatible with TF2.x behaviors, many are not. 
Therefore, it's recommended to manually proofread replacements and migrate them to new APIs in the `tf.*` namespace instead of `tf.compat.v1` namespace as quickly as possible.\n", "\n", "Because of TensorFlow 2.x module deprecations (for example, `tf.flags` and `tf.contrib`), some changes can not be worked around by switching to `compat.v1`. Upgrading this code may require using an additional library (for example, [`absl.flags`](https://github.com/abseil/abseil-py)) or switching to a package in [tensorflow/addons](http://www.github.com/tensorflow/addons).\n" ] @@ -108,7 +108,7 @@ "source": [ "## Recommended upgrade process\n", "\n", - "The rest of this guide demonstrates how to use the symbol-rewriting script. While the script is easy to use, it is strongly recomended that you use the script as part of the following process: \n", + "The rest of this guide demonstrates how to use the symbol-rewriting script. While the script is easy to use, it is strongly recommended that you use the script as part of the following process: \n", "\n", "1. **Unit Test**: Ensure that the code you’re upgrading has a unit test suite with reasonable coverage. This is Python code, so the language won’t protect you from many classes of mistakes. Also ensure that any dependency you have has already been upgraded to be compatible with TensorFlow 2.x.\n", "\n", @@ -574,7 +574,7 @@ "source": [ "## Caveats\n", "\n", - "- Do not update parts of your code manually before running this script. In particular, functions that have had reordered arguments like `tf.argmax` or `tf.batch_to_space` cause the script to incorrectly add keyword arguments that mismap your existing code.\n", + "- Do not update parts of your code manually before running this script. In particular, functions that have had reordered arguments like `tf.math.argmax` or `tf.batch_to_space` cause the script to incorrectly add keyword arguments that mismap your existing code.\n", "\n", "- The script assumes that `tensorflow` is imported using `import tensorflow as tf`, or `import tensorflow.compat.v1 as tf`.\n", "\n", diff --git a/site/en/guide/migrate/validate_correctness.ipynb b/site/en/guide/migrate/validate_correctness.ipynb index 458d03ad48f..a0555cdd55c 100644 --- a/site/en/guide/migrate/validate_correctness.ipynb +++ b/site/en/guide/migrate/validate_correctness.ipynb @@ -1254,7 +1254,7 @@ "source": [ "## Step 3b or 4b (optional): Testing with pre-existing checkpoints\n", "\n", - "After step 3 or step 4 above, it can be useful to run your numerical equivalence tests when starting from pre-existing name-based checkpoints if you have some. This can test both that your legacy checkpoint loading is working correctly and that the model itself is working right. The [Reusing TF1.x checkpoints guide](./reuse_checkpoints.ipynb) covers how to reuse your pre-existing TF1.x checkpoints and transfer them over to TF2 checkpoints.\n" + "After step 3 or step 4 above, it can be useful to run your numerical equivalence tests when starting from pre-existing name-based checkpoints if you have some. This can test both that your legacy checkpoint loading is working correctly and that the model itself is working right. 
The [Reusing TF1.x checkpoints guide](./migrating_checkpoints.ipynb) covers how to reuse your pre-existing TF1.x checkpoints and transfer them over to TF2 checkpoints.\n" ] }, { diff --git a/site/en/guide/mixed_precision.ipynb b/site/en/guide/mixed_precision.ipynb index 05d29122211..a19d6f254f3 100644 --- a/site/en/guide/mixed_precision.ipynb +++ b/site/en/guide/mixed_precision.ipynb @@ -70,7 +70,7 @@ "source": [ "## Overview\n", "\n", - "Mixed precision is the use of both 16-bit and 32-bit floating-point types in a model during training to make it run faster and use less memory. By keeping certain parts of the model in the 32-bit types for numeric stability, the model will have a lower step time and train equally as well in terms of the evaluation metrics such as accuracy. This guide describes how to use the Keras mixed precision API to speed up your models. Using this API can improve performance by more than 3 times on modern GPUs and 60% on TPUs." + "Mixed precision is the use of both 16-bit and 32-bit floating-point types in a model during training to make it run faster and use less memory. By keeping certain parts of the model in the 32-bit types for numeric stability, the model will have a lower step time and train equally as well in terms of the evaluation metrics such as accuracy. This guide describes how to use the Keras mixed precision API to speed up your models. Using this API can improve performance by more than 3 times on modern GPUs, 60% on TPUs and more than 2 times on latest Intel CPUs." ] }, { @@ -81,7 +81,7 @@ "source": [ "Today, most models use the float32 dtype, which takes 32 bits of memory. However, there are two lower-precision dtypes, float16 and bfloat16, each which take 16 bits of memory instead. Modern accelerators can run operations faster in the 16-bit dtypes, as they have specialized hardware to run 16-bit computations and 16-bit dtypes can be read from memory faster.\n", "\n", - "NVIDIA GPUs can run operations in float16 faster than in float32, and TPUs can run operations in bfloat16 faster than float32. Therefore, these lower-precision dtypes should be used whenever possible on those devices. However, variables and a few computations should still be in float32 for numeric reasons so that the model trains to the same quality. The Keras mixed precision API allows you to use a mix of either float16 or bfloat16 with float32, to get the performance benefits from float16/bfloat16 and the numeric stability benefits from float32.\n", + "NVIDIA GPUs can run operations in float16 faster than in float32, and TPUs and supporting Intel CPUs can run operations in bfloat16 faster than float32. Therefore, these lower-precision dtypes should be used whenever possible on those devices. However, variables and a few computations should still be in float32 for numeric reasons so that the model trains to the same quality. The Keras mixed precision API allows you to use a mix of either float16 or bfloat16 with float32, to get the performance benefits from float16/bfloat16 and the numeric stability benefits from float32.\n", "\n", "Note: In this guide, the term \"numeric stability\" refers to how a model's quality is affected by the use of a lower-precision dtype instead of a higher precision dtype. An operation is \"numerically unstable\" in float16 or bfloat16 if running it in one of those dtypes causes the model to have worse evaluation accuracy or other metrics compared to running the operation in float32." 
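For readers skimming the diff, here is a minimal sketch of the API being described (not taken from the guide itself): the global dtype policy determines each new layer's compute dtype and variable dtype.

```python
import tensorflow as tf
from tensorflow.keras import mixed_precision

# Use 'mixed_bfloat16' instead on TPUs and supporting CPUs.
mixed_precision.set_global_policy('mixed_float16')
print(mixed_precision.global_policy())

layer = tf.keras.layers.Dense(8)
print(layer.compute_dtype)  # float16: computations run in half precision.
print(layer.dtype)          # float32: variables stay in full precision.

# Reset the default policy so later experiments are unaffected.
mixed_precision.set_global_policy('float32')
```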
] @@ -118,9 +118,11 @@ "source": [ "## Supported hardware\n", "\n", - "While mixed precision will run on most hardware, it will only speed up models on recent NVIDIA GPUs and Cloud TPUs. NVIDIA GPUs support using a mix of float16 and float32, while TPUs support a mix of bfloat16 and float32.\n", + "While mixed precision will run on most hardware, it will only speed up models on recent NVIDIA GPUs, Cloud TPUs and recent Intel CPUs. NVIDIA GPUs support using a mix of float16 and float32, while TPUs and Intel CPUs support a mix of bfloat16 and float32.\n", "\n", - "Among NVIDIA GPUs, those with compute capability 7.0 or higher will see the greatest performance benefit from mixed precision because they have special hardware units, called Tensor Cores, to accelerate float16 matrix multiplications and convolutions. Older GPUs offer no math performance benefit for using mixed precision, however memory and bandwidth savings can enable some speedups. You can look up the compute capability for your GPU at NVIDIA's [CUDA GPU web page](https://developer.nvidia.com/cuda-gpus). Examples of GPUs that will benefit most from mixed precision include RTX GPUs, the V100, and the A100." + "Among NVIDIA GPUs, those with compute capability 7.0 or higher will see the greatest performance benefit from mixed precision because they have special hardware units, called Tensor Cores, to accelerate float16 matrix multiplications and convolutions. Older GPUs offer no math performance benefit for using mixed precision, however memory and bandwidth savings can enable some speedups. You can look up the compute capability for your GPU at NVIDIA's [CUDA GPU web page](https://developer.nvidia.com/cuda-gpus). Examples of GPUs that will benefit most from mixed precision include RTX GPUs, the V100, and the A100.\n", + "\n", + "Among Intel CPUs, starting with the 4th Gen Intel Xeon Processors (code name Sapphire Rapids), will see the greatest performance benefit from mixed precision as they can accelerate bfloat16 computations using AMX instructions (requires Tensorflow 2.12 or later)." ] }, { @@ -129,7 +131,7 @@ "id": "-q2hisD60F0_" }, "source": [ - "Note: If running this guide in Google Colab, the GPU runtime typically has a P100 connected. The P100 has compute capability 6.0 and is not expected to show a significant speedup.\n", + "Note: If running this guide in Google Colab, the GPU runtime typically has a P100 connected. The P100 has compute capability 6.0 and is not expected to show a significant speedup. If running on CPU runtime, there may be a slow down as the runtime likely has a CPU without AMX.\n", "\n", "You can check your GPU type with the following. The command only exists if the\n", "NVIDIA drivers are installed, so the following will raise an error otherwise." @@ -154,7 +156,7 @@ "source": [ "All Cloud TPUs support bfloat16.\n", "\n", - "Even on CPUs and older GPUs, where no speedup is expected, mixed precision APIs can still be used for unit testing, debugging, or just to try out the API. On CPUs, mixed precision will run significantly slower, however." + "Even on older Intel CPUs, other x86 CPUs without AMX, and older GPUs, where no speedup is expected, mixed precision APIs can still be used for unit testing, debugging, or just to try out the API. However, mixed_bfloat16 on CPUs without AMX instructions and mixed_float16 on all x86 CPUs will run significantly slower." 
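One way to check the compute capability mentioned above from inside a program is sketched below (an editor's example, not original guide text); it assumes `tf.config.experimental.get_device_details` reports a `compute_capability` tuple, which is only populated for NVIDIA GPUs.

```python
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if not gpus:
  print('No GPU found; mixed_float16 will run, but without a speedup.')
else:
  details = tf.config.experimental.get_device_details(gpus[0])
  cc = details.get('compute_capability')
  print('GPU:', details.get('device_name'), 'compute capability:', cc)
  if cc and cc >= (7, 0):
    print('Tensor Cores available: expect a speedup from mixed_float16.')
  else:
    print('Compute capability below 7.0: mixed_float16 may not be faster here.')
```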
] }, { @@ -235,7 +237,7 @@ "id": "MOFEcna28o4T" }, "source": [ - "As mentioned before, the `mixed_float16` policy will most significantly improve performance on NVIDIA GPUs with compute capability of at least 7.0. The policy will run on other GPUs and CPUs but may not improve performance. For TPUs, the `mixed_bfloat16` policy should be used instead." + "As mentioned before, the `mixed_float16` policy will most significantly improve performance on NVIDIA GPUs with compute capability of at least 7.0. The policy will run on other GPUs and CPUs but may not improve performance. For TPUs and CPUs, the `mixed_bfloat16` policy should be used instead." ] }, { @@ -411,7 +413,7 @@ "id": "0Sm8FJHegVRN" }, "source": [ - "This example cast the input data from int8 to float32. You don't cast to float16 since the division by 255 is on the CPU, which runs float16 operations slower than float32 operations. In this case, the performance difference in negligible, but in general you should run input processing math in float32 if it runs on the CPU. The first layer of the model will cast the inputs to float16, as each layer casts floating-point inputs to its compute dtype.\n", + "This example casts the input data from int8 to float32. You don't cast to float16 since the division by 255 is on the CPU, which runs float16 operations slower than float32 operations. In this case, the performance difference is negligible, but in general you should run input processing math in float32 if it runs on the CPU. The first layer of the model will cast the inputs to float16, as each layer casts floating-point inputs to its compute dtype.\n", "\n", "The initial weights of the model are retrieved. This will allow training from scratch again by loading the weights." ] @@ -465,7 +467,7 @@ " \n", "If you are running this guide in Colab, you can compare the performance of mixed precision with float32. To do so, change the policy from `mixed_float16` to `float32` in the \"Setting the dtype policy\" section, then rerun all the cells up to this point. On GPUs with compute capability 7.X, you should see the time per step significantly increase, indicating mixed precision sped up the model. Make sure to change the policy back to `mixed_float16` and rerun the cells before continuing with the guide.\n", "\n", - "On GPUs with compute capability of at least 8.0 (Ampere GPUs and above), you likely will see no performance improvement in the toy model in this guide when using mixed precision compared to float32. This is due to the use of [TensorFloat-32](https://www.tensorflow.org/api_docs/python/tf/config/experimental/enable_tensor_float_32_execution), which automatically uses lower precision math in certain float32 ops such as `tf.linalg.matmul`. TensorFloat-32 gives some of the performance advantages of mixed precision when using float32. However, in real-world models, you will still typically see significantly performance improvements from mixed precision due to memory bandwidth savings and ops which TensorFloat-32 does not support.\n", + "On GPUs with compute capability of at least 8.0 (Ampere GPUs and above), you likely will see no performance improvement in the toy model in this guide when using mixed precision compared to float32. This is due to the use of [TensorFloat-32](https://www.tensorflow.org/api_docs/python/tf/config/experimental/enable_tensor_float_32_execution), which automatically uses lower precision math in certain float32 ops such as `tf.linalg.matmul`. 
TensorFloat-32 gives some of the performance advantages of mixed precision when using float32. However, in real-world models, you will still typically experience significant performance improvements from mixed precision due to memory bandwidth savings and ops which TensorFloat-32 does not support.\n", "\n", "If running mixed precision on a TPU, you will not see as much of a performance gain compared to running mixed precision on GPUs, especially pre-Ampere GPUs. This is because TPUs do certain ops in bfloat16 under the hood even with the default dtype policy of float32. This is similar to how Ampere GPUs use TensorFloat-32 by default. Compared to Ampere GPUs, TPUs typically see less performance gains with mixed precision on real-world models.\n", "\n", @@ -480,7 +482,9 @@ "source": [ "## Loss scaling\n", "\n", - "Loss scaling is a technique which `tf.keras.Model.fit` automatically performs with the `mixed_float16` policy to avoid numeric underflow. This section describes what loss scaling is and the next section describes how to use it with a custom training loop." + "Loss scaling is a technique which `tf.keras.Model.fit` automatically performs with the `mixed_float16` policy to avoid numeric underflow. This section describes what loss scaling is and the next section describes how to use it with a custom training loop.\n", + "\n", + "Note: When using `mixed_bfloat16` policy, there is no need to do loss scaling." ] }, { @@ -612,7 +616,7 @@ "id": "FVy5gnBqTE9z" }, "source": [ - "If you want, it is possible choose an explicit loss scale or otherwise customize the loss scaling behavior, but it is highly recommended to keep the default loss scaling behavior, as it has been found to work well on all known models. See the `tf.keras.mixed_precision.LossScaleOptimizer` documention if you want to customize the loss scaling behavior." + "If you want, it is possible choose an explicit loss scale or otherwise customize the loss scaling behavior, but it is highly recommended to keep the default loss scaling behavior, as it has been found to work well on all known models. See the `tf.keras.mixed_precision.LossScaleOptimizer` documentation if you want to customize the loss scaling behavior." ] }, { @@ -806,16 +810,17 @@ "source": [ "## Summary\n", "\n", - "- You should use mixed precision if you use TPUs or NVIDIA GPUs with at least compute capability 7.0, as it will improve performance by up to 3x.\n", + "- You should use mixed precision if you use TPUs, NVIDIA GPUs with at least compute capability 7.0, or Intel CPUs with support for AMX instructions, as it will improve performance by up to 3x.\n", "- You can use mixed precision with the following lines:\n", "\n", " ```python\n", - " # On TPUs, use 'mixed_bfloat16' instead\n", + " # On TPUs and CPUs, use 'mixed_bfloat16' instead\n", " mixed_precision.set_global_policy('mixed_float16')\n", " ```\n", "\n", "* If your model ends in softmax, make sure it is float32. And regardless of what your model ends in, make sure the output is float32.\n", "* If you use a custom training loop with `mixed_float16`, in addition to the above lines, you need to wrap your optimizer with a `tf.keras.mixed_precision.LossScaleOptimizer`. 
Then call `optimizer.get_scaled_loss` to scale the loss, and `optimizer.get_unscaled_gradients` to unscale the gradients.\n", + "* If you use a custom training loop with `mixed_bfloat16`, setting the global_policy mentioned above is sufficient.\n", "* Double the training batch size if it does not reduce evaluation accuracy\n", "* On GPUs, ensure most tensor dimensions are a multiple of $8$ to maximize performance\n", "\n", diff --git a/site/en/guide/profiler.md b/site/en/guide/profiler.md index 1cd19c109fe..dee8a5a84af 100644 --- a/site/en/guide/profiler.md +++ b/site/en/guide/profiler.md @@ -55,7 +55,7 @@ found. When you run profiling with CUDA® Toolkit in a Docker environment or on Linux, you may encounter issues related to insufficient CUPTI privileges (`CUPTI_ERROR_INSUFFICIENT_PRIVILEGES`). Go to the -[NVIDIA Developer Docs](https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters){:.external} +[NVIDIA Developer Docs](https://developer.nvidia.com/nvidia-development-tools-solutions-ERR_NVGPUCTRPERM-permission-issue-performance-counters) to learn more about how you can resolve these issues on Linux. To resolve CUPTI privilege issues in a Docker environment, run @@ -694,7 +694,7 @@ first few batches to avoid inaccuracies due to initialization overhead. An example for profiling multiple workers: ```python - # E.g. your worker IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you + # E.g., your worker IP addresses are 10.0.0.2, 10.0.0.3, 10.0.0.4, and you # would like to profile for a duration of 2 seconds. tf.profiler.experimental.client.trace( 'grpc://10.0.0.2:8466,grpc://10.0.0.3:8466,grpc://10.0.0.4:8466', @@ -845,7 +845,7 @@ more efficient by casting to different data types after applying spatial transformations, such as flipping, cropping, rotating, etc. Note: Some ops like `tf.image.resize` transparently change the `dtype` to -`fp32`. Make sure you normalize your data to lie between `0` and `1` if its not +`fp32`. Make sure you normalize your data to lie between `0` and `1` if it's not done automatically. Skipping this step could lead to `NaN` errors if you have enabled [AMP](https://developer.nvidia.com/automatic-mixed-precision). diff --git a/site/en/guide/ragged_tensor.ipynb b/site/en/guide/ragged_tensor.ipynb index 4bc0d679499..ba0be2928ce 100644 --- a/site/en/guide/ragged_tensor.ipynb +++ b/site/en/guide/ragged_tensor.ipynb @@ -81,6 +81,7 @@ }, "outputs": [], "source": [ + "!pip install --pre -U tensorflow\n", "import math\n", "import tensorflow as tf" ] @@ -109,7 +110,7 @@ "source": [ "### What you can do with a ragged tensor\n", "\n", - "Ragged tensors are supported by more than a hundred TensorFlow operations, including math operations (such as `tf.add` and `tf.reduce_mean`), array operations (such as `tf.concat` and `tf.tile`), string manipulation ops (such as `tf.substr`), control flow operations (such as `tf.while_loop` and `tf.map_fn`), and many others:" + "Ragged tensors are supported by more than a hundred TensorFlow operations, including math operations (such as `tf.add` and `tf.reduce_mean`), array operations (such as `tf.concat` and `tf.tile`), string manipulation ops (such as `tf.strings.substr`), control flow operations (such as `tf.while_loop` and `tf.map_fn`), and many others:" ] }, { @@ -673,14 +674,14 @@ "source": [ "### Keras\n", "\n", - "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. 
Ragged tensors may be passed as inputs to a Keras model by setting `ragged=True` on `tf.keras.Input` or `tf.keras.layers.InputLayer`. Ragged tensors may also be passed between Keras layers, and returned by Keras models. The following example shows a toy LSTM model that is trained using ragged tensors." + "[tf.keras](https://www.tensorflow.org/guide/keras) is TensorFlow's high-level API for building and training deep learning models. It doesn't have ragged support. But it does support masked tensors. So the easiest way to use a ragged tensor in a Keras model is to convert the ragged tensor to a dense tensor, using `.to_tensor()` and then using Keras's builtin masking:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "pHls7hQVJlk5" + "id": "ucYf2sSzTvQo" }, "outputs": [], "source": [ @@ -690,26 +691,77 @@ " 'She turned me into a newt.',\n", " 'A newt?',\n", " 'Well, I got better.'])\n", - "is_question = tf.constant([True, False, True, False])\n", - "\n", + "is_question = tf.constant([True, False, True, False])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MGYKmizJTw8B" + }, + "outputs": [], + "source": [ "# Preprocess the input strings.\n", "hash_buckets = 1000\n", "words = tf.strings.split(sentences, ' ')\n", "hashed_words = tf.strings.to_hash_bucket_fast(words, hash_buckets)\n", - "\n", + "hashed_words.to_list()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7FTujwOlUT8J" + }, + "outputs": [], + "source": [ + "hashed_words.to_tensor()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vzWudaESUBOZ" + }, + "outputs": [], + "source": [ + "tf.keras.Input?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pHls7hQVJlk5" + }, + "outputs": [], + "source": [ "# Build the Keras model.\n", "keras_model = tf.keras.Sequential([\n", - " tf.keras.layers.Input(shape=[None], dtype=tf.int64, ragged=True),\n", - " tf.keras.layers.Embedding(hash_buckets, 16),\n", - " tf.keras.layers.LSTM(32, use_bias=False),\n", + " tf.keras.layers.Embedding(hash_buckets, 16, mask_zero=True),\n", + " tf.keras.layers.LSTM(32, return_sequences=True, use_bias=False),\n", + " tf.keras.layers.GlobalAveragePooling1D(),\n", " tf.keras.layers.Dense(32),\n", " tf.keras.layers.Activation(tf.nn.relu),\n", " tf.keras.layers.Dense(1)\n", "])\n", "\n", "keras_model.compile(loss='binary_crossentropy', optimizer='rmsprop')\n", - "keras_model.fit(hashed_words, is_question, epochs=5)\n", - "print(keras_model.predict(hashed_words))" + "keras_model.fit(hashed_words.to_tensor(), is_question, epochs=5)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1IAjjmdTU9OU" + }, + "outputs": [], + "source": [ + "print(keras_model.predict(hashed_words.to_tensor()))" ] }, { @@ -798,7 +850,7 @@ "source": [ "### Datasets\n", "\n", - "[tf.data](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components. " + "[tf.data](https://www.tensorflow.org/guide/data) is an API that enables you to build complex input pipelines from simple, reusable pieces. Its core data structure is `tf.data.Dataset`, which represents a sequence of elements, in which each element consists of one or more components." 
] }, { @@ -1077,9 +1129,11 @@ "import tempfile\n", "\n", "keras_module_path = tempfile.mkdtemp()\n", - "tf.saved_model.save(keras_model, keras_module_path)\n", - "imported_model = tf.saved_model.load(keras_module_path)\n", - "imported_model(hashed_words)" + "keras_model.save(keras_module_path+\"/my_model.keras\")\n", + "\n", + "imported_model = tf.keras.models.load_model(keras_module_path+\"/my_model.keras\")\n", + "\n", + "imported_model(hashed_words.to_tensor())" ] }, { @@ -1439,7 +1493,7 @@ "\n", "1. Use `tf.RaggedTensor.to_list` to convert the ragged tensor to a nested Python list.\n", "2. Use `tf.RaggedTensor.numpy` to convert the ragged tensor to a NumPy array whose values are nested NumPy arrays.\n", - "3. Decompose the ragged tensor into its components, using the `tf.RaggedTensor.values` and `tf.RaggedTensor.row_splits` properties, or row-paritioning methods such as `tf.RaggedTensor.row_lengths` and `tf.RaggedTensor.value_rowids`.\n", + "3. Decompose the ragged tensor into its components, using the `tf.RaggedTensor.values` and `tf.RaggedTensor.row_splits` properties, or row-partitioning methods such as `tf.RaggedTensor.row_lengths` and `tf.RaggedTensor.value_rowids`.\n", "4. Use Python indexing to select values from the ragged tensor.\n" ] }, @@ -1459,13 +1513,267 @@ "print(\"Indexed value:\", rt[1].numpy())" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "J87jMZa0M_YW" + }, + "source": [ + "## Ragged Shapes\n", + "\n", + "The shape of a tensor specifies the size of each axis. For example, the shape of `[[1, 2], [3, 4], [5, 6]]` is `[3, 2]`, since there are 3 rows and 2 columns. TensorFlow has two separate but related ways to describe shapes:\n", + "\n", + "* ***static shape***: Information about axis sizes that is known statically (e.g., while tracing a `tf.function`). May be partially specified.\n", + "\n", + "* ***dynamic shape***: Runtime information about the axis sizes." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IOETE_OLPLZo" + }, + "source": [ + "### Static shape\n", + "\n", + "A Tensor's static shape contains information about its axis sizes that is known at graph-construction time. For both `tf.Tensor` and `tf.RaggedTensor`, it is available using the `.shape` property, and is encoded using `tf.TensorShape`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "btGDjT4uNgQy" + }, + "outputs": [], + "source": [ + "x = tf.constant([[1, 2], [3, 4], [5, 6]])\n", + "x.shape # shape of a tf.tensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "__OgvmrGPEjq" + }, + "outputs": [], + "source": [ + "rt = tf.ragged.constant([[1], [2, 3], [], [4]])\n", + "rt.shape # shape of a tf.RaggedTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9EWnQd3qPWaw" + }, + "source": [ + "The static shape of a ragged dimension is always `None` (i.e., unspecified). However, the inverse is not true -- if a `TensorShape` dimension is `None`, then that could indicate that the dimension is ragged, *or* it could indicate that the dimension is uniform but that its size is not statically known." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "75E9YXYMNfne" + }, + "source": [ + "### Dynamic shape\n", + "\n", + "A tensor's dynamic shape contains information about its axis sizes that is known when the graph is run. It is constructed using the `tf.shape` operation. 
For `tf.Tensor`, `tf.shape` returns the shape as a 1D integer `Tensor`, where `tf.shape(x)[i]` is the size of axis `i`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kWJ7Cn1EQTD_" + }, + "outputs": [], + "source": [ + "x = tf.constant([['a', 'b'], ['c', 'd'], ['e', 'f']])\n", + "tf.shape(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BeZEfxwmRcSv" + }, + "source": [ + "However, a 1D `Tensor` is not expressive enough to describe the shape of a `tf.RaggedTensor`. Instead, the dynamic shape for ragged tensors is encoded using a dedicated type, `tf.experimental.DynamicRaggedShape`. In the following example, the `DynamicRaggedShape` returned by `tf.shape(rt)` indicates that the ragged tensor has 4 rows, with lengths 1, 3, 0, and 2:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nZc2wqgQQUFU" + }, + "outputs": [], + "source": [ + "rt = tf.ragged.constant([[1], [2, 3, 4], [], [5, 6]])\n", + "rt_shape = tf.shape(rt)\n", + "print(rt_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EphU60YvTf98" + }, + "source": [ + "#### Dynamic shape: operations\n", + "\n", + "`DynamicRaggedShape`s can be used with most TensorFlow ops that expect shapes, including `tf.reshape`, `tf.zeros`, `tf.ones`. `tf.fill`, `tf.broadcast_dynamic_shape`, and `tf.broadcast_to`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pclAODLXT6Gr" + }, + "outputs": [], + "source": [ + "print(f\"tf.reshape(x, rt_shape) = {tf.reshape(x, rt_shape)}\")\n", + "print(f\"tf.zeros(rt_shape) = {tf.zeros(rt_shape)}\")\n", + "print(f\"tf.ones(rt_shape) = {tf.ones(rt_shape)}\")\n", + "print(f\"tf.fill(rt_shape, 9) = {tf.fill(rt_shape, 'x')}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rNP_3_btRAHj" + }, + "source": [ + "#### Dynamic shape: indexing and slicing\n", + "\n", + "`DynamicRaggedShape` can be also be indexed to get the sizes of uniform dimensions. For example, we can find the number of rows in a raggedtensor using `tf.shape(rt)[0]` (just as we would for a non-ragged tensor):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MzQvPhsxS6HN" + }, + "outputs": [], + "source": [ + "rt_shape[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvr2iT6zS_e8" + }, + "source": [ + "However, it is an error to use indexing to try to retrieve the size of a ragged dimension, since it doesn't have a single size. (Since `RaggedTensor` keeps track of which axes are ragged, this error is only thrown during eager execution or when tracing a `tf.function`; it will never be thrown when executing a concrete function.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HgGMk0LeTGik" + }, + "outputs": [], + "source": [ + "try:\n", + " rt_shape[1]\n", + "except ValueError as e:\n", + " print(\"Got expected ValueError:\", e)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5QUsdawGU0SM" + }, + "source": [ + "`DynamicRaggedShape`s can also be sliced, as long as the slice either begins with axis `0`, or contains only dense dimensions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "APT72EaBU70t" + }, + "outputs": [], + "source": [ + "rt_shape[:1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a-Wl9IrQXcdY" + }, + "source": [ + "#### Dynamic shape: encoding\n", + "\n", + "`DynamicRaggedShape` is encoded using two fields:\n", + "\n", + "* `inner_shape`: An integer vector giving the shape of a dense `tf.Tensor`.\n", + "* `row_partitions`: A list of `tf.experimental.RowPartition` objects, describing how the outermost dimension of that inner shape should be partitioned to add ragged axes.\n", + "\n", + "For more information about row partitions, see the \"RaggedTensor encoding\" section below, and the API docs for `tf.experimental.RowPartition`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jfeY9tTcV_zL" + }, + "source": [ + "#### Dynamic shape: construction\n", + "\n", + "`DynamicRaggedShape` is most often constructed by applying `tf.shape` to a `RaggedTensor`, but it can also be constructed directly:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NSRgD667WwIZ" + }, + "outputs": [], + "source": [ + "tf.experimental.DynamicRaggedShape(\n", + " row_partitions=[tf.experimental.RowPartition.from_row_lengths([5, 3, 2])],\n", + " inner_shape=[10, 8])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EjzVjs9MXIIA" + }, + "source": [ + "If the lengths of all rows are known statically, `DynamicRaggedShape.from_lengths` can also be used to construct a dynamic ragged shape. (This is mostly useful for testing and demonstration code, since it's rare for the lengths of ragged dimensions to be known statically).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gMxCzADUYIjY" + }, + "outputs": [], + "source": [ + "tf.experimental.DynamicRaggedShape.from_lengths([4, (2, 1, 0, 8), 12])" + ] + }, { "cell_type": "markdown", "metadata": { "id": "EdljbNPq-PWS" }, "source": [ - "## Broadcasting\n", + "### Broadcasting\n", "\n", "Broadcasting is the process of making tensors with different shapes have compatible shapes for elementwise operations. For more background on broadcasting, refer to:\n", "\n", @@ -1491,7 +1799,7 @@ "id": "-S2hOUWx-PWU" }, "source": [ - "### Broadcasting examples" + "#### Broadcasting examples" ] }, { @@ -1870,7 +2178,6 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "ragged_tensor.ipynb", "toc_visible": true }, diff --git a/site/en/guide/random_numbers.ipynb b/site/en/guide/random_numbers.ipynb index 37c83ae76a0..f8b824ad906 100644 --- a/site/en/guide/random_numbers.ipynb +++ b/site/en/guide/random_numbers.ipynb @@ -166,7 +166,7 @@ "source": [ "See the *Algorithms* section below for more information about it.\n", "\n", - "Another way to create a generator is with `Generator.from_non_deterministic_state`. A generator created this way will start from a non-deterministic state, depending on e.g. time and OS." + "Another way to create a generator is with `Generator.from_non_deterministic_state`. A generator created this way will start from a non-deterministic state, depending on e.g., time and OS." ] }, { @@ -268,7 +268,7 @@ "source": [ "Note: In theory, you can use constructors such as `from_seed` instead of `split` here to obtain a new generator, but by doing so you lose the guarantee that the new generator is independent of the global generator. 
You will also run the risk that you may accidentally create two generators with the same seed or with seeds that lead to overlapping random-number streams.\n", "\n", - "You can do splitting recursively, calling `split` on splitted generators. There are no limits (barring integer overflow) on the depth of recursions." + "You can do splitting recursively, calling `split` on split generators. There are no limits (barring integer overflow) on the depth of recursions." ] }, { @@ -325,7 +325,7 @@ "source": [ "#### Creating generators inside `tf.function` \n", "\n", - "Creation of generators inside a `tf.function` can only happend during the first run of the function. " + "Creation of generators inside a `tf.function` can only happen during the first run of the function. " ] }, { diff --git a/site/en/guide/saved_model.ipynb b/site/en/guide/saved_model.ipynb index 2e5351b49c3..2601e504669 100644 --- a/site/en/guide/saved_model.ipynb +++ b/site/en/guide/saved_model.ipynb @@ -75,7 +75,9 @@ " - Save: `tf.saved_model.save(model, path_to_dir)`\n", " - Load: `model = tf.saved_model.load(path_to_dir)`\n", "- High-level `tf.keras.Model` API. Refer to [the keras save and serialize guide](https://www.tensorflow.org/guide/keras/save_and_serialize).\n", - "- If you just want to save/load weights during training, refer to [the checkpoints guide](./checkpoint.ipynb).\n" + "- If you just want to save/load weights during training, refer to [the checkpoints guide](./checkpoint.ipynb).\n", + "\n", + "Caution: TensorFlow models are code and it is important to be careful with untrusted code. Learn more in [Using TensorFlow securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).\n" ] }, { @@ -84,8 +86,24 @@ "id": "9SuIC7FiI9g8" }, "source": [ - "## Creating a SavedModel from Keras\n", - "\n", + "## Creating a SavedModel from Keras" ] }, { "cell_type": "markdown", "metadata": { "id": "AtSmftAvhJvE" }, "source": [ + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code." ] }, { "cell_type": "markdown", "metadata": { "id": "eLSOptpYhJvE" }, "source": [ "For a quick introduction, this section exports a pre-trained Keras model and serves image classification requests with it. The rest of the guide will fill in details and discuss other ways to create SavedModels." ] }, @@ -352,7 +370,9 @@ "source": [ "The `assets` directory contains files used by the TensorFlow graph, for example text files used to initialize vocabulary tables. It is unused in this example.\n", "\n", - "SavedModels may have an `assets.extra` directory for any files not used by the TensorFlow graph, for example information for consumers about what to do with the SavedModel. TensorFlow itself does not use this directory." + "SavedModels may have an `assets.extra` directory for any files not used by the TensorFlow graph, for example information for consumers about what to do with the SavedModel. TensorFlow itself does not use this directory.\n", + "\n", + "The `fingerprint.pb` file contains the [fingerprint](https://en.wikipedia.org/wiki/Fingerprint_(computing)) of the SavedModel, which is composed of several 64-bit hashes that uniquely identify the contents of the SavedModel. 
The fingerprinting API is currently experimental, but `tf.saved_model.experimental.read_fingerprint` can be used to read the SavedModel fingerprint into a `tf.saved_model.experimental.Fingerprint` object." ] }, { @@ -491,7 +511,7 @@ }, "outputs": [], "source": [ - "optimizer = tf.optimizers.SGD(0.05)\n", + "optimizer = tf.keras.optimizers.SGD(0.05)\n", "\n", "def train_step():\n", " with tf.GradientTape() as tape:\n", @@ -619,7 +639,7 @@ "outputs": [], "source": [ "imported_with_signatures = tf.saved_model.load(module_with_signature_path)\n", - "list(imported_with_signatures.signatures.keys())\n" + "list(imported_with_signatures.signatures.keys()) # [\"serving_default\"]" ] }, { @@ -654,8 +674,12 @@ }, "outputs": [], "source": [ - "imported_with_multiple_signatures = tf.saved_model.load(module_multiple_signatures_path)\n", - "list(imported_with_multiple_signatures.signatures.keys())" + "imported_with_multiple_signatures = tf.saved_model.load(\n", + " module_multiple_signatures_path\n", + ")\n", + "list(\n", + " imported_with_multiple_signatures.signatures.keys()\n", + ") # [\"serving_default\", \"array_input\"]" ] }, { @@ -680,7 +704,7 @@ " super(CustomModuleWithOutputName, self).__init__()\n", " self.v = tf.Variable(1.)\n", "\n", - " @tf.function(input_signature=[tf.TensorSpec([], tf.float32)])\n", + " @tf.function(input_signature=[tf.TensorSpec(None, tf.float32)])\n", " def __call__(self, x):\n", " return {'custom_output_name': x * self.v}\n", "\n", @@ -700,7 +724,41 @@ "outputs": [], "source": [ "imported_with_output_name = tf.saved_model.load(module_output_path)\n", - "imported_with_output_name.signatures['serving_default'].structured_outputs" + "imported_with_output_name.signatures[\n", + " 'serving_default'\n", + "].structured_outputs # {'custom_output_name': TensorSpec(shape=, dtype=tf.float32, name='custom_output_name')}" ] }, { "cell_type": "markdown", "metadata": { "id": "Q4bCK55x1IBW" }, "source": [ + "## Proto-splitting\n", + "\n", + "Note: This feature will be part of the TensorFlow 2.15 release. It is currently available in the nightly build, which you can install with `pip install tf-nightly`.\n", + "\n", + "Due to limits of the protobuf implementation, proto sizes cannot exceed 2GB. This can lead to the following errors when attempting to save very large models:\n", + "\n", + "```\n", + "ValueError: Message tensorflow.SavedModel exceeds maximum protobuf size of 2GB: ...\n", + "```\n", + "\n", + "```\n", + "google.protobuf.message.DecodeError: Error parsing message as the message exceeded the protobuf limit with type 'tensorflow.GraphDef'\n", + "```\n", + "\n", + "If you wish to save models that exceed the 2GB limit, then you'll need to save using the new proto-splitting option:\n", + "\n", + "```python\n", + "tf.saved_model.save(\n", + " ...,\n", + " options=tf.saved_model.SaveOptions(experimental_image_format=True)\n", + ")\n", + "```\n", + "\n", + "More information can be found in the [Proto Splitter / Merger Library guide](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/proto_splitter/g3doc/in-depth-guide.md)." 
] }, { @@ -758,7 +816,7 @@ "additional command to build `saved_model_cli`:\n", "\n", "```\n", - "$ bazel build tensorflow/python/tools:saved_model_cli\n", + "$ bazel build //tensorflow/python/tools:saved_model_cli\n", "```\n", "\n", "### Overview of commands\n", @@ -974,8 +1032,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "saved_model.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/sparse_tensor.ipynb b/site/en/guide/sparse_tensor.ipynb index 2395c6e6365..3d4daca7fad 100644 --- a/site/en/guide/sparse_tensor.ipynb +++ b/site/en/guide/sparse_tensor.ipynb @@ -79,7 +79,7 @@ "source": [ "## Sparse tensors in TensorFlow\n", "\n", - "TensorFlow represents sparse tensors through the `tf.SparseTensor` object. Currently, sparse tensors in TensorFlow are encoded using the coordinate list (COO) format. This encoding format is optimized for hyper-sparse matrices such as embeddings.\n", + "TensorFlow represents sparse tensors through the `tf.sparse.SparseTensor` object. Currently, sparse tensors in TensorFlow are encoded using the coordinate list (COO) format. This encoding format is optimized for hyper-sparse matrices such as embeddings.\n", "\n", "The COO encoding for sparse tensors is comprised of:\n", "\n", @@ -87,9 +87,9 @@ " * `indices`: A 2D tensor with shape `[N, rank]`, containing the indices of the nonzero values.\n", " * `dense_shape`: A 1D tensor with shape `[rank]`, specifying the shape of the tensor.\n", "\n", - "A ***nonzero*** value in the context of a `tf.SparseTensor` is a value that's not explicitly encoded. It is possible to explicitly include zero values in the `values` of a COO sparse matrix, but these \"explicit zeros\" are generally not included when referring to nonzero values in a sparse tensor.\n", + "A ***nonzero*** value in the context of a `tf.sparse.SparseTensor` is a value that's not explicitly encoded. It is possible to explicitly include zero values in the `values` of a COO sparse matrix, but these \"explicit zeros\" are generally not included when referring to nonzero values in a sparse tensor.\n", "\n", - "Note: `tf.SparseTensor` does not require that indices/values be in any particular order, but several ops assume that they're in row-major order. Use `tf.sparse.reorder` to create a copy of the sparse tensor that is sorted in the canonical row-major order. " + "Note: `tf.sparse.SparseTensor` does not require that indices/values be in any particular order, but several ops assume that they're in row-major order. Use `tf.sparse.reorder` to create a copy of the sparse tensor that is sorted in the canonical row-major order. " ] }, { @@ -98,7 +98,7 @@ "id": "6Aq7ruwlyz79" }, "source": [ - "## Creating a `tf.SparseTensor`\n", + "## Creating a `tf.sparse.SparseTensor`\n", "\n", "Construct sparse tensors by directly specifying their `values`, `indices`, and `dense_shape`." 
] @@ -122,7 +122,7 @@ }, "outputs": [], "source": [ - "st1 = tf.SparseTensor(indices=[[0, 3], [2, 4]],\n", + "st1 = tf.sparse.SparseTensor(indices=[[0, 3], [2, 4]],\n", " values=[10, 20],\n", " dense_shape=[3, 10])" ] @@ -252,11 +252,11 @@ }, "outputs": [], "source": [ - "st_a = tf.SparseTensor(indices=[[0, 2], [3, 4]],\n", + "st_a = tf.sparse.SparseTensor(indices=[[0, 2], [3, 4]],\n", " values=[31, 2], \n", " dense_shape=[4, 10])\n", "\n", - "st_b = tf.SparseTensor(indices=[[0, 2], [7, 0]],\n", + "st_b = tf.sparse.SparseTensor(indices=[[0, 2], [3, 0]],\n", " values=[56, 38],\n", " dense_shape=[4, 10])\n", "\n", @@ -282,7 +282,7 @@ }, "outputs": [], "source": [ - "st_c = tf.SparseTensor(indices=([0, 1], [1, 0], [1, 1]),\n", + "st_c = tf.sparse.SparseTensor(indices=([0, 1], [1, 0], [1, 1]),\n", " values=[13, 15, 17],\n", " dense_shape=(2,2))\n", "\n", @@ -309,14 +309,14 @@ }, "outputs": [], "source": [ - "sparse_pattern_A = tf.SparseTensor(indices = [[2,4], [3,3], [3,4], [4,3], [4,4], [5,4]],\n", + "sparse_pattern_A = tf.sparse.SparseTensor(indices = [[2,4], [3,3], [3,4], [4,3], [4,4], [5,4]],\n", " values = [1,1,1,1,1,1],\n", " dense_shape = [8,5])\n", - "sparse_pattern_B = tf.SparseTensor(indices = [[0,2], [1,1], [1,3], [2,0], [2,4], [2,5], [3,5], \n", + "sparse_pattern_B = tf.sparse.SparseTensor(indices = [[0,2], [1,1], [1,3], [2,0], [2,4], [2,5], [3,5], \n", " [4,5], [5,0], [5,4], [5,5], [6,1], [6,3], [7,2]],\n", " values = [1,1,1,1,1,1,1,1,1,1,1,1,1,1],\n", " dense_shape = [8,6])\n", - "sparse_pattern_C = tf.SparseTensor(indices = [[3,0], [4,0]],\n", + "sparse_pattern_C = tf.sparse.SparseTensor(indices = [[3,0], [4,0]],\n", " values = [1,1],\n", " dense_shape = [8,6])\n", "\n", @@ -381,7 +381,7 @@ }, "outputs": [], "source": [ - "st2_plus_5 = tf.SparseTensor(\n", + "st2_plus_5 = tf.sparse.SparseTensor(\n", " st2.indices,\n", " st2.values + 5,\n", " st2.dense_shape)\n", @@ -394,7 +394,7 @@ "id": "GFhO2ZZ53ga1" }, "source": [ - "## Using `tf.SparseTensor` with other TensorFlow APIs\n", + "## Using `tf.sparse.SparseTensor` with other TensorFlow APIs\n", "\n", "Sparse tensors work transparently with these TensorFlow APIs:\n", "\n", @@ -449,7 +449,7 @@ "y = tf.keras.layers.Dense(4)(x)\n", "model = tf.keras.Model(x, y)\n", "\n", - "sparse_data = tf.SparseTensor(\n", + "sparse_data = tf.sparse.SparseTensor(\n", " indices = [(0,0),(0,1),(0,2),\n", " (4,3),(5,0),(5,1)],\n", " values = [1,1,1,1,1,1],\n", @@ -569,9 +569,9 @@ "\n", "`tf.train.Example` is a standard protobuf encoding for TensorFlow data. When using sparse tensors with `tf.train.Example`, you can:\n", "\n", - "* Read variable-length data into a `tf.SparseTensor` using `tf.io.VarLenFeature`. However, you should consider using `tf.io.RaggedFeature` instead.\n", + "* Read variable-length data into a `tf.sparse.SparseTensor` using `tf.io.VarLenFeature`. However, you should consider using `tf.io.RaggedFeature` instead.\n", "\n", - "* Read arbitrary sparse data into a `tf.SparseTensor` using `tf.io.SparseFeature`, which uses three separate feature keys to store the `indices`, `values`, and `dense_shape`." + "* Read arbitrary sparse data into a `tf.sparse.SparseTensor` using `tf.io.SparseFeature`, which uses three separate feature keys to store the `indices`, `values`, and `dense_shape`." 
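If it helps to see those two feature types in action, here is a minimal, hedged sketch (not part of the guide; the feature keys `idx` and `val` are hypothetical) of parsing a `tf.train.Example` into a `tf.sparse.SparseTensor` with `tf.io.SparseFeature`:

```python
import tensorflow as tf

# A tiny serialized tf.train.Example with made-up feature keys.
example = tf.train.Example(features=tf.train.Features(feature={
    "idx": tf.train.Feature(int64_list=tf.train.Int64List(value=[0, 3])),
    "val": tf.train.Feature(float_list=tf.train.FloatList(value=[10.0, 20.0])),
})).SerializeToString()

# tf.io.SparseFeature stitches the index and value keys into one sparse tensor.
parsed = tf.io.parse_single_example(example, {
    "sp": tf.io.SparseFeature(index_key="idx", value_key="val",
                              dtype=tf.float32, size=10)})
print(parsed["sp"])  # a tf.sparse.SparseTensor with dense_shape [10]
```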
] }, { @@ -597,7 +597,7 @@ "def f(x,y):\n", " return tf.sparse.sparse_dense_matmul(x,y)\n", "\n", - "a = tf.SparseTensor(indices=[[0, 3], [2, 4]],\n", + "a = tf.sparse.SparseTensor(indices=[[0, 3], [2, 4]],\n", " values=[15, 25],\n", " dense_shape=[3, 10])\n", "\n", @@ -616,11 +616,11 @@ "source": [ "## Distinguishing missing values from zero values\n", "\n", - "Most ops on `tf.SparseTensor`s treat missing values and explicit zero values identically. This is by design — a `tf.SparseTensor` is supposed to act just like a dense tensor.\n", + "Most ops on `tf.sparse.SparseTensor`s treat missing values and explicit zero values identically. This is by design — a `tf.sparse.SparseTensor` is supposed to act just like a dense tensor.\n", "\n", "However, there are a few cases where it can be useful to distinguish zero values from missing values. In particular, this allows for one way to encode missing/unknown data in your training data. For example, consider a use case where you have a tensor of scores (that can have any floating point value from -Inf to +Inf), with some missing scores. You can encode this tensor using a sparse tensor where the explicit zeros are known zero scores but the implicit zero values actually represent missing data and not zero. \n", "\n", - "Note: This is generally not the intended usage of `tf.SparseTensor`s; and you might want to also consier other techniques for encoding this such as for example using a separate mask tensor that identifies the locations of known/unknown values. However, exercise caution while using this approach, since most sparse operations will treat explicit and implicit zero values identically." + "Note: This is generally not the intended usage of `tf.sparse.SparseTensor`s; and you might want to also consider other techniques for encoding this such as for example using a separate mask tensor that identifies the locations of known/unknown values. However, exercise caution while using this approach, since most sparse operations will treat explicit and implicit zero values identically." ] }, { @@ -680,8 +680,7 @@ "metadata": { "colab": { "collapsed_sections": [], - "name": "sparse_tensor_guide.ipynb", - "provenance": [], + "name": "sparse_tensor.ipynb", "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tensor.ipynb b/site/en/guide/tensor.ipynb index ac27fb83ab8..2eb261aad75 100644 --- a/site/en/guide/tensor.ipynb +++ b/site/en/guide/tensor.ipynb @@ -80,7 +80,7 @@ "id": "VQ3s2J8Vgowq" }, "source": [ - "Tensors are multi-dimensional arrays with a uniform type (called a `dtype`). You can see all supported `dtypes` at `tf.dtypes.DType`.\n", + "Tensors are multi-dimensional arrays with a uniform type (called a `dtype`). You can see all supported `dtypes` at `tf.dtypes`.\n", "\n", "If you're familiar with [NumPy](https://numpy.org/devdocs/user/quickstart.html), tensors are (kind of) like `np.arrays`.\n", "\n", @@ -95,7 +95,7 @@ "source": [ "## Basics\n", "\n", - "Let's create some basic tensors." + "First, create some basic tensors." ] }, { @@ -326,7 +326,7 @@ "a = tf.constant([[1, 2],\n", " [3, 4]])\n", "b = tf.constant([[1, 1],\n", - " [1, 1]]) # Could have also said `tf.ones([2,2])`\n", + " [1, 1]]) # Could have also said `tf.ones([2,2], dtype=tf.int32)`\n", "\n", "print(tf.add(a, b), \"\\n\")\n", "print(tf.multiply(a, b), \"\\n\")\n", @@ -352,7 +352,7 @@ "id": "S3_vIAl2JPVc" }, "source": [ - "Tensors are used in all kinds of operations (ops)." + "Tensors are used in all kinds of operations (or \"Ops\")." 
] }, { @@ -368,11 +368,53 @@ "# Find the largest value\n", "print(tf.reduce_max(c))\n", "# Find the index of the largest value\n", - "print(tf.argmax(c))\n", + "print(tf.math.argmax(c))\n", "# Compute the softmax\n", "print(tf.nn.softmax(c))" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "0MNM-q7-MZLz" + }, + "source": [ + "Note: Typically, anywhere a TensorFlow function expects a `Tensor` as input, the function will also accept anything that can be converted to a `Tensor` using `tf.convert_to_tensor`. See below for an example." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_wch0N8xNEt-" + }, + "outputs": [], + "source": [ + "tf.convert_to_tensor([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ngqIeWYeNJVI" + }, + "outputs": [], + "source": [ + "tf.reduce_max([1,2,3])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ThVMxqbVNOq3" + }, + "outputs": [], + "source": [ + "tf.reduce_max(np.array([1,2,3]))" + ] + }, { "cell_type": "markdown", "metadata": { @@ -393,7 +435,7 @@ "* **Shape**: The length (number of elements) of each of the axes of a tensor.\n", "* **Rank**: Number of tensor axes. A scalar has rank 0, a vector has rank 1, a matrix is rank 2.\n", "* **Axis** or **Dimension**: A particular dimension of a tensor.\n", - "* **Size**: The total number of items in the tensor, the product shape vector.\n" + "* **Size**: The total number of items in the tensor, the product of the shape vector's elements.\n" ] }, { @@ -461,6 +503,37 @@ "print(\"Total number of elements (3*2*4*5): \", tf.size(rank_4_tensor).numpy())" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "2ZGZp_JOOPOv" + }, + "source": [ + "But note that the `Tensor.ndim` and `Tensor.shape` attributes don't return `Tensor` objects. If you need a `Tensor` use the `tf.rank` or `tf.shape` function. This difference is subtle, but it can be important when building graphs (later)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ptq0-y6APCpD" + }, + "outputs": [], + "source": [ + "tf.rank(rank_4_tensor)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HslrDOEBPICN" + }, + "outputs": [], + "source": [ + "tf.shape(rank_4_tensor)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -962,7 +1035,7 @@ "source": [ "## Broadcasting\n", "\n", - "Broadcasting is a concept borrowed from the [equivalent feature in NumPy](https://numpy.org/doc/stable/user/basics.html). In short, under certain conditions, smaller tensors are \"stretched\" automatically to fit larger tensors when running combined operations on them.\n", + "Broadcasting is a concept borrowed from the [equivalent feature in NumPy](https://numpy.org/doc/stable/user/basics.broadcasting.html). In short, under certain conditions, smaller tensors are \"stretched\" automatically to fit larger tensors when running combined operations on them.\n", "\n", "The simplest and most common case is when you attempt to multiply or add a tensor to a scalar. In that case, the scalar is broadcast to be the same shape as the other argument. 
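As a quick, hedged illustration of that sentence (standard TensorFlow broadcasting behavior, not lines taken from this notebook):

```python
import tensorflow as tf

x = tf.constant([1, 2, 3])
print(x * 2)                         # the scalar 2 is stretched to [2, 2, 2]
print(x + tf.constant([[1], [10]]))  # shapes [3] and [2, 1] broadcast to [2, 3]
```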
" ] @@ -1372,7 +1445,7 @@ "id": "st9OxrUxWSKY" }, "source": [ - "And `tf.string.to_number`:" + "And `tf.strings.to_number`:" ] }, { diff --git a/site/en/guide/tensor_slicing.ipynb b/site/en/guide/tensor_slicing.ipynb index 9f58a206de6..c5cb2d71356 100644 --- a/site/en/guide/tensor_slicing.ipynb +++ b/site/en/guide/tensor_slicing.ipynb @@ -635,7 +635,6 @@ "colab": { "collapsed_sections": [], "name": "tensor_slicing.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tf_numpy.ipynb b/site/en/guide/tf_numpy.ipynb index 9b469241a3b..3083acb147d 100644 --- a/site/en/guide/tf_numpy.ipynb +++ b/site/en/guide/tf_numpy.ipynb @@ -70,7 +70,7 @@ "source": [ "## Overview\n", "\n", - "TensorFlow implements a subset of the [NumPy API](https://numpy.org/doc/1.16), available as `tf.experimental.numpy`. This allows running NumPy code, accelerated by TensorFlow, while also allowing access to all of TensorFlow's APIs." + "TensorFlow implements a subset of the [NumPy API](https://numpy.org/doc/stable/index.html), available as `tf.experimental.numpy`. This allows running NumPy code, accelerated by TensorFlow, while also allowing access to all of TensorFlow's APIs." ] }, { @@ -142,7 +142,7 @@ "\n", "An instance of `tf.experimental.numpy.ndarray`, called **ND Array**, represents a multidimensional dense array of a given `dtype` placed on a certain device. It is an alias to `tf.Tensor`. Check out the ND array class for useful methods like `ndarray.T`, `ndarray.reshape`, `ndarray.ravel` and others.\n", "\n", - "First create an ND array object, and then invoke different methods. " + "First create an ND array object, and then invoke different methods." ] }, { @@ -170,11 +170,28 @@ { "cell_type": "markdown", "metadata": { - "id": "Mub8-dvJMUr4" + "id": "-BOY8CGRKEhE" }, "source": [ "### Type promotion\n", "\n", + "There are 4 options for type promotion in TensorFlow.\n", + "\n", + "- By default, TensorFlow raises errors instead of promoting types for mixed type operations.\n", + "- Running `tf.numpy.experimental_enable_numpy_behavior()` switches TensorFlow to use `NumPy` type promotion rules (described below).\n", + "- After TensorFlow 2.15, there are two new options (refer to [TF NumPy Type Promotion](tf_numpy_type_promotion.ipynb) for details):\n", + " - `tf.numpy.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")` uses Jax type promotion rules.\n", + " - `tf.numpy.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")` uses Jax type promotion rules, but disallows certain unsafe promotions." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SXskSHrX5J45" + }, + "source": [ + "#### NumPy Type Promotion\n", + "\n", "TensorFlow NumPy APIs have well-defined semantics for converting literals to ND array, as well as for performing type promotion on ND array inputs. Please see [`np.result_type`](https://numpy.org/doc/1.16/reference/generated/numpy.result_type.html) for more details." 
] }, @@ -200,7 +217,7 @@ " (tnp.int32, tnp.int64, tnp.float32, tnp.float64)]\n", "for i, v1 in enumerate(values):\n", " for v2 in values[i + 1:]:\n", - " print(\"%s + %s => %s\" % \n", + " print(\"%s + %s => %s\" %\n", " (v1.dtype.name, v2.dtype.name, (v1 + v2).dtype.name))" ] }, @@ -932,8 +949,8 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], "name": "tf_numpy.ipynb", + "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/guide/tf_numpy_type_promotion.ipynb b/site/en/guide/tf_numpy_type_promotion.ipynb new file mode 100644 index 00000000000..f984310822a --- /dev/null +++ b/site/en/guide/tf_numpy_type_promotion.ipynb @@ -0,0 +1,1138 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ZjN_IJ8mhJ-4" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sY3Ffd83hK3b" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "03Pw58e6mTHI" + }, + "source": [ + "# TF-NumPy Type Promotion" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l9nPKvxK-_pM" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uma-W5v__DYh" + }, + "source": [ + "## Overview\n", + "\n", + "There are 4 options for type promotion in TensorFlow.\n", + "\n", + "- By default, TensorFlow raises errors instead of promoting types for mixed type operations.\n", + "- Running `tf.numpy.experimental_enable_numpy_behavior()` switches TensorFlow to use [NumPy type promotion rules](https://www.tensorflow.org/guide/tf_numpy#type_promotion).\n", + "- **This doc** describes two new options that will be available in TensorFlow 2.15 (or currently in `tf-nightly`):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vMvEKDFOsau7" + }, + "outputs": [], + "source": [ + "!pip install -q tf_nightly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a6hOFBfPsd3y" + }, + "source": [ + " **Note**: `experimental_enable_numpy_behavior` changes the behavior of all of TensorFlow." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ob1HNwUmYR5b" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AJR558zjAZQu" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow.experimental.numpy as tnp\n", + "\n", + "print(\"Using TensorFlow version %s\" % tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M6tacoy0DU6e" + }, + "source": [ + "### Enabling the new type promotion\n", + "\n", + "In order to use the [JAX-like type promotion](https://jax.readthedocs.io/en/latest/type_promotion.html) in TF-Numpy, specify either `'all'` or `'safe'` as the dtype conversion mode when enabling NumPy behavior for TensorFlow.\n", + "\n", + "This new system (with `dtype_conversion_mode=\"all\"`) is associative, commutative, and makes it easy to control what width of float you end up with (it doesn't automatically convert to wider floats). It does introduce some risks of overflows and precision loss, but `dtype_conversion_mode=\"safe\"` forces you to handle those cases explicitly. The two modes are explained more in detail in the [next section](#two_modes)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TfCyofpFDQxm" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sEMXK8-ZWMun" + }, + "source": [ + "\n", + "\n", + "## Two Modes : ALL mode vs SAFE mode\n", + "\n", + "In the new type promotion system, we introduce two modes: `ALL` mode and `SAFE` mode. `SAFE` mode is used to mitigate the concerns of \"risky\" promotions that can result in precision loss or bit-widening." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-ULvTWj_KnHU" + }, + "source": [ + "### Dtypes\n", + "\n", + "We will be using the following abbreviations for brevity.\n", + "\n", + "* `b` means `tf.bool`\n", + "* `u8` means `tf.uint8`\n", + "* `i16` means `tf.int16`\n", + "* `i32` means `tf.int32`\n", + "* `bf16` means `tf.bfloat16`\n", + "* `f32` means `tf.float32`\n", + "* `f64` means `tf.float64`\n", + "* `i32*` means Python `int` or weakly-typed `i32`\n", + "* `f32*` means Python `float` or weakly-typed `f32`\n", + "* `c128*` means Python `complex` or weakly-typed `c128`\n", + "\n", + "The asterisk (*) denotes that the corresponding type is “weak” - such a dtype is temporarily inferred by the system, and could defer to other dtypes. 
This concept is explained more in detail [here](#weak_tensor)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hXZxLCkuzzq3" + }, + "source": [ + "### Example of precision losing operations\n", + "\n", + "In the following example, `i32` + `f32` is allowed in `ALL` mode but\n", + "not in `SAFE` mode due to the risk of precision loss." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y-yeIvstWStL" + }, + "outputs": [], + "source": [ + "# i32 + f32 returns a f32 result in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, dtype = tf.int32)\n", + "b = tf.constant(5.0, dtype = tf.float32)\n", + "a + b # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JNNmZow2WY3G" + }, + "outputs": [], + "source": [ + "# This promotion is not allowed in SAFE mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")\n", + "a = tf.constant(10, dtype = tf.int32)\n", + "b = tf.constant(5.0, dtype = tf.float32)\n", + "try:\n", + " a + b\n", + "except TypeError as e:\n", + " print(f'{type(e)}: {e}') # TypeError: explicitly specify the dtype or switch to ALL mode." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f0x4Qhff0AKS" + }, + "source": [ + "### Example of bit-widening operations\n", + "\n", + "In the following example, `i8` + `u32` is allowed in `ALL` mode but\n", + "not in `SAFE` mode due to bit-widening, which means using more bits than the number of bits in the inputs. Note that the new type promotion semantics only allows necessary bit-widening." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Etbv-WoWzUXf" + }, + "outputs": [], + "source": [ + "# i8 + u32 returns an i64 result in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, dtype = tf.int8)\n", + "b = tf.constant(5, dtype = tf.uint32)\n", + "a + b" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yKRdvtvw0Lvt" + }, + "outputs": [], + "source": [ + "# This promotion is not allowed in SAFE mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"safe\")\n", + "a = tf.constant(10, dtype = tf.int8)\n", + "b = tf.constant(5, dtype = tf.uint32)\n", + "try:\n", + " a + b\n", + "except TypeError as e:\n", + " print(f'{type(e)}: {e}') # TypeError: explicitly specify the dtype or switch to ALL mode." 
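One way to satisfy `SAFE` mode, as the error comment above suggests, is to spell out the promotion yourself; a minimal sketch (not from the guide) using `tf.cast`:

```python
# Explicitly widen both operands so no implicit bit-widening is needed.
a = tf.constant(10, dtype=tf.int8)
b = tf.constant(5, dtype=tf.uint32)
result = tf.cast(a, tf.int64) + tf.cast(b, tf.int64)  # plain i64 + i64, allowed in SAFE mode
```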
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yh2BwqUzH3C3" + }, + "source": [ + "## A System Based on a Lattice" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HHUnfTPiYVN5" + }, + "source": [ + "### Type Promotion Lattice\n", + "\n", + "The new type promotion behavior is determined via the following type promotion lattice:\n", + "\n", + "![Type Promotion Lattice](https://tensorflow.org/guide/images/new_type_promotion/type_promotion_lattice.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QykluwRyDDle" + }, + "source": [ + "More specifically, promotion between any two types is determined by finding the first common child of the two nodes (including the nodes themselves).\n", + "\n", + "For example, in the diagram above, the first common child of `i8` and `i32` is `i32` because the two nodes intersect for the first time at `i32` when following the direction of the arrows.\n", + "\n", + "Similarly, as another example, the result promotion type between `u64` and `f16` would be `f16`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nthziRHaDAUY" + }, + "source": [ + "\n", + "\n", + "### Type Promotion Table\n", + "\n", + "Following the lattice generates the binary promotion table below:\n", + "\n", + "**Note**: `SAFE` mode disallows the highlighted cells. `ALL` mode allows all cases.\n", + "\n", + "![Type Promotion Table](https://tensorflow.org/guide/images/new_type_promotion/type_promotion_table.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TPDt5QTkucSC" + }, + "source": [ + "## Advantages of The New Type Promotion\n", + "\n", + "We adopt a JAX-like lattice-based system for our new type promotion, which offers the following advantages:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NUS_b13nue1p" + }, + "source": [ + "\n", + "\n", + "#### Advantages of Lattice-Based System\n", + "\n", + "First, using a lattice-based system ensures three very important properties:\n", + "\n", + "* Existence: There is a unique result promotion type for any combination of types.\n", + "* Commutativity: `a + b = b + a`\n", + "* Associativity: `a + (b + c) = (a + b) + c`\n", + "\n", + "These three properties are critical for constructing a type promotion semantics that is consistent and predictable." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sz88hRR6uhls" + }, + "source": [ + "#### Advantages of JAX-like Lattice System\n", + "\n", + "Another crucial advantage of the JAX-like lattice system is that outside unsigned ints, it avoids all wider-than-necessary promotions. This means you cannot get 64-bit results without 64-bit inputs. This is especially beneficial for working on accelerators as it avoids unnecessary 64-bit values, which was frequent in the old type promotion." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rlylb7ieOVbJ" + }, + "source": [ + "However, this comes with a trade-off: mixed float/integer promotion is very prone to precision loss. For instance, in the example below, `i64` + `f16` results in promoting `i64` to `f16`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "abqIkV02OXEF" + }, + "outputs": [], + "source": [ + "# The first input is promoted to f16 in ALL mode.\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "tf.constant(1, tf.int64) + tf.constant(3.2, tf.float16) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mYnh1gZdObfI" + }, + "source": [ + "To mitigate this concern, we introduced a `SAFE` mode that will disallow these \"risky\" promotions.\n", + "\n", + "**Note**: To learn more about the design considerations in constructing the lattice system, please refer to the [Design of Type Promotion Semantics for JAX](https://jax.readthedocs.io/en/latest/jep/9407-type-promotion.html)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gAc7LFV0S2dP" + }, + "source": [ + "\n", + "\n", + "## WeakTensor" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "olQ2gsFlS9BH" + }, + "source": [ + "### Overview\n", + "\n", + "*Weak tensors* are Tensors that are \"weakly typed\", similar to a [concept in JAX](https://jax.readthedocs.io/en/latest/type_promotion.html#weakly-typed-values-in-jax).\n", + "\n", + "`WeakTensor`'s dtype is temporarily inferred by the system, and could defer to other dtypes. This concept is introduced in the new type promotion to prevent unwanted type promotion within binary operations between TF values and values with no explicitly user-specified type, such as Python scalar literals." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MYmoFIqZTFtw" + }, + "source": [ + "For instance, in the example below, `tf.constant(1.2)` is considered \"weak\" because it doesn't have a specific dtype. Therefore, `tf.constant(1.2)` defers to the type of `tf.constant(3.1, tf.float16)`, resulting in a `f16` output." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eSBv_mzyTE97" + }, + "outputs": [], + "source": [ + "tf.constant(1.2) + tf.constant(3.1, tf.float16) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KxuqBIFuTm5Z" + }, + "source": [ + "### WeakTensor Construction\n", + "\n", + "WeakTensors are created when you create a tensor without specifying a dtype. You can check whether a Tensor is \"weak\" or not by checking the weak attribute at the end of the Tensor's string representation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7UmunnJ8True3" + }, + "source": [ + "**First Case**: When `tf.constant` is called with an input with no user-specified dtype." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fLEtMluNTsI5" + }, + "outputs": [], + "source": [ + "tf.constant(5) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZQX6MBWHTt__" + }, + "outputs": [], + "source": [ + "tf.constant([5.0, 10.0, 3]) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ftsKSC5BTweP" + }, + "outputs": [], + "source": [ + "# A normal Tensor is created when dtype arg is specified.\n", + "tf.constant(5, tf.int32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RqhoRy5iTyag" + }, + "source": [ + "**Second Case**: When an input with no user-specified dtype is passed into a [WeakTensor-supporting API](#weak_tensor_apis)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DuwpgoQJTzE-" + }, + "outputs": [], + "source": [ + "tf.math.abs([100.0, 4.0]) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UTcoR1xvR39k" + }, + "source": [ + "##Effects of turning on the new type promotion\n", + "\n", + "Below is a non-exhaustive list of changes that result from turning on the new type promotion.\n", + "\n", + "* More consistent and predictable promotion results.\n", + "* Reduced risk of bit-widening.\n", + "* `tf.Tensor` mathematical dunder methods use new type promotion.\n", + "* `tf.constant` can return `WeakTensor`.\n", + "* `tf.constant` allows implicit conversions when a Tensor input with a dtype different from the `dtype` arg is passed in.\n", + "* `tf.Variable` in-place ops (`assign`, `assign-add`, `assign-sub`) allow implicit conversions.\n", + "* `tnp.array(1)` and `tnp.array(1.0)` returns 32-bit WeakTensor.\n", + "* `WeakTensor`s will be created and used for [WeakTensor-supporting unary and binary API](#weak_tensor_apis)'s.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KyvonwYcsFX2" + }, + "source": [ + "### More consistent and predictable promotion results\n", + "\n", + "Using a [lattice-based system](#lattice_system_design) allows the new type promotion to produce consistent and predictable type promotion results." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q0Z1njfb7lRa" + }, + "source": [ + "#### Old Type Promotion\n", + "\n", + "Changing the order of operations produces inconsistent results using old type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M1Ca9v4m7z8e" + }, + "outputs": [], + "source": [ + "# Setup\n", + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"legacy\")\n", + "a = np.array(1, dtype=np.int8)\n", + "b = tf.constant(1)\n", + "c = np.array(1, dtype=np.float16)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WwhTzJ-a4rTc" + }, + "outputs": [], + "source": [ + "# (a + b) + c throws an InvalidArgumentError.\n", + "try:\n", + " tf.add(tf.add(a, b), c)\n", + "except tf.errors.InvalidArgumentError as e:\n", + " print(f'{type(e)}: {e}') # InvalidArgumentError" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d3qDgVYn7ezT" + }, + "outputs": [], + "source": [ + "# (b + a) + c returns an i32 result.\n", + "tf.add(tf.add(b, a), c) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YMH1skEs7oI5" + }, + "source": [ + "#### New Type Promotion\n", + "\n", + "New type promotion produces consistent results regardless of the order." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BOHyJJ8z8uCN" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = np.array(1, dtype=np.int8)\n", + "b = tf.constant(1)\n", + "c = np.array(1, dtype=np.float16)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZUKU70jf7E1l" + }, + "outputs": [], + "source": [ + "# (a + b) + c returns a f16 result.\n", + "tf.add(tf.add(a, b), c) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YOEycjFx7qDn" + }, + "outputs": [], + "source": [ + "# (b + a) + c also returns a f16 result.\n", + "tf.add(tf.add(b, a), c) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FpGMkm6aJsn6" + }, + "source": [ + "### Reduced risk of bit-widening" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JxV2AL-U9Grg" + }, + "source": [ + "#### Old Type Promotion\n", + "\n", + "Old type promotion often resulted in 64-bit results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7L1pxyvn9MlP" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"legacy\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zMJVFdWf4XHp" + }, + "outputs": [], + "source": [ + "np.array(3.2, np.float16) + tf.constant(1, tf.int8) + tf.constant(50) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fBhUH_wD9Is7" + }, + "source": [ + "#### New Type Promotion\n", + "\n", + "New type promotion returns results with minimal number of bits necessary." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aJsj2ZyI9T9Y" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jj0N_Plp4X9l" + }, + "outputs": [], + "source": [ + "np.array(3.2, np.float16) + tf.constant(1, tf.int8) + tf.constant(50) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yKUx7xe-KZ5O" + }, + "source": [ + "### tf.Tensor mathematical dunder methods\n", + "\n", + "All `tf.Tensor` mathematical dunder methods will follow the new type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2c3icBUX4wNl" + }, + "outputs": [], + "source": [ + "-tf.constant(5) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ydJHQjid45s7" + }, + "outputs": [], + "source": [ + "tf.constant(5, tf.int16) - tf.constant(1, tf.float32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pLbIjIvbKqcU" + }, + "source": [ + "### tf.Variable in-place ops\n", + "\n", + "Implicit conversions will be allowed in `tf.Variable` in-place ops.\n", + "\n", + "**Note**: Any promotion that results in a dtype that is different from the variable's original dtype will be not allowed. This is because `tf.Variable` cannot change its dtype." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QsXhyK1h-i5S" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.Variable(10, tf.int32)\n", + "a.assign_add(tf.constant(5, tf.int16)) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PiA4H-otLDit" + }, + "source": [ + "### tf.constant implicit conversions\n", + "\n", + "In the old type promotion, `tf.constant` required an input Tensor to have the same dtype as the dtype argument. However, in the new type promotion, we implicitly convert Tensor to the specified dtype." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ArrQ9Dj0_OR8" + }, + "outputs": [], + "source": [ + "tnp.experimental_enable_numpy_behavior(dtype_conversion_mode=\"all\")\n", + "a = tf.constant(10, tf.int16)\n", + "tf.constant(a, tf.float32) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WAcK_-XnLWaP" + }, + "source": [ + "### TF-NumPy Array\n", + "\n", + "`tnp.array` defaults to `i32*` and `f32*` for python inputs using the new type promotion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K1pZnYNh_ahm" + }, + "outputs": [], + "source": [ + "tnp.array(1) # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QoQl2PYP_fMT" + }, + "outputs": [], + "source": [ + "tnp.array(1.0) # " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wK5DpQ3Pz3k5" + }, + "source": [ + "##Input Type Inference\n", + "\n", + "This is how different inputs' types are inferred in the new type promotion.\n", + "\n", + "\n", + "* `tf.Tensor`: Since `tf.Tensor` has a dtype property, we don't do further inference.\n", + "* NumPy types: This includes types like `np.array(1)`, `np.int16(1)`, and `np.float`. Since NumPy inputs also have a dtype property, we take the dtype property as the result inference type. Note that NumPy defaults to `i64` and `f64`.\n", + "* Python scalars/Nested types: This includes types like `1`, `[1, 2, 3]`, and `(1.0, 2.0)`.\n", + " * Python `int` is inferred as `i32*`.\n", + " * Python `float` is inferred as `f32*`.\n", + " * Python `complex` is inferred as `c128*`.\n", + "* If the input doesn't fall into any of the above categories but has a dtype property, we take the dtype property as the result inference type." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g_SPfalfSPgg" + }, + "source": [ + "# Further Reading\n", + "\n", + "The new type promotion closely resembles JAX-NumPy's type promotion. 
If you want to know more details about the new type promotion and the design choices, check out the resources below.\n", + "\n", + "* [JAX Type Promotion Semantics](https://jax.readthedocs.io/en/latest/type_promotion.html)\n", + "* [Design of Type Promotion Semantics for JAX](https://jax.readthedocs.io/en/latest/jep/9407-type-promotion.html)\n", + "* [Old TF-NumPy Promotion Semantics](https://www.tensorflow.org/guide/tf_numpy#type_promotion)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qg5xBbImT31S" + }, + "source": [ + "# References" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gjB0CVhVXBfW" + }, + "source": [ + "\n", + "\n", + "## WeakTensor-supporting APIs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_GVbqlN9aBS2" + }, + "source": [ + "Below is a list of APIs that supports `WeakTensor`.\n", + "\n", + "For an unary op, this means that if an input with no user-specified type is passed in, it will return a `WeakTensor`.\n", + "\n", + "For a binary op, it will follow the promotion table [here](#promotion_table). It may or may not return a `WeakTensor` depending on the promotion result of the two inputs.\n", + "\n", + "**Note**: All mathematical operations (`+`, `-`, `*`, ...) are supported." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Gi-G68Z8WN2P" + }, + "source": [ + "* `tf.bitwise.invert`\n", + "* `tf.clip_by_value`\n", + "* `tf.debugging.check_numerics`\n", + "* `tf.expand_dims`\n", + "* `tf.identity`\n", + "* `tf.image.adjust_brightness`\n", + "* `tf.image.adjust_gamma`\n", + "* `tf.image.extract_patches`\n", + "* `tf.image.random_brightness`\n", + "* `tf.image.stateless_random_brightness`\n", + "* `tf.linalg.diag`\n", + "* `tf.linalg.diag_part`\n", + "* `tf.linalg.matmul`\n", + "* `tf.linalg.matrix_transpose`\n", + "* `tf.linalg.tensor_diag_part`\n", + "* `tf.linalg.trace`\n", + "* `tf.math.abs`\n", + "* `tf.math.acos`\n", + "* `tf.math.acosh`\n", + "* `tf.math.add`\n", + "* `tf.math.angle`\n", + "* `tf.math.asin`\n", + "* `tf.math.asinh`\n", + "* `tf.math.atan`\n", + "* `tf.math.atanh`\n", + "* `tf.math.ceil`\n", + "* `tf.math.conj`\n", + "* `tf.math.cos`\n", + "* `tf.math.cosh`\n", + "* `tf.math.digamma`\n", + "* `tf.math.divide_no_nan`\n", + "* `tf.math.divide`\n", + "* `tf.math.erf`\n", + "* `tf.math.erfc`\n", + "* `tf.math.erfcinv`\n", + "* `tf.math.erfinv`\n", + "* `tf.math.exp`\n", + "* `tf.math.expm1`\n", + "* `tf.math.floor`\n", + "* `tf.math.floordiv`\n", + "* `tf.math.floormod`\n", + "* `tf.math.imag`\n", + "* `tf.math.lgamma`\n", + "* `tf.math.log1p`\n", + "* `tf.math.log_sigmoid`\n", + "* `tf.math.log`\n", + "* `tf.math.multiply_no_nan`\n", + "* `tf.math.multiply`\n", + "* `tf.math.ndtri`\n", + "* `tf.math.negative`\n", + "* `tf.math.pow`\n", + "* `tf.math.real`\n", + "* `tf.math.real`\n", + "* `tf.math.reciprocal_no_nan`\n", + "* `tf.math.reciprocal`\n", + "* `tf.math.reduce_euclidean_norm`\n", + "* `tf.math.reduce_logsumexp`\n", + "* `tf.math.reduce_max`\n", + "* `tf.math.reduce_mean`\n", + "* `tf.math.reduce_min`\n", + "* `tf.math.reduce_prod`\n", + "* `tf.math.reduce_std`\n", + "* `tf.math.reduce_sum`\n", + "* `tf.math.reduce_variance`\n", + "* `tf.math.rint`\n", + "* `tf.math.round`\n", + "* `tf.math.rsqrt`\n", + "* `tf.math.scalar_mul`\n", + "* `tf.math.sigmoid`\n", + "* `tf.math.sign`\n", + "* `tf.math.sin`\n", + "* `tf.math.sinh`\n", + "* `tf.math.softplus`\n", + "* `tf.math.special.bessel_i0`\n", + "* `tf.math.special.bessel_i0e`\n", + "* `tf.math.special.bessel_i1`\n", + 
"* `tf.math.special.bessel_i1e`\n", + "* `tf.math.special.bessel_j0`\n", + "* `tf.math.special.bessel_j1`\n", + "* `tf.math.special.bessel_k0`\n", + "* `tf.math.special.bessel_k0e`\n", + "* `tf.math.special.bessel_k1`\n", + "* `tf.math.special.bessel_k1e`\n", + "* `tf.math.special.bessel_y0`\n", + "* `tf.math.special.bessel_y1`\n", + "* `tf.math.special.dawsn`\n", + "* `tf.math.special.expint`\n", + "* `tf.math.special.fresnel_cos`\n", + "* `tf.math.special.fresnel_sin`\n", + "* `tf.math.special.spence`\n", + "* `tf.math.sqrt`\n", + "* `tf.math.square`\n", + "* `tf.math.subtract`\n", + "* `tf.math.tan`\n", + "* `tf.math.tanh`\n", + "* `tf.nn.depth_to_space`\n", + "* `tf.nn.elu`\n", + "* `tf.nn.gelu`\n", + "* `tf.nn.leaky_relu`\n", + "* `tf.nn.log_softmax`\n", + "* `tf.nn.relu6`\n", + "* `tf.nn.relu`\n", + "* `tf.nn.selu`\n", + "* `tf.nn.softsign`\n", + "* `tf.nn.space_to_depth`\n", + "* `tf.nn.swish`\n", + "* `tf.ones_like`\n", + "* `tf.realdiv`\n", + "* `tf.reshape`\n", + "* `tf.squeeze`\n", + "* `tf.stop_gradient`\n", + "* `tf.transpose`\n", + "* `tf.truncatediv`\n", + "* `tf.truncatemod`\n", + "* `tf.zeros_like`\n", + "* `tf.experimental.numpy.abs`\n", + "* `tf.experimental.numpy.absolute`\n", + "* `tf.experimental.numpy.amax`\n", + "* `tf.experimental.numpy.amin`\n", + "* `tf.experimental.numpy.angle`\n", + "* `tf.experimental.numpy.arange`\n", + "* `tf.experimental.numpy.arccos`\n", + "* `tf.experimental.numpy.arccosh`\n", + "* `tf.experimental.numpy.arcsin`\n", + "* `tf.experimental.numpy.arcsinh`\n", + "* `tf.experimental.numpy.arctan`\n", + "* `tf.experimental.numpy.arctanh`\n", + "* `tf.experimental.numpy.around`\n", + "* `tf.experimental.numpy.array`\n", + "* `tf.experimental.numpy.asanyarray`\n", + "* `tf.experimental.numpy.asarray`\n", + "* `tf.experimental.numpy.ascontiguousarray`\n", + "* `tf.experimental.numpy.average`\n", + "* `tf.experimental.numpy.bitwise_not`\n", + "* `tf.experimental.numpy.cbrt`\n", + "* `tf.experimental.numpy.ceil`\n", + "* `tf.experimental.numpy.conj`\n", + "* `tf.experimental.numpy.conjugate`\n", + "* `tf.experimental.numpy.copy`\n", + "* `tf.experimental.numpy.cos`\n", + "* `tf.experimental.numpy.cosh`\n", + "* `tf.experimental.numpy.cumprod`\n", + "* `tf.experimental.numpy.cumsum`\n", + "* `tf.experimental.numpy.deg2rad`\n", + "* `tf.experimental.numpy.diag`\n", + "* `tf.experimental.numpy.diagflat`\n", + "* `tf.experimental.numpy.diagonal`\n", + "* `tf.experimental.numpy.diff`\n", + "* `tf.experimental.numpy.empty_like`\n", + "* `tf.experimental.numpy.exp2`\n", + "* `tf.experimental.numpy.exp`\n", + "* `tf.experimental.numpy.expand_dims`\n", + "* `tf.experimental.numpy.expm1`\n", + "* `tf.experimental.numpy.fabs`\n", + "* `tf.experimental.numpy.fix`\n", + "* `tf.experimental.numpy.flatten`\n", + "* `tf.experimental.numpy.flip`\n", + "* `tf.experimental.numpy.fliplr`\n", + "* `tf.experimental.numpy.flipud`\n", + "* `tf.experimental.numpy.floor`\n", + "* `tf.experimental.numpy.full_like`\n", + "* `tf.experimental.numpy.imag`\n", + "* `tf.experimental.numpy.log10`\n", + "* `tf.experimental.numpy.log1p`\n", + "* `tf.experimental.numpy.log2`\n", + "* `tf.experimental.numpy.log`\n", + "* `tf.experimental.numpy.max`\n", + "* `tf.experimental.numpy.mean`\n", + "* `tf.experimental.numpy.min`\n", + "* `tf.experimental.numpy.moveaxis`\n", + "* `tf.experimental.numpy.nanmean`\n", + "* `tf.experimental.numpy.negative`\n", + "* `tf.experimental.numpy.ones_like`\n", + "* `tf.experimental.numpy.positive`\n", + "* `tf.experimental.numpy.prod`\n", + "* 
`tf.experimental.numpy.rad2deg`\n", + "* `tf.experimental.numpy.ravel`\n", + "* `tf.experimental.numpy.real`\n", + "* `tf.experimental.numpy.reciprocal`\n", + "* `tf.experimental.numpy.repeat`\n", + "* `tf.experimental.numpy.reshape`\n", + "* `tf.experimental.numpy.rot90`\n", + "* `tf.experimental.numpy.round`\n", + "* `tf.experimental.numpy.signbit`\n", + "* `tf.experimental.numpy.sin`\n", + "* `tf.experimental.numpy.sinc`\n", + "* `tf.experimental.numpy.sinh`\n", + "* `tf.experimental.numpy.sort`\n", + "* `tf.experimental.numpy.sqrt`\n", + "* `tf.experimental.numpy.square`\n", + "* `tf.experimental.numpy.squeeze`\n", + "* `tf.experimental.numpy.std`\n", + "* `tf.experimental.numpy.sum`\n", + "* `tf.experimental.numpy.swapaxes`\n", + "* `tf.experimental.numpy.tan`\n", + "* `tf.experimental.numpy.tanh`\n", + "* `tf.experimental.numpy.trace`\n", + "* `tf.experimental.numpy.transpose`\n", + "* `tf.experimental.numpy.triu`\n", + "* `tf.experimental.numpy.vander`\n", + "* `tf.experimental.numpy.var`\n", + "* `tf.experimental.numpy.zeros_like`" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "tf_numpy_type_promotion.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/guide/tpu.ipynb b/site/en/guide/tpu.ipynb index f64450ba04c..49eee544bec 100644 --- a/site/en/guide/tpu.ipynb +++ b/site/en/guide/tpu.ipynb @@ -6,7 +6,7 @@ "id": "Tce3stUlHN0L" }, "source": [ - "##### Copyright 2018 The TensorFlow Authors.\n" + "##### Copyright 2024 The TensorFlow Authors.\n" ] }, { @@ -61,7 +61,9 @@ "id": "Ys81cOhXOWUP" }, "source": [ - "Before you run this Colab notebook, make sure that your hardware accelerator is a TPU by checking your notebook settings: **Runtime** > **Change runtime type** > **Hardware accelerator** > **TPU**." + "This guide demonstrates how to perform basic training on [Tensor Processing Units (TPUs)](https://cloud.google.com/tpu/) and TPU Pods, a collection of TPU devices connected by dedicated high-speed network interfaces, with `tf.keras` and custom training loops.\n", + "\n", + "TPUs are Google's custom-developed application-specific integrated circuits (ASICs) used to accelerate machine learning workloads. They are available through [Google Colab](https://colab.research.google.com/), the [TPU Research Cloud](https://sites.research.google/trc/), and [Cloud TPU](https://cloud.google.com/tpu)." ] }, { @@ -73,6 +75,17 @@ "## Setup" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "ebf7f8489bb7" + }, + "source": [ + "Before you run this Colab notebook, make sure that your hardware accelerator is a TPU by checking your notebook settings: **Runtime** > **Change runtime type** > **Hardware accelerator** > **TPU v2**.\n", + "\n", + "Import some necessary libraries, including TensorFlow Datasets:" + ] + }, { "cell_type": "code", "execution_count": null, @@ -95,7 +108,7 @@ "source": [ "## TPU initialization\n", "\n", - "TPUs are typically Cloud TPU workers, which are different from the local process running the user's Python program. Thus, you need to do some initialization work to connect to the remote cluster and initialize the TPUs. Note that the `tpu` argument to `tf.distribute.cluster_resolver.TPUClusterResolver` is a special address just for Colab. If you are running your code on Google Compute Engine (GCE), you should instead pass in the name of your Cloud TPU." 
+ "TPUs are typically [Cloud TPU](https://cloud.google.com/tpu/docs/) workers, which are different from the local process running the user's Python program. Thus, you need to do some initialization work to connect to the remote cluster and initialize the TPUs. Note that the `tpu` argument to `tf.distribute.cluster_resolver.TPUClusterResolver` is a special address just for Colab. If you are running your code on Google Compute Engine (GCE), you should instead pass in the name of your Cloud TPU." ] }, { @@ -115,7 +128,7 @@ }, "outputs": [], "source": [ - "resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')\n", + "resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='local')\n", "tf.config.experimental_connect_to_cluster(resolver)\n", "# This is the TPU initialization code that has to be at the beginning.\n", "tf.tpu.experimental.initialize_tpu_system(resolver)\n", @@ -159,7 +172,7 @@ "source": [ "## Distribution strategies\n", "\n", - "Usually you run your model on multiple TPUs in a data-parallel way. To distribute your model on multiple TPUs (or other accelerators), TensorFlow offers several distribution strategies. You can replace your distribution strategy and the model will run on any given (TPU) device. Check the [distribution strategy guide](./distributed_training.ipynb) for more information." + "Usually, you run your model on multiple TPUs in a data-parallel way. To distribute your model on multiple TPUs (as well as multiple GPUs or multiple machines), TensorFlow offers the `tf.distribute.Strategy` API. You can replace your distribution strategy and the model will run on any given (TPU) device. Learn more in the [Distributed training with TensorFlow](./distributed_training.ipynb) guide." ] }, { @@ -168,6 +181,8 @@ "id": "DcDPMZs-9uLJ" }, "source": [ + "Using the `tf.distribute.TPUStrategy` option implements synchronous distributed training. TPUs provide their own implementation of efficient all-reduce and other collective operations across multiple TPU cores, which are used in `TPUStrategy`.\n", + "\n", "To demonstrate this, create a `tf.distribute.TPUStrategy` object:" ] }, @@ -188,7 +203,7 @@ "id": "JlaAmswWPsU6" }, "source": [ - "To replicate a computation so it can run in all TPU cores, you can pass it into the `strategy.run` API. Below is an example that shows all cores receiving the same inputs `(a, b)` and performing matrix multiplication on each core independently. The outputs will be the values from all the replicas." + "To replicate a computation so it can run in all TPU cores, you can pass it into the `Strategy.run` API. Below is an example that shows all cores receiving the same inputs `(a, b)` and performing matrix multiplication on each core independently. The outputs will be the values from all the replicas." ] }, { @@ -216,7 +231,7 @@ "source": [ "## Classification on TPUs\n", "\n", - "Having covered the basic concepts, consider a more concrete example. This section demonstrates how to use the distribution strategy—`tf.distribute.TPUStrategy`—to train a Keras model on a Cloud TPU.\n" + "Having covered the basic concepts, consider a more concrete example. This section demonstrates how to use the distribution strategy—`tf.distribute.TPUStrategy`—to train a Keras model on a Cloud TPU." ] }, { @@ -227,7 +242,7 @@ "source": [ "### Define a Keras model\n", "\n", - "Start with a definition of a `Sequential` Keras model for image classification on the MNIST dataset using Keras. 
It's no different than what you would use if you were training on CPUs or GPUs. Note that Keras model creation needs to be inside `strategy.scope`, so the variables can be created on each TPU device. Other parts of the code are not necessary to be inside the strategy scope." + "Start with a definition of a [`Sequential` Keras model](https://www.tensorflow.org/guide/keras/sequential_model) for image classification on the MNIST dataset. It's no different than what you would use if you were training on CPUs or GPUs. Note that Keras model creation needs to be inside the `Strategy.scope`, so the variables can be created on each TPU device. Other parts of the code are not necessary to be inside the `Strategy` scope." ] }, { @@ -239,13 +254,32 @@ "outputs": [], "source": [ "def create_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " return tf.keras.Sequential(\n", - " [tf.keras.layers.Conv2D(256, 3, activation='relu', input_shape=(28, 28, 1)),\n", - " tf.keras.layers.Conv2D(256, 3, activation='relu'),\n", + " [tf.keras.layers.Conv2D(256, 3, input_shape=(28, 28, 1),\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Conv2D(256, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(256, activation='relu'),\n", - " tf.keras.layers.Dense(128, activation='relu'),\n", - " tf.keras.layers.Dense(10)])" + " tf.keras.layers.Dense(256,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(128,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10,\n", + " kernel_regularizer=regularizer)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h-2qaXgfyONQ" + }, + "source": [ + "This model puts L2 regularization terms on the weights of each layer, so that the custom training loop below can show how you pick them up from `Model.losses`." ] }, { @@ -256,9 +290,9 @@ "source": [ "### Load the dataset\n", "\n", - "Efficient use of the `tf.data.Dataset` API is critical when using a Cloud TPU, as it is impossible to use the Cloud TPUs unless you can feed them data quickly enough. You can learn more about dataset performance in the [Input pipeline performance guide](./data_performance.ipynb).\n", + "Efficient use of the `tf.data.Dataset` API is critical when using a Cloud TPU. You can learn more about dataset performance in the [Input pipeline performance guide](./data_performance.ipynb).\n", "\n", - "For all but the simplest experiments (using `tf.data.Dataset.from_tensor_slices` or other in-graph data), you need to store all data files read by the Dataset in Google Cloud Storage (GCS) buckets.\n", + "If you are using [TPU Nodes](https://cloud.google.com/tpu/docs/managing-tpus-tpu-vm), you need to store all data files read by the TensorFlow `Dataset` in [Google Cloud Storage (GCS) buckets](https://cloud.google.com/tpu/docs/storage-buckets). If you are using [TPU VMs](https://cloud.google.com/tpu/docs/users-guide-tpu-vm), you can store data wherever you like. For more information on TPU Nodes and TPU VMs, refer to the [TPU System Architecture](https://cloud.google.com/tpu/docs/system-architecture-tpu-vm) documentation.\n", "\n", "For most use cases, it is recommended to convert your data into the `TFRecord` format and use a `tf.data.TFRecordDataset` to read it. Check the [TFRecord and tf.Example tutorial](../tutorials/load_data/tfrecord.ipynb) for details on how to do this. 
It is not a hard requirement and you can use other dataset readers, such as `tf.data.FixedLengthRecordDataset` or `tf.data.TextLineDataset`.\n", "\n", @@ -266,7 +300,7 @@ "\n", "Regardless of the data format used, it is strongly recommended that you use large files on the order of 100MB. This is especially important in this networked setting, as the overhead of opening a file is significantly higher.\n", "\n", - "As shown in the code below, you should use the `tensorflow_datasets` module to get a copy of the MNIST training and test data. Note that `try_gcs` is specified to use a copy that is available in a public GCS bucket. If you don't specify this, the TPU will not be able to access the downloaded data. " + "As shown in the code below, you should use the Tensorflow Datasets `tfds.load` module to get a copy of the MNIST training and test data. Note that `try_gcs` is specified to use a copy that is available in a public GCS bucket. If you don't specify this, the TPU will not be able to access the downloaded data." ] }, { @@ -311,7 +345,7 @@ "source": [ "### Train the model using Keras high-level APIs\n", "\n", - "You can train your model with Keras `fit` and `compile` APIs. There is nothing TPU-specific in this step—you write the code as if you were using mutliple GPUs and a `MirroredStrategy` instead of the `TPUStrategy`. You can learn more in the [Distributed training with Keras](https://www.tensorflow.org/tutorials/distribute/keras) tutorial." + "You can train your model with Keras `Model.fit` and `Model.compile` APIs. There is nothing TPU-specific in this step—you write the code as if you were using multiple GPUs and a `MirroredStrategy` instead of the `TPUStrategy`. You can learn more in the [Distributed training with Keras](../tutorials/distribute/keras.ipynb) tutorial." ] }, { @@ -338,7 +372,7 @@ "model.fit(train_dataset,\n", " epochs=5,\n", " steps_per_epoch=steps_per_epoch,\n", - " validation_data=test_dataset, \n", + " validation_data=test_dataset,\n", " validation_steps=validation_steps)" ] }, @@ -348,7 +382,7 @@ "id": "8hSGBIYtUugJ" }, "source": [ - "To reduce Python overhead and maximize the performance of your TPU, pass in the argument—`steps_per_execution`—to `Model.compile`. In this example, it increases throughput by about 50%:" + "To reduce Python overhead and maximize the performance of your TPU, pass in the `steps_per_execution` argument to Keras `Model.compile`. In this example, it increases throughput by about 50%:" ] }, { @@ -382,7 +416,7 @@ "source": [ "### Train the model using a custom training loop\n", "\n", - "You can also create and train your model using `tf.function` and `tf.distribute` APIs directly. You can use the `strategy.experimental_distribute_datasets_from_function` API to distribute the dataset given a dataset function. Note that in the example below the batch size passed into the dataset is the per-replica batch size instead of the global batch size. To learn more, check out the [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training) tutorial.\n" + "You can also create and train your model using `tf.function` and `tf.distribute` APIs directly. You can use the `Strategy.distribute_datasets_from_function` API to distribute the `tf.data.Dataset` given a dataset function. Note that in the example below the batch size passed into the `Dataset` is the per-replica batch size instead of the global batch size. 
To learn more, check out the [Custom training with `tf.distribute.Strategy`](../tutorials/distribute/custom_training.ipynb) tutorial.\n" ] }, { @@ -391,7 +425,7 @@ "id": "DxdgXPAL6iFE" }, "source": [ - "First, create the model, datasets and tf.functions:" + "First, create the model, datasets and `tf.function`s:" ] }, { @@ -402,8 +436,8 @@ }, "outputs": [], "source": [ - "# Create the model, optimizer and metrics inside the strategy scope, so that the\n", - "# variables can be mirrored on each device.\n", + "# Create the model, optimizer and metrics inside the `tf.distribute.Strategy`\n", + "# scope, so that the variables can be mirrored on each device.\n", "with strategy.scope():\n", " model = create_model()\n", " optimizer = tf.keras.optimizers.Adam()\n", @@ -411,11 +445,11 @@ " training_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", " 'training_accuracy', dtype=tf.float32)\n", "\n", - "# Calculate per replica batch size, and distribute the datasets on each TPU\n", - "# worker.\n", + "# Calculate per replica batch size, and distribute the `tf.data.Dataset`s\n", + "# on each TPU worker.\n", "per_replica_batch_size = batch_size // strategy.num_replicas_in_sync\n", "\n", - "train_dataset = strategy.experimental_distribute_datasets_from_function(\n", + "train_dataset = strategy.distribute_datasets_from_function(\n", " lambda _: get_dataset(per_replica_batch_size, is_training=True))\n", "\n", "@tf.function\n", @@ -427,9 +461,13 @@ " images, labels = inputs\n", " with tf.GradientTape() as tape:\n", " logits = model(images, training=True)\n", - " loss = tf.keras.losses.sparse_categorical_crossentropy(\n", + " per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(\n", " labels, logits, from_logits=True)\n", - " loss = tf.nn.compute_average_loss(loss, global_batch_size=batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + "\n", " grads = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))\n", " training_loss.update_state(loss * strategy.num_replicas_in_sync)\n", @@ -463,7 +501,7 @@ "\n", " for step in range(steps_per_epoch):\n", " train_step(train_iterator)\n", - " print('Current step: {}, training loss: {}, accuracy: {}%'.format(\n", + " print('Current step: {}, training loss: {}, training accuracy: {}%'.format(\n", " optimizer.iterations.numpy(),\n", " round(float(training_loss.result()), 4),\n", " round(float(training_accuracy.result()) * 100, 2)))\n", @@ -479,9 +517,9 @@ "source": [ "### Improving performance with multiple steps inside `tf.function`\n", "\n", - "You can improve the performance by running multiple steps within a `tf.function`. This is achieved by wrapping the `strategy.run` call with a `tf.range` inside `tf.function`, and AutoGraph will convert it to a `tf.while_loop` on the TPU worker.\n", + "You can improve the performance by running multiple steps within a `tf.function`. This is achieved by wrapping the `Strategy.run` call with a `tf.range` inside `tf.function`, and AutoGraph will convert it to a `tf.while_loop` on the TPU worker. You can learn more about `tf.function`s in the [Better performance with `tf.function`](./function.ipynb) guide.\n", "\n", - "Despite the improved performance, there are tradeoffs with this method compared to running a single step inside `tf.function`. 
Running multiple steps in a `tf.function` is less flexible—you cannot run things eagerly or arbitrary Python code within the steps.\n" + "Despite the improved performance, there are tradeoffs with this method compared to running a single step inside a `tf.function`. Running multiple steps in a `tf.function` is less flexible—you cannot run things eagerly or arbitrary Python code within the steps.\n" ] }, { @@ -501,9 +539,12 @@ " images, labels = inputs\n", " with tf.GradientTape() as tape:\n", " logits = model(images, training=True)\n", - " loss = tf.keras.losses.sparse_categorical_crossentropy(\n", + " per_example_loss = tf.keras.losses.sparse_categorical_crossentropy(\n", " labels, logits, from_logits=True)\n", - " loss = tf.nn.compute_average_loss(loss, global_batch_size=batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", " grads = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(list(zip(grads, model.trainable_variables)))\n", " training_loss.update_state(loss * strategy.num_replicas_in_sync)\n", @@ -512,11 +553,11 @@ " for _ in tf.range(steps):\n", " strategy.run(step_fn, args=(next(iterator),))\n", "\n", - "# Convert `steps_per_epoch` to `tf.Tensor` so the `tf.function` won't get \n", + "# Convert `steps_per_epoch` to `tf.Tensor` so the `tf.function` won't get\n", "# retraced if the value changes.\n", "train_multiple_steps(train_iterator, tf.convert_to_tensor(steps_per_epoch))\n", "\n", - "print('Current step: {}, training loss: {}, accuracy: {}%'.format(\n", + "print('Current step: {}, training loss: {}, training accuracy: {}%'.format(\n", " optimizer.iterations.numpy(),\n", " round(float(training_loss.result()), 4),\n", " round(float(training_accuracy.result()) * 100, 2)))" @@ -530,19 +571,27 @@ "source": [ "## Next steps\n", "\n", - "- [Google Cloud TPU documentation](https://cloud.google.com/tpu/docs/): How to set up and run a Google Cloud TPU.\n", + "To learn more about Cloud TPUs and how to use them:\n", + "\n", + "- [Google Cloud TPU](https://cloud.google.com/tpu): The Google Cloud TPU homepage.\n", + "- [Google Cloud TPU documentation](https://cloud.google.com/tpu/docs/): Google Cloud TPU documentation, which includes:\n", + " - [Introduction to Cloud TPU](https://cloud.google.com/tpu/docs/intro-to-tpu): An overview of working with Cloud TPUs.\n", + " - [Cloud TPU quickstarts](https://cloud.google.com/tpu/docs/quick-starts): Quickstart introductions to working with Cloud TPU VMs using TensorFlow and other main machine learning frameworks.\n", "- [Google Cloud TPU Colab notebooks](https://cloud.google.com/tpu/docs/colabs): End-to-end training examples.\n", "- [Google Cloud TPU performance guide](https://cloud.google.com/tpu/docs/performance-guide): Enhance Cloud TPU performance further by adjusting Cloud TPU configuration parameters for your application\n", - "- [Distributed training with TensorFlow](./distributed_training.ipynb): How to use distribution strategies—including `tf.distribute.TPUStrategy`—with examples showing best practices." + "- [Distributed training with TensorFlow](./distributed_training.ipynb): How to use distribution strategies—including `tf.distribute.TPUStrategy`—with examples showing best practices.\n", + "- TPU embeddings: TensorFlow includes specialized support for training embeddings on TPUs via `tf.tpu.experimental.embedding`. 
In addition, [TensorFlow Recommenders](https://www.tensorflow.org/recommenders) has `tfrs.layers.embedding.TPUEmbedding`. Embeddings provide efficient and dense representations, capturing complex similarities and relationships between features. TensorFlow's TPU-specific embedding support allows you to train embeddings that are larger than the memory of a single TPU device, and to use sparse and ragged inputs on TPUs.\n", + "- [TPU Research Cloud (TRC)](https://sites.research.google/trc/about/): TRC enables researchers to apply for access to a cluster of more than 1,000 Cloud TPU devices.\n" ] } ], "metadata": { "accelerator": "TPU", "colab": { - "collapsed_sections": [], "name": "tpu.ipynb", - "toc_visible": true + "toc_visible": true, + "machine_shape": "hm", + "gpuType": "V28" }, "kernelspec": { "display_name": "Python 3", @@ -551,4 +600,4 @@ }, "nbformat": 4, "nbformat_minor": 0 -} +} \ No newline at end of file diff --git a/site/en/guide/variable.ipynb b/site/en/guide/variable.ipynb index 0502c324a35..868ee9119e2 100644 --- a/site/en/guide/variable.ipynb +++ b/site/en/guide/variable.ipynb @@ -166,7 +166,7 @@ "source": [ "print(\"A variable:\", my_variable)\n", "print(\"\\nViewed as a tensor:\", tf.convert_to_tensor(my_variable))\n", - "print(\"\\nIndex of highest value:\", tf.argmax(my_variable))\n", + "print(\"\\nIndex of highest value:\", tf.math.argmax(my_variable))\n", "\n", "# This creates a new tensor; it does not reshape the variable.\n", "print(\"\\nCopying and reshaping: \", tf.reshape(my_variable, [1,4]))" diff --git a/site/en/guide/versions.md b/site/en/guide/versions.md index b20eea717bc..5b1206cc5f4 100644 --- a/site/en/guide/versions.md +++ b/site/en/guide/versions.md @@ -6,10 +6,11 @@ to modify TensorFlow while preserving compatibility. ## Semantic versioning 2.0 -TensorFlow follows Semantic Versioning 2.0 ([semver](http://semver.org)) for its -public API. Each release version of TensorFlow has the form `MAJOR.MINOR.PATCH`. -For example, TensorFlow version 1.2.3 has `MAJOR` version 1, `MINOR` version 2, -and `PATCH` version 3. Changes to each number have the following meaning: +TensorFlow mostly follows Semantic Versioning 2.0 ([semver](http://semver.org)) +for its public API. Each release version of TensorFlow has the form +`MAJOR.MINOR.PATCH`. For example, TensorFlow version 1.2.3 has `MAJOR` version +1, `MINOR` version 2, and `PATCH` version 3. Changes to each number have the +following meaning: * **MAJOR**: Potentially backwards incompatible changes. Code and data that worked with a previous major release will not necessarily work with the new @@ -22,6 +23,10 @@ and `PATCH` version 3. Changes to each number have the following meaning: data that worked with a previous minor release *and* which depends only on the non-experimental public API will continue to work unchanged. For details on what is and is not the public API, see [What is covered](#what_is_covered). + Note that TensorFlow sometimes makes breaking changes in new minor releases, + where the impact is expected to be minor. For examples of these kinds of + changes, see the "Breaking Changes" sections for past minor releases at + https://github.com/tensorflow/tensorflow/releases. * **PATCH**: Backwards compatible bug fixes. @@ -34,44 +39,153 @@ release 0.12.1. However, release 1.1.1 was backwards *compatible* with release Only the public APIs of TensorFlow are backwards compatible across minor and patch versions. 
The public APIs consist of -* All the documented [Python](../api_docs/python) functions and classes in the - `tensorflow` module and its submodules, except for +* All the documented [Python](https://www.tensorflow.org/api_docs/python) + functions and classes in the `tensorflow` module and its submodules, except + for + + * Private symbols: any function, class, etc., whose name start with `_` + * Experimental and `tf.contrib` symbols, see [below](#not_covered) for + details. + + Note that the code in the `examples/` and `tools/` directories is not + reachable through the `tensorflow` Python module and is thus not covered by + the compatibility guarantee. + + If a symbol is available through the `tensorflow` Python module or its + submodules, but is not documented, then it is **not** considered part of the + public API. + +* The compatibility API (in Python, the `tf.compat` module). At major + versions, we may release utilities and additional endpoints to help users + with the transition to a new major version. These API symbols are deprecated + and not supported (i.e., we will not add any features, and we will not fix + bugs other than to fix vulnerabilities), but they do fall under our + compatibility guarantees. + +* The TensorFlow C API: + + * [tensorflow/c/c_api.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h) + +* The following protocol buffer files: + + * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto) + * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto) + * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto) + * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto) + * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto) + * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto) + * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto) + * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) + * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto) + * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto) + + + +## Separate version number for TensorFlow Lite + +Currently TensorFlow Lite is distributed as a part of TensorFlow. However, we +reserve the right to in future release changes to the TensorFlow Lite APIs on a +different schedule than for the other TensorFlow APIs, or even to move +TensorFlow Lite into a separate source distribution and/or a separate source +repository than TensorFlow. + +Because of this, we use a different version number for TensorFlow Lite +(`TFLITE_VERSION_STRING` in `tensorflow/lite/version.h`, and `TfLiteVersion()` +in `tensorflow/lite/c/c_api.h`) than for TensorFlow (`TF_VERSION_STRING` in +`tensorflow/core/public/release_version.h`, and `TF_Version()` in +`tensorflow/c/c_api.h`). Currently, these two version numbers happen to have the +same value. But in future, they may diverge; for example, we may increment the +major version number for TensorFlow Lite without incrementing the major version +number for TensorFlow, or vice versa. 
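As a small aside, the TensorFlow half of this pair can also be read from Python at runtime. The sketch below is purely illustrative and uses only the public `tf.version` module; it does not expose the TensorFlow Lite version numbers described in this section:

```python
import tensorflow as tf

# The MAJOR.MINOR.PATCH release string of the installed package, e.g. "2.16.1".
print(tf.__version__)

# The same string, plus build metadata recorded at compile time.
print(tf.version.VERSION)
print(tf.version.GIT_VERSION)
```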
+ +The API surface that is covered by the TensorFlow Lite version number is +comprised of the following public APIs: + +* The TensorFlow Lite C API: + + * [tensorflow/lite/c/c_api.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api.h) + * [tensorflow/lite/c/c_api_types.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api_types.h). + +* The TensorFlow Lite Android (Java/Kotlin) API: + + * In `org.tensorflow.lite`: + * [org.tensorflow.lite.TensorFlowLite](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/TensorFlowLite) + * [org.tensorflow.lite.InterpreterApi](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/InterpreterApi) + * [org.tensorflow.lite.Delegate](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/Delegate) + * [org.tensorflow.lite.DelegateFactory](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/DelegateFactory) + * [org.tensorflow.lite.Tensor](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/Tensor) + * [org.tensorflow.lite.DataType](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/DataType) + * [org.tensorflow.lite.RuntimeFlavor](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/RuntimeFlavor) + * In `org.tensorflow.lite.gpu`: + * [org.tensorflow.lite.gpu.GpuDelegate](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/gpu/GpuDelegate) + * [org.tensorflow.lite.gpu.GpuDelegateFactory](https://www.tensorflow.org/lite/api_docs/java/org/tensorflow/lite/gpu/GpuDelegateFactory) + +* The TensorFlow Lite Objective-C APIs: + + * [tensorflow/lite/objc/apis/](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/objc/apis/) + * TFLCoreMLDelegate.h + * TFLDelegate.h + * TFLInterpreter.h + * TFLInterpreterOptions.h + * TFLMetalDelegate.h + * TFLQuantizationParameters.h + * TFLSignatureRunner.h + * TFLTensorFlowLite.h + * TFLTensor.h + +* The TensorFlow Lite Swift APIs: + + * [tensorflow/lite/swift/Sources/](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/swift/Sources/). + * CoreMLDelegate.swift + * Delegate.swift + * InterpreterError.swift + * Interpreter.swift + * MetalDelegate.swift + * Model.swift + * QuantizationParameters.swift + * SignatureRunnerError.swift + * SignatureRunner.swift + * TensorFlowLite.swift + * Tensor.swift + +Experimental symbols are not covered; see [below](#not_covered) for details. + +## Separate version number for TensorFlow Lite Extension APIs + +TensorFlow Lite provides C APIs for extending the TensorFlow Lite interpreter +with "custom ops", which provide user-defined operations in a graph, or +"delegates", which allow delegating the computation for a graph (or for a subset +of a graph) to a custom backend. These APIs, which we collectively call the +"TensorFlow Lite Extension APIs", require more intimate dependencies on some of +the details of the TensorFlow Lite implementation. + +We reserve the right to in future release changes to these APIs, potentially +including non-backwards-compatible changes, on a different schedule than for the +other TensorFlow Lite APIs. So we use a different version number for the +TensorFlow Lite Extension APIs than the version numbers for TensorFlow Lite or +TensorFlow (which were described in the previous section). 
We are introducing +some new APIs in TensorFlow Lite version 2.15 to get the TensorFlow Lite +Extension APIs version (`TFLITE_EXTENSION_APIS_VERSION_STRING` in +`tensorflow/lite/version.h`, and TfLiteExtensionApisVersion() in +`tensorflow/lite/c/c_api.h`). The version number for the TensorFlow Lite +Extension APIs is currently the same as the version number for TensorFlow and +TensorFlow Lite. But in future, they may diverge; for example, we may increment +the major version number for the TensorFlow Lite Extension APIs without +incrementing the major version number for TensorFlow Lite, or vice versa. + +The API surface that is covered by the TensorFlow Lite Extension APIs version +number is comprised of the following public APIs: + +* [tensorflow/lite/c/c_api_opaque.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/c_api_opaque.h) +* [tensorflow/lite/c/common.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/common.h) +* [tensorflow/lite/c/builtin_op_data.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/c/builtin_op_data.h) +* [tensorflow/lite/builtin_ops.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/builtin_ops.h) + +Again, experimental symbols are not covered; see [below](#not_covered) for +details. + + - * Private symbols: any function, class, etc., whose name start with `_` - * Experimental and `tf.contrib` symbols, see [below](#not_covered) for - details. - - Note that the code in the `examples/` and `tools/` directories is not - reachable through the `tensorflow` Python module and is thus not covered by - the compatibility guarantee. - - If a symbol is available through the `tensorflow` Python module or its - submodules, but is not documented, then it is **not** considered part of the - public API. - -* The compatibility API (in Python, the `tf.compat` module). At major versions, - we may release utilities and additional endpoints to help users with the - transition to a new major version. These API symbols are deprecated and not - supported (i.e., we will not add any features, and we will not fix bugs - other than to fix vulnerabilities), but they do fall under our compatibility - guarantees. - -* The [C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h). 
- -* The following protocol buffer files: - - * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto) - * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto) - * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto) - * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto) - * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto) - * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto) - * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto) - * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto) - * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto) - * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto) - - ## What is *not* covered Some parts of TensorFlow can change in backward incompatible ways at any point. @@ -82,21 +196,27 @@ These include: particular, the following are not covered by any compatibility guarantees: - any symbol in the `tf.contrib` module or its submodules; - - any symbol (module, function, argument, property, class, or constant) - whose name contains `experimental` or `Experimental`; or - - any symbol whose fully qualified name includes a module or class which - is itself experimental. This includes fields and submessages of any - protocol buffer called `experimental`. + - any symbol (module, function, argument, property, class, constant, type, + package, etc.) whose name contains `experimental` or `Experimental`; or + - any symbol whose fully qualified name includes a module or class or + package which is itself experimental. This includes fields and + submessages of any protocol buffer called `experimental`. * **Other languages**: TensorFlow APIs in languages other than Python and C, such as: - - [C++](../install/lang_c.md) (exposed through header files in - [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)). - - [Java](../install/lang_java.md), - - [Go](../install/lang_go.md) + - [C++](../install/lang_c.ipynb) (exposed through header files in + [`tensorflow/cc/`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)). + - [Java](../install/lang_java_legacy.md), + - [Go](https://github.com/tensorflow/build/blob/master/golang_install_guide/README.md) - [JavaScript](https://www.tensorflow.org/js) + and TensorFlow **Lite** APIs in languages other than Java/Kotlin, C, + Objective-C, and Swift, in particular + + - **C++** (exposed through header files in + [`tensorflow/lite/`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/lite/)) + * **Details of composite ops:** Many public functions in Python expand to several primitive ops in the graph, and these details will be part of any graphs saved to disk as `GraphDef`s. These details may change for minor @@ -222,7 +342,8 @@ This section is relevant only when making incompatible changes to the `GraphDef` format, such as when adding ops, removing ops, or changing the functionality of existing ops. The previous section should suffice for most users. 
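If you do need to reason about `GraphDef` changes, it can help to see where the data versions discussed below actually live. The following is a minimal, illustrative sketch (not part of any compatibility guarantee) that serializes a trivial `tf.function` and prints the version fields recorded in the resulting `GraphDef`:

```python
import tensorflow as tf

@tf.function
def add_one(x):
  return x + 1

# Serialize a trivial graph and inspect the data versions recorded in it.
graph_def = add_one.get_concrete_function(
    tf.TensorSpec([], tf.float32)).graph.as_graph_def()

print("producer:", graph_def.versions.producer)          # version written by this TensorFlow
print("min_consumer:", graph_def.versions.min_consumer)  # oldest consumer that may load it
print("GRAPH_DEF_VERSION:", tf.version.GRAPH_DEF_VERSION)
```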
- + + ### Backward and partial forward compatibility @@ -253,7 +374,9 @@ guidelines for evolving `GraphDef` versions. There are different data versions for graphs and checkpoints. The two data formats evolve at different rates from each other and also at different rates from TensorFlow. Both versioning systems are defined in -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h). +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h) +and +[`core/public/release_version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/release_version.h). Whenever a new version is added, a note is added to the header detailing what changed and the date. @@ -353,7 +476,7 @@ existing producer scripts will not suddenly use the new functionality. 1. Add a new similar op named `SomethingV2` or similar and go through the process of adding it and switching existing Python wrappers to use it. To ensure forward compatibility use the checks suggested in - [compat.py](https://www.tensorflow.org/code/tensorflow/python/compat/compat.py) + [compat.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/compat/compat.py) when changing the Python wrappers. 2. Remove the old op (Can only take place with a major version change due to backward compatibility). diff --git a/site/en/hub/_book.yaml b/site/en/hub/_book.yaml new file mode 100644 index 00000000000..4a969d413bc --- /dev/null +++ b/site/en/hub/_book.yaml @@ -0,0 +1,85 @@ +upper_tabs: +# Tabs left of dropdown menu +- include: /_upper_tabs_left.yaml +- include: /api_docs/_upper_tabs_api.yaml +# Dropdown menu +- name: Resources + path: /resources + is_default: true + menu: + - include: /resources/_menu_toc.yaml + lower_tabs: + # Subsite tabs + other: + # [Guide] + - name: "Guide" + contents: + # TF Hub Platform overview. 
+ - heading: Getting Started + - title: Overview + path: /hub/overview + - title: Installation + path: /hub/installation + - title: Community and support + path: /hub/community + # Python library usage information + - heading: Using the library + - title: Overview + path: /hub/lib_overview + - title: SavedModels for TensorFlow 2 + path: /hub/tf2_saved_model + - title: Caching model downloads + path: /hub/caching + - title: Migration to TF2 + path: /hub/migration_tf2 + - title: Model compatibility for TF1/TF2 + path: /hub/model_compatibility + - title: "Deprecated: TF1 Hub format" + path: /hub/tf1_hub_module + status: deprecated + # SavedModel APIs + - heading: Common SavedModel APIs + - title: Overview + path: /hub/common_saved_model_apis/index.md + - title: Reusable SavedModels (for all tasks) + path: /hub/reusable_saved_models + - title: Image tasks + path: /hub/common_saved_model_apis/images + - title: Text tasks + path: /hub/common_saved_model_apis/text + # Publishing models + - heading: Publishing models + - title: Publishing process + path: /hub/publish + - title: Data portability and deletion + path: /hub/portability_and_deletion + # Advanced developer info + - heading: Advanced developer info + - title: Model formats + path: /hub/model_formats + - title: Model hosting protocol + path: /hub/hosting + - title: Build from source + path: /hub/build_from_source + - title: Common issues + path: /hub/common_issues + - title: Contribute to TensorFlow Hub + path: /hub/contribute + # [Tutorials] + - name: Tutorials + path: /hub/tutorials + contents: + - include: /hub/tutorials/_toc.yaml + # [API] + - name: API + skip_translation: true + contents: + - include: /hub/api_docs/python/hub/_toc.yaml + # [Models] + - name: "Models ↗" + contents: + - title: Models + path: https://tfhub.dev + status: external + +- include: /_upper_tabs_right.yaml diff --git a/site/en/hub/_index.yaml b/site/en/hub/_index.yaml new file mode 100644 index 00000000000..00e67b15265 --- /dev/null +++ b/site/en/hub/_index.yaml @@ -0,0 +1,145 @@ +# This file is rendered on tensorflow.org/hub. +# ../README.md is rendered on github.com/tensorflow/hub. +# Both link to ./overview.md and ./*.md for detailed docs. +book_path: /hub/_book.yaml +project_path: /hub/_project.yaml +description: > + TensorFlow Hub is a repository of trained machine learning models ready for fine-tuning and + deployable anywhere. Reuse trained models like BERT and Faster R-CNN with just a few lines of code. +landing_page: + custom_css_path: /site-assets/css/style.css + rows: + - heading: TensorFlow Hub is a repository of trained machine learning models. + items: + - classname: + tfo-landing-row-item-code-block + devsite-landing-row-50 + description: > + TensorFlow Hub is a repository of trained machine learning models ready for fine-tuning and + deployable anywhere. Reuse trained models like BERT and Faster R-CNN with just a few lines of code. + list: + - heading: See the guide + description: Learn about how to use TensorFlow Hub and how it works. + path: /hub/overview + icon: + path: /hub/images/guide_basics.png + - heading: See tutorials + description: Tutorials show you end-to-end examples using TensorFlow Hub. + path: /hub/tutorials + icon: + path: /site-assets/images/marketing/learn/lite-pick.svg + - heading: See models + description: Find trained TF, TFLite, and TF.js models for your use case. + path: https://tfhub.dev + icon: + path: /site-assets/images/marketing/learn/js-run.svg + code_block: | +
+          !pip install --upgrade tensorflow_hub
+
+          import tensorflow_hub as hub
+
+          model = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2")
+          embeddings = model(["The rain in Spain.", "falls",
+                              "mainly", "In the plain!"])
+
+          print(embeddings.shape)  # (4, 128)
+        
+ - options: + - cards + - centered-header + heading: > +

Models + description: > + Find trained models from the TensorFlow community on TFHub.dev + items: + - heading: BERT + description: Check out BERT for NLP tasks including text classification and question answering. + path: https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3 + image_path: /hub/images/bert.png + buttons: + - label: See the model + path: https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3 + - heading: Object detection + description: Use the Faster R-CNN Inception ResNet V2 640x640 model for detecting objects in images. + path: https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1 + image_path: /hub/images/object_detection.png + buttons: + - label: See the model + path: https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1 + - heading: Style transfer + description: Transfer the style of one image to another using the image style transfer model. + path: https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2 + image_path: /hub/images/style_transfer.png + buttons: + - label: See the model + path: https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2 + - heading: On-device food classifier + description: Use this TFLite model to classify photos of food on a mobile device. + path: https://tfhub.dev/google/lite-model/aiy/vision/classifier/food_V1/1 + image_path: /hub/images/food.png + buttons: + - label: See the model + path: https://tfhub.dev/google/lite-model/aiy/vision/classifier/food_V1/1 + - options: + - cards + - centered-header + heading: > +

News & announcements + description: > + Check out our blog for more announcements and view the latest #TFHub updates on Twitter + items: + - heading: TensorFlow Hub for Real World Impact at Google I/O + youtube_id: BE5nkhFe3AE + description: > + Learn how you can use TensorFlow Hub to build ML solutions with real world impact. + buttons: + - label: Watch the video + path: https://www.youtube.com/watch?v=BE5nkhFe3AE + - heading: "On-device ML solutions" + description: > + To explore ML solutions for your mobile and web apps including TensorFlow Hub, visit the Google on-device machine learning page. + path: https://g.co/on-device-ml + image_path: /hub/images/odml.png + buttons: + - label: Visit the site + path: https://g.co/on-device-ml + - heading: "Making BERT Easier with Preprocessing Models From TensorFlow Hub" + description: > + TensorFlow Hub makes BERT simple to use with new preprocessing models. + path: https://blog.tensorflow.org/2020/12/making-bert-easier-with-preprocessing-models-from-tensorflow-hub.html + image_path: /hub/images/bert_preprocess_wide.png + buttons: + - label: Read the blog + path: https://blog.tensorflow.org/2020/12/making-bert-easier-with-preprocessing-models-from-tensorflow-hub.html + - heading: "From singing to musical scores: Estimating pitch with SPICE and Tensorflow Hub" + description: > + Learn how to use the SPICE model to automatically transcribe sheet music from live audio. + path: https://blog.tensorflow.org/2020/06/estimating-pitch-with-spice-and-tensorflow-hub.html + image_path: /hub/images/spice_blog.png + buttons: + - label: Read the blog + path: https://blog.tensorflow.org/2020/06/estimating-pitch-with-spice-and-tensorflow-hub.html + - options: + - cards + - centered-header + heading: > +

Community + description: Join the TensorFlow Hub community + items: + - heading: TensorFlow Hub on GitHub + icon: + path: /hub/images/github_icon.svg + path: https://github.com/tensorflow/hub + - heading: Contribute models + icon: + name: publish + path: /hub/publish + - options: + - cta + items: + - heading: Get started with TensorFlow Hub + buttons: + - label: Find trained models + path: https://tfhub.dev + classname: button diff --git a/site/en/hub/_redirects.yaml b/site/en/hub/_redirects.yaml new file mode 100644 index 00000000000..bee1cbec873 --- /dev/null +++ b/site/en/hub/_redirects.yaml @@ -0,0 +1,7 @@ +redirects: +- from: /hub/becoming_a_publisher + to: /hub/publish +- from: /hub/writing_model_documentation + to: /hub/writing_documentation#model +- from: /hub/creating_a_collection + to: /hub/writing_documentation#collection diff --git a/site/en/hub/build_from_source.md b/site/en/hub/build_from_source.md new file mode 100644 index 00000000000..42e19eb6208 --- /dev/null +++ b/site/en/hub/build_from_source.md @@ -0,0 +1,195 @@ + + + +# Creating the TensorFlow Hub pip package using Linux + +Note: This document is for developers interested in modifying TensorFlow Hub +itself. To _use_ TensorFlow Hub, see the [Install instructions](installation.md) + +If you make changes to TensorFlow Hub pip package, you will likely want to +rebuild the pip package from source to try out your changes. + +This requires: + +* Python +* TensorFlow +* Git +* [Bazel](https://docs.bazel.build/versions/master/install.html) + +Alternatively, if you install the protobuf compiler you can +[try out your changes without using bazel](#develop). + +## Setup a virtualenv {:#setup} + +### Activate virtualenv + +Install virtualenv if it's not installed already: + +```shell +~$ sudo apt-get install python-virtualenv +``` + +Create a virtual environment for the package creation: + +```shell +~$ virtualenv --system-site-packages tensorflow_hub_env +``` + +And activate it: + +```shell +~$ source ~/tensorflow_hub_env/bin/activate # bash, sh, ksh, or zsh +~$ source ~/tensorflow_hub_env/bin/activate.csh # csh or tcsh +``` + +### Clone the TensorFlow Hub repository. + +```shell +(tensorflow_hub_env)~/$ git clone https://github.com/tensorflow/hub +(tensorflow_hub_env)~/$ cd hub +``` + +## Test your changes + +### Run TensorFlow Hub's tests + +```shell +(tensorflow_hub_env)~/hub/$ bazel test tensorflow_hub:all +``` + +## Build and install the package + +### Build TensorFlow Hub pip packaging script + +To build a pip package for TensorFlow Hub: + +```shell +(tensorflow_hub_env)~/hub/$ bazel build tensorflow_hub/pip_package:build_pip_package +``` + +### Create the TensorFlow Hub pip package + +```shell +(tensorflow_hub_env)~/hub/$ bazel-bin/tensorflow_hub/pip_package/build_pip_package \ +/tmp/tensorflow_hub_pkg +``` + +### Install and test the pip package (optional) + +Run the following commands to install the pip package. + +```shell +(tensorflow_hub_env)~/hub/$ pip install /tmp/tensorflow_hub_pkg/*.whl +``` + +Test import TensorFlow Hub: + +```shell +(tensorflow_hub_env)~/hub/$ cd .. # exit the directory to avoid confusion +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +## "Developer" install (experimental) + + + +Warning: This approach to running TensorFlow is experimental, and not officially +supported by the TensorFlow Hub team. + +Building the package with bazel is the only officially supported method. However +if you are unfamiliar with bazel simpler to work with open source tools. 
For +that you can do a "developer install" of the package. + +This installation method allows you to install the working directory into your +python environment, so that ongoing changes are reflected when you import the +package. + +### Setup the repository + +First setup the virtualenv and repository, as described [above](#setup). + +### Install `protoc` + +Because TensorFlow Hub uses protobufs you will need the protobuf compiler to +create the necessary python `_pb2.py` files from the `.proto` files. + +#### On a Mac: + +``` +(tensorflow_hub_env)~/hub/$ brew install protobuf +``` + +#### On Linux + +``` +(tensorflow_hub_env)~/hub/$ sudo apt install protobuf-compiler +``` + +### Compile the `.proto` files + +Initially there are no `_pb2.py` files in the directory: + +``` +(tensorflow_hub_env)~/hub/$ ls -1 tensorflow_hub/*_pb2.py +``` + +Run `protoc` to create them: + +``` +(tensorflow_hub_env)~/hub/$ protoc -I=tensorflow_hub --python_out=tensorflow_hub tensorflow_hub/*.proto +(tensorflow_hub_env)~/hub/$ ls -1 tensorflow_hub/*_pb2.py +``` + +
+tensorflow_hub/image_module_info_pb2.py
+tensorflow_hub/module_attachment_pb2.py
+tensorflow_hub/module_def_pb2.py
+
+ +Note: Don't forget to recompile the `_pb2.py` files if you make changes to the +`.proto` definitions. + +### Import directly from the repository + +With the `_pb2.py` files in place, you can use try out your modifications +directly from the TensorFlow Hub directory: + +``` +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +### Install in "developer" mode + +Or to use this from outside the repository root, you can use the `setup.py +develop` installation: + +``` +(tensorflow_hub_env)~/hub/$ python tensorflow_hub/pip_package/setup.py develop +``` + +Now you can use your local changes in a regular python virtualenv, without the +need to rebuild and install the pip package for each new change: + +```shell +(tensorflow_hub_env)~/hub/$ cd .. # exit the directory to avoid confusion +(tensorflow_hub_env)~/$ python -c "import tensorflow_hub as hub" +``` + +## De-activate the virtualenv + +```shell +(tensorflow_hub_env)~/hub/$ deactivate +``` diff --git a/site/en/hub/caching.md b/site/en/hub/caching.md new file mode 100644 index 00000000000..678b2c22af0 --- /dev/null +++ b/site/en/hub/caching.md @@ -0,0 +1,86 @@ + +# Caching model downloads from TF Hub + +## Overview + +The `tensorflow_hub` library currently supports two modes for downloading +models. By default, a model is downloaded as a compressed archive and cached on +disk. Secondly, models can directly be read from remote storage into TensorFlow. +Either way, the calls to `tensorflow_hub` functions in the actual Python code +can and should continue to use the canonical tfhub.dev URLs of models, which are +portable across systems and navigable for documentation. In the rare case that +user code needs the actual filesystem location (after downloading and +decompressing, or after resolving a model handle into a filesystem path), +it can be obtained by the function `hub.resolve(handle)`. + +### Caching of compressed downloads + +The `tensorflow_hub` library by default caches models on the filesystem when +they have been downloaded from tfhub.dev (or other [hosting sites](hosting.md)) +and decompressed. This mode is recommended for most environments, except if disk +space is scarce but network bandwidth and latency are superb. + +The download location defaults to a local temporary directory but can be +customized by setting the environment variable `TFHUB_CACHE_DIR` (recommended) +or by passing the command-line flag `--tfhub_cache_dir`. The default cache +location `/tmp/tfhub_modules` (or whatever `os.path.join(tempfile.gettempdir(), +"tfhub_modules")` is evaluated to) should work in most cases. + +Users who prefer persistent caching across system reboots can instead set +`TFHUB_CACHE_DIR` to a location in their home directory. For example, a user of +the bash shell on a Linux system can add a line like the following to +`~/.bashrc`: + +```bash +export TFHUB_CACHE_DIR=$HOME/.cache/tfhub_modules +``` + +...restart the shell, and then this location will be used. When using a +persistent location, be aware that there is no automatic cleanup. + +### Reading from remote storage + +Users can instruct the `tensorflow_hub` library to directly read models from +remote storage (GCS) instead of downloading the models locally with: + +```shell +os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED" +``` + +or by setting the command-line flag `--tfhub_model_load_format` to +`UNCOMPRESSED`. 
This way, no caching directory is needed, which is especially +helpful in environments that provide little disk space but a fast internet +connection. + +### Running on TPU in Colab notebooks + +On [colab.research.google.com](https://colab.research.google.com), downloading +compressed models will conflict with the TPU runtime since the computation +workload is delegated to another machine that does not have access to the cache +location by default. There are two workarounds for this situation: + +#### 1) Use a GCS bucket that the TPU worker can access + +The easiest solution is to instruct the `tensorflow_hub` library to read the +models from TF Hub's GCS bucket as explained above. Users with their own GCS +bucket can instead specify a directory in their bucket as the cache location +with code like: + +```python +import os +os.environ["TFHUB_CACHE_DIR"] = "gs://my-bucket/tfhub-modules-cache" +``` + +...before calling the `tensorflow_hub` library. + +#### 2) Redirect all reads through the Colab host + +Another workaround is to redirect all reads (even of large variables) through +the Colab host: + +```python +load_options = +tf.saved_model.LoadOptions(experimental_io_device='/job:localhost') +reloaded_model = hub.load("https://tfhub.dev/...", options=load_options) +``` +**Note:** See more information regarding valid handles [here](tf2_saved_model.md#model_handles). diff --git a/site/en/hub/common_issues.md b/site/en/hub/common_issues.md new file mode 100644 index 00000000000..03ba4a62a8e --- /dev/null +++ b/site/en/hub/common_issues.md @@ -0,0 +1,148 @@ + +# Common issues + +If your issue is not listed here, please search the +[github issues](https://github.com/tensorflow/hub/issues) before filling a new +one. + +**Note:** This documentation uses TFhub.dev URL handles in examples. See more +information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +## TypeError: 'AutoTrackable' object is not callable + +```python +# BAD: Raises error +embed = hub.load('https://tfhub.dev/google/nnlm-en-dim128/1') +embed(['my text', 'batch']) +``` + +This error frequently arises when loading models in TF1 Hub format with the +`hub.load()` API in TF2. Adding the correct signature should fix this problem. +See the [TF-Hub migration guide for TF2](migration_tf2.md) for more details on +moving to TF2 and the use of models in TF1 Hub format in TF2. + +```python + +embed = hub.load('https://tfhub.dev/google/nnlm-en-dim128/1') +embed.signatures['default'](['my text', 'batch']) +``` + +## Cannot download a module + +In the process of using a module from an URL there are many errors that can show +up due to the network stack. Often this is a problem specific to the machine +running the code and not an issue with the library. Here is a list of the common +ones: + +* **"EOF occurred in violation of protocol"** - This issue is likely to be + generated if the installed python version does not support the TLS + requirements of the server hosting the module. Notably, python 2.7.5 is + known to fail resolving modules from tfhub.dev domain. **FIX**: Please + update to a newer python version. + +* **"cannot verify tfhub.dev's certificate"** - This issue is likely to be + generated if something on the network is trying to act as the dev gTLD. + Before .dev was used as a gTLD, developers and frameworks would sometimes + use .dev names to help testing code. **FIX:** Identify and reconfigure the + software that intercepts name resolution in the ".dev" domain. 
+ +* Failures to write to the cache directory `/tmp/tfhub_modules` (or similar): + see [Caching](caching.md) for what that is and how to change its location. + +If the above errors and fixes do not work, one can try to manually download a +module by simulating the protocol of attaching `?tf-hub-format=compressed` to +the URL to download a tar compressed file that has to be manually decompressed +into a local file. The path to the local file can then be used instead of the +URL. Here is a quick example: + +```bash +# Create a folder for the TF hub module. +$ mkdir /tmp/moduleA +# Download the module, and uncompress it to the destination folder. You might want to do this manually. +$ curl -L "https://tfhub.dev/google/universal-sentence-encoder/2?tf-hub-format=compressed" | tar -zxvC /tmp/moduleA +# Test to make sure it works. +$ python +> import tensorflow_hub as hub +> hub.Module("/tmp/moduleA") +``` + +## Running inference on a pre-initialized module + +If you are writing a Python program that applies a module many times on input +data, you can apply the following recipes. (Note: For serving requests in +production services, consider +[TensorFlow Serving](https://www.tensorflow.org/tfx/guide/serving) or other +scalable, Python-free solutions.) + +Assuming your use-case model is **initialization** and subsequent **requests** +(for example Django, Flask, custom HTTP server, etc.), you can set-up the +serving as follows: + +### TF2 SavedModels + +* In the initialization part: + * Load the TF2.0 model. + +```python +import tensorflow_hub as hub + +embedding_fn = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4") +``` + +* In the request part: + * Use the embedding function to run inference. + +```python +embedding_fn(["Hello world"]) +``` + +This call of a tf.function is optimized for performance, see +[tf.function guide](https://www.tensorflow.org/guide/function). + +### TF1 Hub modules + +* In the initialization part: + * Build the graph with a **placeholder** - entry point into the graph. + * Initialize the session. + +```python +import tensorflow as tf +import tensorflow_hub as hub + +# Create graph and finalize (finalizing optional but recommended). +g = tf.Graph() +with g.as_default(): + # We will be feeding 1D tensors of text into the graph. + text_input = tf.placeholder(dtype=tf.string, shape=[None]) + embed = hub.Module("https://tfhub.dev/google/universal-sentence-encoder/2") + embedded_text = embed(text_input) + init_op = tf.group([tf.global_variables_initializer(), tf.tables_initializer()]) +g.finalize() + +# Create session and initialize. +session = tf.Session(graph=g) +session.run(init_op) +``` + +* In the request part: + * Use the session to feed data into the graph through the placeholder. + +```python +result = session.run(embedded_text, feed_dict={text_input: ["Hello world"]}) +``` + +## Cannot change a model's dtype (e.g., float32 to bfloat16) + +TensorFlow's SavedModels (shared on TF Hub or otherwise) contain operations that +work on fixed data types (often, float32 for the weights and intermediate +activations of neural networks). These cannot be changed after the fact when +loading the SavedModel (but model publishers can choose to publish different +models with different data types). + +## Update a model version + +The documentation metadata of model versions can be updated. However, the +version's assets (model files) are immutable. If you want to change the model +assets, you can publish a newer version of the model. 
It's a good practice to +extend the documentation with a change log that describes what changed between +versions. diff --git a/site/en/hub/common_saved_model_apis/images.md b/site/en/hub/common_saved_model_apis/images.md new file mode 100644 index 00000000000..5413f0adc07 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/images.md @@ -0,0 +1,155 @@ + +# Common SavedModel APIs for Image Tasks + +This page describes how [TF2 SavedModels](../tf2_saved_model.md) for +image-related tasks should implement the +[Reusable SavedModel API](../reusable_saved_models.md). (This replaces the +[Common Signatures for Images](../common_signatures/images.md) for the +now-deprecated [TF1 Hub format](../tf1_hub_module).) + + + +## Image Feature Vector + +### Usage summary + +An **image feature vector** is a dense 1-D tensor that represents a whole image, +typically for use by a simple feed-forward classifier in the consumer model. (In +terms of classic CNNs, this is the bottleneck value after the spatial extent has +been pooled or flattened away, but before classification is done; for that, see +[image classification](#classification) below.) + +A Reusable SavedModel for image feature extraction has a `__call__` method on +the root object that maps a batch of images to a batch of feature vectors. It +can be used like so: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +images = ... # A batch of images with shape [batch_size, height, width, 3]. +features = obj(images) # A batch with shape [batch_size, num_features]. +``` + +In Keras, the equivalent is + +```python +features = hub.KerasLayer("path/to/model")(images) +``` + +The input follows the general convention for [input of images](#input). The +model documentation specifies the permissible range for `height` and `width` of +the input. + +The output is a single tensor of dtype `float32` and shape `[batch_size, +num_features]`. The `batch_size` is the same as in the input. `num_features` is +a module-specific constant independent of input size. + +### API details + +The [Reusable SavedModel API](../reusable_saved_models.md) also provides a list +of `obj.variables` (e.g., for initialization when not loading eagerly). + +A model that supports fine-tuning provides a list of `obj.trainable_variables`. +It may require you to pass `training=True` to execute in training mode (e.g., +for dropout). Some models allow optional arguments to override hyperparameters +(e.g., dropout rate; to be described in model documentation). The model may also +provide a list of `obj.regularization_losses`. For details, see the +[Reusable SavedModel API](../reusable_saved_models.md). + +In Keras, this is taken care of by `hub.KerasLayer`: initialize it with +`trainable=True` to enable fine-tuning, and (in the rare case that hparam +overrides apply) with `arguments=dict(some_hparam=some_value, ...))`. + +### Notes + +Applying dropout to the output features (or not) should be left to the model +consumer. The SavedModel itself should not perform dropout on the actual outputs +(even if it uses dropout internally in other places). 
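As an illustration of the API details above, here is a minimal fine-tuning sketch in Keras. The handle `"path/to/model"` and the class count are hypothetical placeholders, and whether `trainable=True` is appropriate depends on the particular model's documentation:

```python
import tensorflow as tf
import tensorflow_hub as hub

# Hypothetical handle and number of classes, for illustration only.
feature_extractor = hub.KerasLayer("path/to/model", trainable=True)
model = tf.keras.Sequential([
    feature_extractor,                          # [batch_size, num_features]
    tf.keras.layers.Dropout(0.2),               # dropout on features is up to the consumer
    tf.keras.layers.Dense(10, activation=None)  # new classifier head (10 classes assumed)
])
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"])
# model.fit(train_dataset, epochs=5)  # images normalized to [0, 1]; see "Image input" below
```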
+ +### Examples + +Reusable SavedModels for image feature vectors are used in + +* the Colab tutorial + [Retraining an Image Classifier](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb), + + + +## Image Classification + +### Usage summary + +**Image classification** maps the pixels of an image to linear scores (logits) +for membership in the classes of a taxonomy _selected by the module publisher_. +This allows model consumers to to draw conclusions from the particular +classification learned by the publisher module. (For image classification with +a new set of classes, it is common to reuse an +[Image Feature Vector](#feature-vector) model with a new classifier instead.) + +A Reusable SavedModel for image classification has a `__call__` method on the +root object that maps a batch of images to a batch of logits. It can be used +like so: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +images = ... # A batch of images with shape [batch_size, height, width, 3]. +logits = obj(images) # A batch with shape [batch_size, num_classes]. +``` + +In Keras, the equivalent is + +```python +logits = hub.KerasLayer("path/to/model")(images) +``` + +The input follows the general convention for [input of images](#input). The +model documentation specifies the permissible range for `height` and `width` of +the input. + +The output `logits` is a single tensor of dtype `float32` and shape +`[batch_size, num_classes]`. The `batch_size` is the same as in the input. +`num_classes` is the number of classes in the classification, which is a +model-specific constant. + +The value `logits[i, c]` is a score predicting the membership of example `i` in +the class with index `c`. + +It depends on the underlying classification whether these scores are meant to be +used with softmax (for mutually exclusive classes), sigmoid (for orthogonal +classes), or something else. The module documentation should describe this, and +refer to a definition of the class indices. + +### API details + +The [Reusable SavedModel API](../reusable_saved_models.md) also provides a list +of `obj.variables` (e.g., for initialization when not loading eagerly). + +A model that supports fine-tuning provides a list of `obj.trainable_variables`. +It may require you to pass `training=True` to execute in training mode (e.g., +for dropout). Some models allow optional arguments to override hyperparameters +(e.g., dropout rate; to be described in model documentation). The model may also +provide a list of `obj.regularization_losses`. For details, see the +[Reusable SavedModel API](../reusable_saved_models.md). + +In Keras, this is taken care of by `hub.KerasLayer`: initialize it with +`trainable=True` to enable fine-tuning, and (in the rare case that hparam +overrides apply) with `arguments=dict(some_hparam=some_value, ...))`. + + + +## Image input + +This is common to all types of image models. + +A model that takes a batch of images as input accepts them as a dense 4-D tensor +of dtype `float32` and shape `[batch_size, height, width, 3]` whose elements are +RGB color values of pixels normalized to the range [0, 1]. This is what you get +from `tf.image.decode_*()` followed by `tf.image.convert_image_dtype(..., +tf.float32)`. + +The model accepts any `batch_size`. The model documentation specifies the +permissible range for `height` and `width`. The last dimension is fixed to 3 RGB +channels. 
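For example, a consumer could build a correctly shaped batch from image files with a sketch like the following; the 224x224 size and the file names are hypothetical, and the size must fall within the range given by the model documentation:

```python
import tensorflow as tf

def load_image(path, height=224, width=224):  # hypothetical target size
  img = tf.io.read_file(path)
  img = tf.image.decode_jpeg(img, channels=3)           # uint8 RGB
  img = tf.image.convert_image_dtype(img, tf.float32)   # scales values to [0, 1]
  return tf.image.resize(img, [height, width])          # [height, width, 3]

images = tf.stack([load_image(p) for p in ["cat.jpg", "dog.jpg"]])  # [2, 224, 224, 3]
```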
+ +It is recommended that models use the `channels_last` (or `NHWC`) layout of +Tensors throughout, and leave it to TensorFlow's graph optimizer to rewrite to +`channels_first` (or `NCHW`) if needed. diff --git a/site/en/hub/common_saved_model_apis/index.md b/site/en/hub/common_saved_model_apis/index.md new file mode 100644 index 00000000000..356505f9952 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/index.md @@ -0,0 +1,46 @@ + +# Common SavedModel APIs for TF Hub + +## Introduction + +[TensorFlow Hub](https://tfhub.dev) hosts models for a variety of tasks. Models +for the same task are encouraged to implement a common API so that model +consumers can easily exchange them without modifying the code that uses them, +even if they come from different publishers. + +The goal is to make exchanging different models for the same task as simple as +switching a string-valued hyperparameter. With that, model consumers can easily +find the best one for their problem. + +This directory collects specifications of common APIs for models in the +[TF2 SavedModel format](../tf2_saved_model.md). (It replaces the +[Common Signatures](../common_signatures/index.md) for the now-deprecated +[TF1 Hub format](../tf1_hub_module.md).) + +## Reusable SavedModel: the common foundation + +The [Reusable SavedModel API](../reusable_saved_models.md) defines general +conventions how to load a SavedModel back into a Python program and reuse it as +part of a bigger TensorFlow model. + +Basic usage: + +```python +obj = hub.load("path/to/model") # That's tf.saved_model.load() after download. +outputs = obj(inputs, training=False) # Invokes the tf.function obj.__call__. +``` + +Key point: This uses the object-based interface to restored SavedModels that was +added in TensorFlow 2, not the SavedModel signatures for serving. + +For Keras users, the `hub.KerasLayer` class relies on this API to wrap the +Reusable SavedModel as a Keras Layer (shielding Keras users from its details), +with inputs and outputs according to the task-specific APIs listed below. + +## Task-specific APIs + +These refine the [Reusable SavedModel API](../reusable_saved_models.md) with +conventions for particular ML tasks and types of data. + +* [Image tasks](images.md) +* [Text tasks](text.md) diff --git a/site/en/hub/common_saved_model_apis/text.md b/site/en/hub/common_saved_model_apis/text.md new file mode 100644 index 00000000000..209319f27a9 --- /dev/null +++ b/site/en/hub/common_saved_model_apis/text.md @@ -0,0 +1,361 @@ + +# Common SavedModel APIs for Text Tasks + +This page describes how [TF2 SavedModels](../tf2_saved_model.md) for +text-related tasks should implement the +[Reusable SavedModel API](../reusable_saved_models.md). (This replaces and +extends the [Common Signatures for Text](../common_signatures/text.md) for the +now-deprecated [TF1 Hub format](../tf1_hub_module).) + +## Overview + +There are several APIs to compute **text embeddings** (also known as dense +representations of text, or text feature vectors). + +* The API for *text embeddings from text inputs* is implemented by a + SavedModel that maps a batch of strings to a batch of embedding vectors. + This is very easy to use, and many models on TF Hub have implemented it. + However, this does not allow fine-tuning the model on TPU. 
+ +* The API for *text embeddings with preprocessed inputs* solves the same task, + but is implemented by two separate SavedModels: + + * a *preprocessor* that can run inside a tf.data input pipeline and + converts strings and other variable-length data into numeric Tensors, + * an *encoder* that accepts the results of the preprocessor and performs + the trainable part of the embedding computation. + + This split allows inputs to be preprocessed asynchronously before being fed + into the training loop. In particular, it allows building encoders that can + be run and fine-tuned on [TPU](https://www.tensorflow.org/guide/tpu). + +* The API for *text embeddings with Transformer encoders* extends the API for + text embeddings from preprocessed inputs to the particular case of BERT and + other Transformer encoders. + + * The *preprocessor* is extended to build encoder inputs from more than + one segment of input text. + * The *Transformer encoder* exposes the context-aware embeddings of + individual tokens. + +In each case, the text inputs are UTF-8 encoded strings, typically of plain +text, unless the model documentation provides otherwise. + +Regardless of API, different models have been pre-trained on text from different +languages and domains, and with different tasks in mind. Therefore, not every +text embedding model is suitable for every problem. + + + + +## Text Embedding from Text Inputs + +A SavedModel for **text embeddings from text inputs** accepts a batch of inputs +in a string Tensor of shape `[batch_size]` and maps them to a float32 Tensor of +shape `[batch_size, dim]` with dense representations (feature vectors) of the +inputs. + +### Usage synopsis + +```python +obj = hub.load("path/to/model") +text_input = ["A long sentence.", + "single-word", + "http://example.com/"] +embeddings = obj(text_input) +``` + +Recall from the [Reusable SavedModel API](../reusable_saved_models.md) that +running the model in training mode (e.g., for dropout) may require a keyword +argument `obj(..., training=True)`, and that `obj` provides attributes +`.variables`, `.trainable_variables` and `.regularization_losses` as applicable. + +In Keras, all this is taken care of by + +```python +embeddings = hub.KerasLayer("path/to/model", trainable=...)(text_input) +``` + +### Distributed training + +If the text embedding is used as part of a model that gets trained with a +distribution strategy, the call to `hub.load("path/to/model")` or +`hub.KerasLayer("path/to/model", ...)`, resp., must happen inside the +DistributionStrategy scope in order to create the model's variables in the +distributed way. For example + +```python + with strategy.scope(): + ... + model = hub.load("path/to/model") + ... +``` + +### Examples + +* Colab tutorial + [Text Classification with Movie Reviews](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb). + + + +## Text Embeddings with Preprocessed Inputs + +A **text embedding with preprocessed inputs** is implemented by two separate +SavedModels: + +* a **preprocessor** that maps a string Tensor of shape `[batch_size]` to a + dict of numeric Tensors, +* an **encoder** that accepts a dict of Tensors as returned by the + preprocessor, performs the trainable part of the embedding computation, and + returns a dict of outputs. The output under key `"default"` is a float32 + Tensor of shape `[batch_size, dim]`. 
+ +This allows to run the preprocessor in an input pipeline but fine-tune the +embeddings computed by the encoder as part of a larger model. In particular, it +allows to build encoders that can be run and fine-tuned on +[TPU](https://www.tensorflow.org/guide/tpu). + +It is an implementation detail which Tensors are contained in the preprocessor's +output, and which (if any) additional Tensors besides `"default"` are contained +in the encoder's output. + +The documentation of the encoder must specify which preprocessor to use with it. +Typically, there is exactly one correct choice. + +### Usage synopsis + +```python +text_input = tf.constant(["A long sentence.", + "single-word", + "http://example.com/"]) +preprocessor = hub.load("path/to/preprocessor") # Must match `encoder`. +encoder_inputs = preprocessor(text_input) + +encoder = hub.load("path/to/encoder") +encoder_outputs = encoder(encoder_inputs) +embeddings = encoder_outputs["default"] +``` + +Recall from the [Reusable SavedModel API](../reusable_saved_models.md) that +running the encoder in training mode (e.g., for dropout) may require a keyword +argument `encoder(..., training=True)`, and that `encoder` provides attributes +`.variables`, `.trainable_variables` and `.regularization_losses` as applicable. + +The `preprocessor` model may have `.variables` but is not meant to be trained +further. Preprocessing is not mode-dependent: if `preprocessor()` has a +`training=...` argument at all, it has no effect. + +In Keras, all this is taken care of by + +```python +encoder_inputs = hub.KerasLayer("path/to/preprocessor")(text_input) +encoder_outputs = hub.KerasLayer("path/to/encoder", trainable=True)(encoder_inputs) +embeddings = encoder_outputs["default"] +``` + +### Distributed training + +If the encoder is used as part of a model that gets trained with a distribution +strategy, the call to `hub.load("path/to/encoder")` or +`hub.KerasLayer("path/to/encoder", ...)`, resp., must happen inside + +```python + with strategy.scope(): + ... +``` + +in order to re-create the encoder variables in the distributed way. + +Likewise, if the preprocessor is part of the trained model (as in the simple +example above), it also needs to be loaded under the distribution strategy +scope. If, however, the preprocessor is used in an input pipeline (e.g., in a +callable passed to `tf.data.Dataset.map()`), its loading must happen *outside* +the distribution strategy scope, in order to place its variables (if any) on the +host CPU. + +### Examples + +* Colab tutorial + [Classify text with BERT](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/text/classify_text_with_bert.ipynb). + + + +## Text embeddings with Transformer Encoders + +Transformer encoders for text operate on a batch of input sequences, each +sequence comprising *n* ≥ 1 segments of tokenized text, within some +model-specific bound on *n*. For BERT and many of its extensions, that bound is +2, so they accept single segments and segment pairs. + +The API for **text embeddings with Transformer encoders** extends the API for +text embeddings with preprocessed inputs to this setting. + +### Preprocessor + +A preprocessor SavedModel for text embeddings with Transformer encoders +implements the API of a preprocessor SavedModel for text embeddings with +preprocessed inputs (see above), which provides a way to map single-segment text +inputs directly to encoder inputs. 
+ +In addition, the preprocessor SavedModel provides callable subobjects `tokenize` +for tokenization (separately per segment) and `bert_pack_inputs` for packing *n* +tokenized segments into one input sequence for the encoder. Each subobject +follows the [Reusable SavedModel API](../reusable_saved_models.md). + +#### Usage synopsis + +As a concrete example for two segments of text, let us look at a sentence +entailment task that asks whether a premise (first segment) does or does not +imply a hypothesis (second segment). + +```python +preprocessor = hub.load("path/to/preprocessor") + +# Tokenize batches of both text inputs. +text_premises = tf.constant(["The quick brown fox jumped over the lazy dog.", + "Good day."]) +tokenized_premises = preprocessor.tokenize(text_premises) +text_hypotheses = tf.constant(["The dog was lazy.", # Implied. + "Axe handle!"]) # Not implied. +tokenized_hypotheses = preprocessor.tokenize(text_hypotheses) + +# Pack input sequences for the Transformer encoder. +seq_length = 128 +encoder_inputs = preprocessor.bert_pack_inputs( + [tokenized_premises, tokenized_hypotheses], + seq_length=seq_length) # Optional argument. +``` + +In Keras, this computation can be expressed as + +```python +tokenize = hub.KerasLayer(preprocessor.tokenize) +tokenized_hypotheses = tokenize(text_hypotheses) +tokenized_premises = tokenize(text_premises) + +bert_pack_inputs = hub.KerasLayer( + preprocessor.bert_pack_inputs, + arguments=dict(seq_length=seq_length)) # Optional argument. +encoder_inputs = bert_pack_inputs([tokenized_premises, tokenized_hypotheses]) +``` + +#### Details of `tokenize` + +A call to `preprocessor.tokenize()` accepts a string Tensor of shape +`[batch_size]` and returns a +[RaggedTensor](https://www.tensorflow.org/guide/ragged_tensor) of shape +`[batch_size, ...]` whose values are int32 token ids representing the input +strings. There can be *r* ≥ 1 ragged dimensions after `batch_size` but no other +uniform dimension. + +* If *r*=1, the shape is `[batch_size, (tokens)]`, and each input is simply + tokenized into a flat sequence of tokens. +* If *r*>1, there are *r*-1 additional levels of grouping. For example, + [tensorflow_text.BertTokenizer](https://github.com/tensorflow/text/blob/v2.3.0/tensorflow_text/python/ops/bert_tokenizer.py#L138) + uses *r*=2 to group tokens by words and yields shape `[batch_size, (words), + (tokens_per_word)]`. It is up to the model at hand how many of these extra + level(s) exist, if any, and what groupings they represent. + +The user can (but need not) modify tokenized inputs, e.g., to accommodate the +seq_length limit that will be enforced in packing encoder inputs. Extra +dimensions in the tokenizer output can help here (e.g., to respect word +boundaries) but become meaningless in the next step. + +In terms of the [Reusable SavedModel API](../reusable_saved_models.md), the +`preprocessor.tokenize` object may have `.variables` but is not meant to be +trained further. Tokenization is not mode-dependent: if +`preprocessor.tokenize()` has a `training=...` argument at all, it has no +effect. + +#### Details of `bert_pack_inputs` + +A call to `preprocessor.bert_pack_inputs()` accepts a Python list of tokenized +inputs (batched separately for each input segment) and returns a dict of Tensors +representing a batch of fixed-length input sequences for the Transformer encoder +model. 
+ +Each tokenized input is an int32 RaggedTensor of shape `[batch_size, ...]`, +where the number *r* of ragged dimensions after batch_size is either 1 or the +same as in the output of `preprocessor.tokenize().` (The latter is for +convenience only; the extra dimensions are flattened out before packing.) + +Packing adds special tokens around the input segments as expected by the +encoder. The `bert_pack_inputs()` call implements exactly the packing scheme +used by the original BERT models and many of their extensions: the packed +sequence starts with one start-of-sequence token, followed by the tokenized +segments, each terminated by one end-of-segment token. Remaining positions up to +seq_length, if any, are filled up with padding tokens. + +If a packed sequence would exceed seq_length, `bert_pack_inputs()` truncates its +segments to prefixes of approximately equal sizes so that the packed sequence +fits exactly within seq_length. + +Packing is not mode-dependent: if `preprocessor.bert_pack_inputs()` has a +`training=...` argument at all, it has no effect. Also, +`preprocessor.bert_pack_inputs` is not expected to have variables, or support +fine-tuning. + +### Encoder + +The encoder is called on the dict of `encoder_inputs` in the same way as in the +API for text embeddings with preprocessed inputs (see above), including the +provisions from the [Reusable SavedModel API](../reusable_saved_models.md). + +#### Usage synopsis + +```python +encoder = hub.load("path/to/encoder") +encoder_outputs = encoder(encoder_inputs) +``` + +or equivalently in Keras: + +```python +encoder = hub.KerasLayer("path/to/encoder", trainable=True) +encoder_outputs = encoder(encoder_inputs) +``` + +#### Details + +The `encoder_outputs` are a dict of Tensors with the following keys. + + +* `"sequence_output"`: a float32 Tensor of shape `[batch_size, seq_length, + dim]` with the context-aware embedding of each token of every packed input + sequence. +* `"pooled_output"`: a float32 Tensor of shape `[batch_size, dim]` with the + embedding of each input sequence as a whole, derived from sequence_output in + some trainable manner. +* `"default"`, as required by the API for text embeddings with preprocessed + inputs: a float32 Tensor of shape `[batch_size, dim]` with the embedding of + each input sequence. (This might be just an alias of pooled_output.) + +The contents of the `encoder_inputs` are not strictly required by this API +definition. However, for encoders that use BERT-style inputs, it is recommended +to use the following names (from the +[NLP Modeling Toolkit of TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/nlp)) +to minimize friction in interchanging encoders and reusing preprocessor models: + +* `"input_word_ids"`: an int32 Tensor of shape `[batch_size, seq_length]` with + the token ids of the packed input sequence (that is, including a + start-of-sequence token, end-of-segment tokens, and padding). +* `"input_mask"`: an int32 Tensor of shape `[batch_size, seq_length]` with + value 1 at the position of all input tokens present before padding and value + 0 for the padding tokens. +* `"input_type_ids"`: an int32 Tensor of shape `[batch_size, seq_length]` with + the index of the input segment that gave rise to the input token at the + respective position. The first input segment (index 0) includes the + start-of-sequence token and its end-of-segment token. The second and later + segments (if present) include their respective end-of-segment token. 
Padding + tokens get index 0 again. + +### Distributed training + +For loading the preprocessor and encoder objects inside or outside a +distribution strategy scope, the same rules apply as in the API for text +embeddings with preprocessed inputs (see above). + +### Examples + +* Colab tutorial + [Solve GLUE tasks using BERT on TPU](https://colab.research.google.com/github/tensorflow/text/blob/master/docs/tutorials/bert_glue.ipynb). diff --git a/site/en/hub/common_signatures/images.md b/site/en/hub/common_signatures/images.md new file mode 100644 index 00000000000..5e41c3e2960 --- /dev/null +++ b/site/en/hub/common_signatures/images.md @@ -0,0 +1,155 @@ + +# Common Signatures for Images + +This page describes common signatures that should be implemented by modules in +the [TF1 Hub format](../tf1_hub_module.md) for image-related tasks. (For the +[TF2 SavedModel format](../tf2_saved_model.md), see the analogous +[SavedModel API](../common_saved_model_apis/images.md).) + +Some modules can be used for more than one task (e.g., image classification +modules tend to do some feature extraction on the way). Therefore, each module +provides (1) named signatures for all the tasks anticipated by the publisher, +and (2) a default signature `output = m(images)` for its designated primary +task. + + +## Image Feature Vector + +### Usage summary + +An **image feature vector** is a dense 1-D tensor that represents a whole image, +typically for classification by the consumer model. (Unlike the intermediate +activations of CNNs, it does not offer a spatial breakdown. Unlike [image +classification](#classification), it discards the classification learned +by the publisher model.) + +A module for image feature extraction has a default signature that maps a batch +of images to a batch of feature vectors. It can be used like so: + +```python + module_spec = hub.load_module_spec("path/to/module") + height, width = hub.get_expected_image_size(module_spec) + images = ... # A batch of images with shape [batch_size, height, width, 3]. + module = hub.Module(module_spec) + features = module(images) # A batch with shape [batch_size, num_features]. +``` + +It also defines the corresponding named signature. + +### Signature specification + +The named signature for extracting image feature vectors is invoked as + +```python + outputs = module(dict(images=images), signature="image_feature_vector", + as_dict=True) + features = outputs["default"] +``` + +The input follows the general convention for +[input of images](#input). + +The outputs dictionary contains a `"default"` output of dtype `float32` and +shape `[batch_size, num_features]`. The `batch_size` is the same as in the +input, but not known at graph construction time. `num_features` is a known, +module-specific constant independent of input size. + +These feature vectors are meant to be usable for classification with a simple +feed-forward classifier (like the pooled features from the topmost convolutional +layer in a typical CNN for image classification). + +Applying dropout to the output features (or not) should be left to the module +consumer. The module itself should not perform dropout on the actual outputs +(even if it uses dropout internally in other places). + +The outputs dictionary may provide further outputs, for example, the activations +of hidden layers inside the module. Their keys and values are module-dependent. 
+It is recommended to prefix architecture-dependent keys with an architecture +name (e.g., to avoid confusing the intermediate layer `"InceptionV3/Mixed_5c"` +with the topmost convolutional layer `"InceptionV2/Mixed_5c"`). + + +## Image Classification + +### Usage summary + +**Image classification** maps the pixels of an image to linear scores (logits) +for membership in the classes of a taxonomy _selected by the module publisher_. +This allows consumers to draw conclusions from the particular classification +learned by the publisher module, and not just its underlying features (cf. +[Image Feature Vector](#feature-vector)). + +A module for image feature extraction has a default signature that maps a batch +of images to a batch of logits. It can be used like so: + +```python + module_spec = hub.load_module_spec("path/to/module") + height, width = hub.get_expected_image_size(module_spec) + images = ... # A batch of images with shape [batch_size, height, width, 3]. + module = hub.Module(module_spec) + logits = module(images) # A batch with shape [batch_size, num_classes]. +``` + +It also defines the corresponding named signature. + +### Signature specification + +The named signature for extracting image feature vectors is invoked as + +```python + outputs = module(dict(images=images), signature="image_classification", + as_dict=True) + logits = outputs["default"] +``` + +The input follows the general convention for +[input of images](#input). + +The outputs dictionary contains a `"default"` output of dtype `float32` and +shape `[batch_size, num_classes]`. The `batch_size` is the same as in the input, +but not known at graph construction time. `num_classes` is the number of classes +in the classification, which is a known constant independent of input size. + +Evaluating `outputs["default"][i, c]` yields a score predicting the membership +of example `i` in the class with index `c`. + +It depends on the underlying classification whether these scores are meant to be +used with softmax (for mutually exclusive classes), sigmoid (for orthogonal +classes), or something else. The module documentation should describe this, +and refer to a definition of the class indices. + +The outputs dictionary may provide further outputs, for example, the activations +of hidden layers inside the module. Their keys and values are module-dependent. +It is recommended to prefix architecture-dependent keys with an architecture +name (e.g., to avoid confusing the intermediate layer `"InceptionV3/Mixed_5c"` +with the topmost convolutional layer `"InceptionV2/Mixed_5c"`). + + +## Image input + +This is common to all types of image modules and image signatures. + +A signature that takes a batch of images as input accepts them as a dense 4-D +tensor of dtype `float32` and shape `[batch_size, height, width, 3]` whose +elements are RGB color values of pixels normalized to the range [0, 1]. This is +what you get from `tf.image.decode_*()` followed by +`tf.image.convert_image_dtype(..., tf.float32)`. + +A module with exactly one (or one principal) input of images uses the name +`"images"` for this input. + +The module accepts any `batch_size`, and correspondingly sets the first +dimension of TensorInfo.tensor_shape to "unknown". The last dimension is fixed +to the number `3` of RGB channels. The `height` and `width` dimensions are +fixed to the expected size of input images. (Future work may remove that +restriction for fully convolutional modules.) 
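As a sketch of how a consumer might prepare inputs of the expected size (using `hub.get_expected_image_size()` as discussed in the next paragraph; the file names are placeholders, and this assumes TF1-style graph building):

```python
import tensorflow as tf
import tensorflow_hub as hub

module_spec = hub.load_module_spec("path/to/module")
height, width = hub.get_expected_image_size(module_spec)

file_names = tf.constant(["cat.jpg", "dog.jpg"])  # placeholder inputs

def decode_and_resize(path):
  img = tf.image.decode_jpeg(tf.read_file(path), channels=3)
  img = tf.image.convert_image_dtype(img, tf.float32)      # RGB values in [0, 1]
  return tf.image.resize_images(img, [height, width])

images = tf.map_fn(decode_and_resize, file_names, dtype=tf.float32)  # [batch, height, width, 3]
```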
+ +Consumers of the module should not inspect the shape directly, but obtain +the size information by calling hub.get_expected_image_size() +on the module or module spec, and are expected to resize input images +accordingly (typically before/during batching). + +For simplicity, TF-Hub modules use the `channels_last` +(or `NHWC`) layout of Tensors, and leave it to TensorFlow's graph optimizer +to rewrite to `channels_first` (or `NCHW`) if needed. It has been doing that +by default since TensorFlow version 1.7. diff --git a/site/en/hub/common_signatures/index.md b/site/en/hub/common_signatures/index.md new file mode 100644 index 00000000000..05eacc8b37f --- /dev/null +++ b/site/en/hub/common_signatures/index.md @@ -0,0 +1,25 @@ + +# Common Signatures for TF Hub Modules + +## Introduction + +[TensorFlow Hub](https://tfhub.dev) hosts models for a variety of tasks. Models +for the same task are encouraged to implement a common API so that model +consumers can easily exchange them without modifying the code that uses them, +even if they come from different publishers. + +The goal is to make exchanging different models for the same task as simple as +switching a string-valued hyperparameter. With that, model consumers can easily +find the best one for their problem. + +This directory collects specifications of common signatures for modules in the +[TF1 Hub format](../tf1_hub_module.md). + +Note that the TF1 Hub format has been **deprecated** in favor of the +[TF2 SavedModel format](../tf2_saved_model.md) and its +[Common SavedModel APIs](../common_saved_model_apis/index.md). + +## Signatures + +* [Image Signatures](images.md) +* [Text Signatures](text.md) diff --git a/site/en/hub/common_signatures/text.md b/site/en/hub/common_signatures/text.md new file mode 100644 index 00000000000..3ea8f27c91d --- /dev/null +++ b/site/en/hub/common_signatures/text.md @@ -0,0 +1,46 @@ + +# Common Signatures for Text + +This page describes common signatures that should be implemented by modules in +the [TF1 Hub format](../tf1_hub_module.md) for tasks that accept text inputs. +(For the [TF2 SavedModel format](../tf2_saved_model.md), see the analogous +[SavedModel API](../common_saved_model_apis/text.md).) + +## Text feature vector + +A **text feature vector** module creates a dense vector representation +from text features. +It accepts a batch of strings of shape `[batch_size]` and maps them to +a `float32` tensor of shape `[batch_size, N]`. This is often called +**text embedding** in dimension `N`. + +### Basic usage + +```python + embed = hub.Module("path/to/module") + representations = embed([ + "A long sentence.", + "single-word", + "http://example.com/"]) +``` + +### Feature column usage + +```python + feature_columns = [ + hub.text_embedding_column("comment", "path/to/module", trainable=False), + ] + input_fn = tf.estimator.inputs.numpy_input_fn(features, labels, shuffle=True) + estimator = tf.estimator.DNNClassifier(hidden_units, feature_columns) + estimator.train(input_fn, max_steps=100) +``` + +## Notes + +Modules have been pre-trained on different domains and/or tasks, +and therefore not every text feature vector module would be suitable for +your problem. E.g.: some modules could have been trained on a single language. + +This interface does not allow fine-tuning of the text representation on TPUs, +because it requires the module to instantiate both string processing and the +trainable variables at the same time. 
diff --git a/site/en/hub/community.md b/site/en/hub/community.md new file mode 100644 index 00000000000..a7a4c2bf0ec --- /dev/null +++ b/site/en/hub/community.md @@ -0,0 +1,6 @@ + +# Community and support + +* The source code is available on [GitHub](https://github.com/tensorflow/hub). + We use [GitHub issues](https://github.com/tensorflow/hub/issues) for + tracking feature requests and bugs. \ No newline at end of file diff --git a/site/en/hub/contribute.md b/site/en/hub/contribute.md new file mode 100644 index 00000000000..e537f79f766 --- /dev/null +++ b/site/en/hub/contribute.md @@ -0,0 +1,16 @@ + +# Contribute + +To learn more about how to publish a model or model collection on +[tfhub.dev](https://tfhub.dev/), see the [becoming_a_publisher](publish.md) +guide. + +You can find more information of how to contribute to the +[TensorFlow Hub library](https://github.com/tensorflow/hub) in our +[GitHub contributing guide](https://github.com/tensorflow/hub/blob/master/CONTRIBUTING.md). + +Content published to tfhub.dev can be automatically mirrored to other model +hubs, provided it follows a specified format and is permitted by our Terms +(https://tfhub.dev/terms). See [our publishing documentation](publish.md) for +more details, and [our contribution documentation](contribute_a_model.md) if +you'd like to opt-out of mirroring. diff --git a/site/en/hub/hosting.md b/site/en/hub/hosting.md new file mode 100644 index 00000000000..ce2ce76b0a6 --- /dev/null +++ b/site/en/hub/hosting.md @@ -0,0 +1,175 @@ + +# Model hosting protocol + +This document describes the URL conventions used when hosting all model types on +[tfhub.dev](https://tfhub.dev) - TFJS, TF Lite and TensorFlow models. It also +describes the HTTP(S)-based protocol implemented by the `tensorflow_hub` library +in order to load TensorFlow models from [tfhub.dev](https://tfhub.dev) and +compatible services into TensorFlow programs. + +Its key feature is to use the same URL in code to load a model and in a browser +to view the model documentation. + +## General URL conventions + +[tfhub.dev](https://tfhub.dev) supports the following URL formats: + +* TF Hub publishers follow `https://tfhub.dev/` +* TF Hub collections follow + `https://tfhub.dev//collection/` +* TF Hub models have versioned url + `https://tfhub.dev///` and unversioned url + `https://tfhub.dev//` that resolves to the latest + version of the model. + +TF Hub models can be downloaded as compressed assets by appending URL parameters +to the [tfhub.dev](https://tfhub.dev) model URL. However, the URL parameters +required to achieve that depend on the model type: + +* TensorFlow models (both SavedModel and TF1 Hub formats): append + `?tf-hub-format=compressed` to the TensorFlow model url. +* TFJS models: append `?tfjs-format=compressed` to the TFJS model url to + download the compressed or `/model.json?tfjs-format=file` to read if from + remote storage. +* TF lite models: append `?lite-format=tflite` to the TF Lite model url. + +For example: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<table>
  <tr>
    <th>Type</th>
    <th>Model URL</th>
    <th>Download type</th>
    <th>URL param</th>
    <th>Download URL</th>
  </tr>
  <tr>
    <td>TensorFlow (SavedModel, TF1 Hub format)</td>
    <td>https://tfhub.dev/google/spice/2</td>
    <td>.tar.gz</td>
    <td>?tf-hub-format=compressed</td>
    <td>https://tfhub.dev/google/spice/2?tf-hub-format=compressed</td>
  </tr>
  <tr>
    <td>TF Lite</td>
    <td>https://tfhub.dev/google/lite-model/spice/1</td>
    <td>.tflite</td>
    <td>?lite-format=tflite</td>
    <td>https://tfhub.dev/google/lite-model/spice/1?lite-format=tflite</td>
  </tr>
  <tr>
    <td>TF.js</td>
    <td>https://tfhub.dev/google/tfjs-model/spice/2/default/1</td>
    <td>.tar.gz</td>
    <td>?tfjs-format=compressed</td>
    <td>https://tfhub.dev/google/tfjs-model/spice/2/default/1?tfjs-format=compressed</td>
  </tr>
</table>
+ +Additionally, some models also are hosted in a format that can be read directly +from remote storage without being downloaded. This is especially useful if there +is no local storage available, such as running a TF.js model in the browser or +loading a SavedModel on [Colab](https://colab.research.google.com/). Be +conscious that reading models that are hosted remotely without being downloaded +locally may increase latency. + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
<table>
  <tr>
    <th>Type</th>
    <th>Model URL</th>
    <th>Response type</th>
    <th>URL param</th>
    <th>Request URL</th>
  </tr>
  <tr>
    <td>TensorFlow (SavedModel, TF1 Hub format)</td>
    <td>https://tfhub.dev/google/spice/2</td>
    <td>String (path to the GCS folder where the uncompressed model is stored)</td>
    <td>?tf-hub-format=uncompressed</td>
    <td>https://tfhub.dev/google/spice/2?tf-hub-format=uncompressed</td>
  </tr>
  <tr>
    <td>TF.js</td>
    <td>https://tfhub.dev/google/tfjs-model/spice/2/default/1</td>
    <td>.json</td>
    <td>?tfjs-format=file</td>
    <td>https://tfhub.dev/google/tfjs-model/spice/2/default/1/model.json?tfjs-format=file</td>
  </tr>
</table>
+ +## tensorflow_hub library protocol + +This section describes how we host models on [tfhub.dev](https://tfhub.dev) for +use with the tensorflow_hub library. If you want to host your own model +repository to work with the tensorflow_hub library, your HTTP(s) distribution +service should provide an implementation of this protocol. + +Note that this section does not address hosting TF Lite and TFJS models since +they are not downloaded via the `tensorflow_hub` library. For more information +on hosting these model types, please check [above](#general-url-conventions). + +### Compressed Hosting + +Models are stored on [tfhub.dev](https://tfhub.dev) as compressed tar.gz files. +By default, the tensorflow_hub library automatically downloads the compressed +model. They can also be manually downloaded by appending the +`?tf-hub-format=compressed` to the model url, for example: + +```shell +wget https://tfhub.dev/tensorflow/albert_en_xxlarge/1?tf-hub-format=compressed +``` + +The root of the archive is the root of the model directory and should contain a +SavedModel, as in this example: + +```shell +# Create a compressed model from a SavedModel directory. +$ tar -cz -f model.tar.gz --owner=0 --group=0 -C /tmp/export-model/ . + +# Inspect files inside a compressed model +$ tar -tf model.tar.gz +./ +./variables/ +./variables/variables.data-00000-of-00001 +./variables/variables.index +./assets/ +./saved_model.pb +``` + +Tarballs for use with the legacy +[TF1 Hub format](https://www.tensorflow.org/hub/tf1_hub_module) will also +contain a `./tfhub_module.pb` file. + +When one of `tensorflow_hub` library model loading APIs is invoked +([hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer), +[hub.load](https://www.tensorflow.org/hub/api_docs/python/hub/load), etc) the +library downloads the model, uncompresses the model and caches it locally. The +`tensorflow_hub` library expects that model URLs are versioned and that the +model content of a given version is immutable, so that it can be cached +indefinitely. Learn more about [caching models](caching.md). + +![](https://raw.githubusercontent.com/tensorflow/hub/master/docs/images/library_download_cache.png) + +### Uncompressed Hosting + +When the environment variable `TFHUB_MODEL_LOAD_FORMAT` or the command-line flag +`--tfhub_model_load_format` is set to `UNCOMPRESSED`, the model is read directly +from remote storage (GCS) instead of being downloaded and uncompressed locally. +When this behavior is enabled the library appends `?tf-hub-format=uncompressed` +to the model URL. That request returns the path to the folder on GCS that +contains the uncompressed model files. As an example, \ +`https://tfhub.dev/google/spice/2?tf-hub-format=uncompressed` \ +returns \ +`gs://kaggle-tfhub-models-uncompressed/tfhub-modules/google/spice/2/uncompressed` +in the body of the 303 response. The library then reads the model from that GCS +destination. 
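For instance, a user of the `tensorflow_hub` library could opt into uncompressed reads with a sketch like the following; the SPICE URL is the example from above, and the environment variable should be set before the first model load:

```python
import os
os.environ["TFHUB_MODEL_LOAD_FORMAT"] = "UNCOMPRESSED"  # set before loading any model

import tensorflow_hub as hub

# The library resolves the URL to the GCS folder and reads the model from there.
model = hub.load("https://tfhub.dev/google/spice/2")
```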
diff --git a/site/en/hub/images/action_recognition.gif b/site/en/hub/images/action_recognition.gif new file mode 100644 index 00000000000..a58c22ac8c3 Binary files /dev/null and b/site/en/hub/images/action_recognition.gif differ diff --git a/site/en/hub/images/bert.png b/site/en/hub/images/bert.png new file mode 100644 index 00000000000..e36f69c9c7b Binary files /dev/null and b/site/en/hub/images/bert.png differ diff --git a/site/en/hub/images/bert_preprocess.png b/site/en/hub/images/bert_preprocess.png new file mode 100644 index 00000000000..18b3b435d1b Binary files /dev/null and b/site/en/hub/images/bert_preprocess.png differ diff --git a/site/en/hub/images/bert_preprocess_wide.png b/site/en/hub/images/bert_preprocess_wide.png new file mode 100644 index 00000000000..b414196724e Binary files /dev/null and b/site/en/hub/images/bert_preprocess_wide.png differ diff --git a/site/en/hub/images/bit_blog.jpg b/site/en/hub/images/bit_blog.jpg new file mode 100644 index 00000000000..260415bf0b1 Binary files /dev/null and b/site/en/hub/images/bit_blog.jpg differ diff --git a/site/en/hub/images/boundless.png b/site/en/hub/images/boundless.png new file mode 100644 index 00000000000..ccc52d17f84 Binary files /dev/null and b/site/en/hub/images/boundless.png differ diff --git a/site/en/hub/images/colab_logo.svg b/site/en/hub/images/colab_logo.svg new file mode 100644 index 00000000000..d03f1106221 --- /dev/null +++ b/site/en/hub/images/colab_logo.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/food.png b/site/en/hub/images/food.png new file mode 100644 index 00000000000..41865be3984 Binary files /dev/null and b/site/en/hub/images/food.png differ diff --git a/site/en/hub/images/gan_faces.gif b/site/en/hub/images/gan_faces.gif new file mode 100644 index 00000000000..a34b8d517f4 Binary files /dev/null and b/site/en/hub/images/gan_faces.gif differ diff --git a/site/en/hub/images/github_icon.svg b/site/en/hub/images/github_icon.svg new file mode 100644 index 00000000000..0a607bb98b3 --- /dev/null +++ b/site/en/hub/images/github_icon.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/guide_basics.png b/site/en/hub/images/guide_basics.png new file mode 100644 index 00000000000..e6aee34f516 Binary files /dev/null and b/site/en/hub/images/guide_basics.png differ diff --git a/site/en/hub/images/image_classification.png b/site/en/hub/images/image_classification.png new file mode 100644 index 00000000000..a3840e3482c Binary files /dev/null and b/site/en/hub/images/image_classification.png differ diff --git a/site/en/hub/images/interpolation.png b/site/en/hub/images/interpolation.png new file mode 100644 index 00000000000..d2f062da7c1 Binary files /dev/null and b/site/en/hub/images/interpolation.png differ diff --git a/site/en/hub/images/library_download_cache.png b/site/en/hub/images/library_download_cache.png new file mode 100644 index 00000000000..1b581a4a819 Binary files /dev/null and b/site/en/hub/images/library_download_cache.png differ diff --git a/site/en/hub/images/object_detection.png b/site/en/hub/images/object_detection.png new file mode 100644 index 00000000000..57b327099ae Binary files /dev/null and b/site/en/hub/images/object_detection.png differ diff --git a/site/en/hub/images/odml.png b/site/en/hub/images/odml.png new file mode 100644 index 00000000000..29bf3bcc61b Binary files /dev/null and b/site/en/hub/images/odml.png differ diff --git a/site/en/hub/images/similarity.png b/site/en/hub/images/similarity.png new file mode 100644 index 00000000000..3155e8706e1 Binary files /dev/null and 
b/site/en/hub/images/similarity.png differ diff --git a/site/en/hub/images/spice_blog.png b/site/en/hub/images/spice_blog.png new file mode 100644 index 00000000000..cf19769e6d8 Binary files /dev/null and b/site/en/hub/images/spice_blog.png differ diff --git a/site/en/hub/images/spice_color.png b/site/en/hub/images/spice_color.png new file mode 100644 index 00000000000..35b68d7c444 Binary files /dev/null and b/site/en/hub/images/spice_color.png differ diff --git a/site/en/hub/images/stackoverflow_icon.svg b/site/en/hub/images/stackoverflow_icon.svg new file mode 100644 index 00000000000..491a75e464d --- /dev/null +++ b/site/en/hub/images/stackoverflow_icon.svg @@ -0,0 +1 @@ + diff --git a/site/en/hub/images/style_transfer.png b/site/en/hub/images/style_transfer.png new file mode 100644 index 00000000000..d0427408830 Binary files /dev/null and b/site/en/hub/images/style_transfer.png differ diff --git a/site/en/hub/images/super_resolution.png b/site/en/hub/images/super_resolution.png new file mode 100644 index 00000000000..7d3f3741077 Binary files /dev/null and b/site/en/hub/images/super_resolution.png differ diff --git a/site/en/hub/images/text_video.gif b/site/en/hub/images/text_video.gif new file mode 100644 index 00000000000..5fe639b1eea Binary files /dev/null and b/site/en/hub/images/text_video.gif differ diff --git a/site/en/hub/images/yamnet.png b/site/en/hub/images/yamnet.png new file mode 100644 index 00000000000..416956ad6fb Binary files /dev/null and b/site/en/hub/images/yamnet.png differ diff --git a/site/en/hub/installation.md b/site/en/hub/installation.md new file mode 100644 index 00000000000..2381fbea614 --- /dev/null +++ b/site/en/hub/installation.md @@ -0,0 +1,57 @@ + +# Installation + +## Installing tensorflow_hub + +The `tensorflow_hub` library can be installed alongside TensorFlow 1 and +TensorFlow 2. We recommend that new users start with TensorFlow 2 right away, +and current users upgrade to it. + +### Use with TensorFlow 2 + +Use [pip](https://pip.pypa.io/) to +[install TensorFlow 2](https://www.tensorflow.org/install) as usual. (See there +for extra instructions about GPU support.) Then install a current version of +[`tensorflow-hub`](https://pypi.org/project/tensorflow-hub/) next to it (must be +0.5.0 or newer). + +```bash +$ pip install "tensorflow>=2.0.0" +$ pip install --upgrade tensorflow-hub +``` + +The TF1-style API of TensorFlow Hub works with the v1 compatibility mode of +TensorFlow 2. + +### Legacy use with TensorFlow 1 + +TensorFlow 1.15 is the only version of TensorFlow 1.x still supported by the +`tensorflow_hub` library (as of release 0.11.0). TensorFlow 1.15 defaults to +TF1-compatible behavior but contains many TF2 features under the hood to allow +some use of TensorFlow Hub's TF2-style APIs. + +```bash +$ pip install "tensorflow>=1.15,<2.0" +$ pip install --upgrade tensorflow-hub +``` + +### Use of pre-release versions + +The pip packages `tf-nightly` and `tf-hub-nightly` are built automatically from +the source code on github, with no release testing. This lets developers try out +the latest code without [building from source](build_from_source.md). 
+ +```bash +$ pip install tf-nightly +$ pip install --upgrade tf-hub-nightly +``` + +## Next Steps + +- [Library overview](lib_overview.md) +- Tutorials: + - [Text classification](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb) + - [Image classification](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) + - Additional examples + [on GitHub](https://github.com/tensorflow/hub/blob/master/examples/README.md) +- Find models on [tfhub.dev](https://tfhub.dev). \ No newline at end of file diff --git a/site/en/hub/lib_overview.md b/site/en/hub/lib_overview.md new file mode 100644 index 00000000000..c480ad2fbdf --- /dev/null +++ b/site/en/hub/lib_overview.md @@ -0,0 +1,50 @@ + +# TensorFlow Hub Library Overview + +The [`tensorflow_hub`](https://github.com/tensorflow/hub) library lets you +download and reuse trained models in your TensorFlow program with a minimum +amount of code. The main way to load a trained model is using the +`hub.KerasLayer` API. + +```python +import tensorflow_hub as hub + +embed = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2") +embeddings = embed(["A long sentence.", "single-word", "http://example.com/"]) +print(embeddings.shape, embeddings.dtype) +``` +**Note:** This documentation uses TFhub.dev URL handles in examples. See more +information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +## Setting the cache location for downloads. + +By default, `tensorflow_hub` uses a system-wide, temporary directory to cache +downloaded and uncompressed models. See [Caching](caching.md) for options to use +other, possibly more persistent locations. + +## API stability + +Although we hope to prevent breaking changes, this project is still under active +development and is not yet guaranteed to have a stable API or model format. + +## Fairness + +As in all of machine learning, [fairness](http://ml-fairness.com) is an +[important](https://research.googleblog.com/2016/10/equality-of-opportunity-in-machine.html) +consideration. Many pre-trained models are trained on large datasets. When +reusing any model, it’s important to be mindful of what data the model was +trained on (and whether there are any existing biases there), and how these +might impact your use of it. + +## Security + +Since they contain arbitrary TensorFlow graphs, models can be thought of as +programs. +[Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) +describes the security implications of referencing a model from an untrusted +source. + +## Next Steps + +- [Use the library](tf2_saved_model.md) +- [Reusable SavedModels](reusable_saved_models.md) diff --git a/site/en/hub/migration_tf2.md b/site/en/hub/migration_tf2.md new file mode 100644 index 00000000000..c2cc4b50759 --- /dev/null +++ b/site/en/hub/migration_tf2.md @@ -0,0 +1,114 @@ + +# Migrating from TF1 to TF2 with TensorFlow Hub + +This page explains how to keep using TensorFlow Hub while migrating your +TensorFlow code from TensorFlow 1 to TensorFlow 2. It complements TensorFlow's +general [migration guide](https://www.tensorflow.org/guide/migrate). + +For TF2, TF Hub has switched away from the legacy `hub.Module` API for building +a `tf.compat.v1.Graph` like `tf.contrib.v1.layers` do. 
Instead, there is now a +`hub.KerasLayer` for use alongside other Keras layers for building a +`tf.keras.Model` (typically in TF2's new +[eager execution environment](https://www.tensorflow.org/api_docs/python/tf/executing_eagerly)) +and its underlying `hub.load()` method for low-level TensorFlow code. + +The `hub.Module` API remains available in the `tensorflow_hub` library for use +in TF1 and in the TF1 compatibility mode of TF2. It can only load models in the +[TF1 Hub format](tf1_hub_module.md). + +The new API of `hub.load()` and `hub.KerasLayer` works for TensorFlow 1.15 (in +eager and graph mode) and in TensorFlow 2. This new API can load the new +[TF2 SavedModel](tf2_saved_model.md) assets, and, with the restrictions laid out +in the [model compatibility guide](model_compatibility.md), the legacy models in +TF1 Hub format. + +In general, it is recommended to use new API wherever possible. + +## Summary of the new API + +`hub.load()` is the new low-level function to load a SavedModel from TensorFlow +Hub (or compatible services). It wraps TF2's `tf.saved_model.load()`; +TensorFlow's [SavedModel Guide](https://www.tensorflow.org/guide/saved_model) +describes what you can do with the result. + +```python +m = hub.load(handle) +outputs = m(inputs) +``` + +The `hub.KerasLayer` class calls `hub.load()` and adapts the result for use in +Keras alongside other Keras layers. (It may even be a convenient wrapper for +loaded SavedModels used in other ways.) + +```python +model = tf.keras.Sequential([ + hub.KerasLayer(handle), + ...]) +``` + +Many tutorials show these APIs in action. Here are some examples: + +* [Text classification example notebook](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb) +* [Image classification example notebook](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) + +### Using the new API in Estimator training + +If you use a TF2 SavedModel in an Estimator for training with parameter servers +(or otherwise in a TF1 Session with variables placed on remote devices), you +need to set `experimental.share_cluster_devices_in_session` in the tf.Session's +ConfigProto, or else you will get an error like "Assigned device +'/job:ps/replica:0/task:0/device:CPU:0' does not match any device." + +The necessary option can be set like + +```python +session_config = tf.compat.v1.ConfigProto() +session_config.experimental.share_cluster_devices_in_session = True +run_config = tf.estimator.RunConfig(..., session_config=session_config) +estimator = tf.estimator.Estimator(..., config=run_config) +``` + +Starting with TF2.2, this option is no longer experimental, and the +`.experimental` piece can be dropped. + +## Loading legacy models in TF1 Hub format + +It can happen that a new TF2 SavedModel is not yet available for your use-case +and you need to load an legacy model in TF1 Hub format. Starting in +`tensorflow_hub` release 0.7, you can use legacy model in TF1 Hub format +together with `hub.KerasLayer` as shown below: + +```python +m = hub.KerasLayer(handle) +tensor_out = m(tensor_in) +``` + +Additionally `KerasLayer` exposes the ability to specify `tags`, `signature`, +`output_key` and `signature_outputs_as_dict` for more specific usages of legacy +models in TF1 Hub format and legacy SavedModels. + +For more information on TF1 Hub format compatibility see the +[model compatibility guide](model_compatibility.md). 
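As a hypothetical sketch of those extra arguments (the signature name and output key depend on the particular legacy model's documentation):

```python
# Select a named signature of a legacy TF1 Hub format model and
# get all of its outputs as a dict.
m = hub.KerasLayer(handle,
                   signature="default",              # hypothetical signature name
                   signature_outputs_as_dict=True)
outputs = m(tensor_in)
tensor_out = outputs["default"]                      # hypothetical output key
```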
+ +## Using lower level APIs + +Legacy TF1 Hub format models can be loaded via `tf.saved_model.load`. Instead of + +```python +# DEPRECATED: TensorFlow 1 +m = hub.Module(handle, tags={"foo", "bar"}) +tensors_out_dict = m(dict(x1=..., x2=...), signature="sig", as_dict=True) +``` + +it is recommended to use: + +```python +# TensorFlow 2 +m = hub.load(path, tags={"foo", "bar"}) +tensors_out_dict = m.signatures["sig"](x1=..., x2=...) +``` + +In these examples `m.signatures` is a dict of TensorFlow +[concrete functions](https://www.tensorflow.org/tutorials/customization/performance#tracing) +keyed by signature names. Calling such a function computes all its outputs, even +if unused. (This is different from the lazy evaluation of TF1's graph mode.) diff --git a/site/en/hub/model_compatibility.md b/site/en/hub/model_compatibility.md new file mode 100644 index 00000000000..e37ed717c3b --- /dev/null +++ b/site/en/hub/model_compatibility.md @@ -0,0 +1,144 @@ + +# Model compatibility for TF1/TF2 + +## TF Hub model formats + +TF Hub offers reusable model pieces that can be loaded back, built upon, and +possibly be retrained in a TensorFlow program. These come in two different +formats: + +* The custom [TF1 Hub format](https://www.tensorflow.org/hub/tf1_hub_module) . + Its main intended use is in TF1 (or TF1 compatibility mode in TF2) via its + [hub.Module API](https://www.tensorflow.org/hub/api_docs/python/hub/Module). + Full compatibility details [below](#compatibility_of_hubmodule). +* The native [TF2 SavedModel](https://www.tensorflow.org/hub/tf2_saved_model) + format. Its main intended use is in TF2 via the + [hub.load](https://www.tensorflow.org/hub/api_docs/python/hub/load) and + [hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) + APIs. Full compatibility details [below](#compatibility_of_tf2_savedmodel). + +The model format can be found on the model page on +[tfhub.dev](https://tfhub.dev). Model **loading/inference**, **fine-tuning** or +**creation** might not be supported in TF1/2 based on the model formats. + +## Compatibility of the TF1 Hub format {:#compatibility_of_hubmodule} + + + + + + + + + + + + + + + + + + + + + + + + + +
OperationTF1/ TF1 compat mode in TF2 [1]TF2
Loading / Inference + Fully supported (complete TF1 Hub format loading guide) +
m = hub.Module(handle)
+outputs = m(inputs)
+
It's recommended to use either hub.load +
m = hub.load(handle)
+outputs = m.signatures["sig"](inputs)
+ or hub.KerasLayer +
m = hub.KerasLayer(handle, signature="sig")
+outputs = m(inputs)
+
Fine-tuning + Fully supported (complete TF1 Hub format fine-tuning guide) +
m = hub.Module(handle,
+               trainable=True,
+               tags=["train"]*is_training)
+outputs = m(inputs)
+
+ Note: modules that don't need a separate train graph don't have a train + tag. +
+
+ Not supported +
Creation Fully supported (see complete TF1 Hub format creation guide)
+ Note: The TF1 Hub format is geared towards TF1 and is only partially supported in TF2. Consider creating a TF2 SavedModel. +
Not supported
+ +## Compatibility of TF2 SavedModel {:#compatibility_of_tf2_savedmodel} + +Not supported before TF1.15. + + + + + + + + + + + + + + + + + + + + + + + + +
OperationTF1.15/ TF1 compat mode in TF2 [1]TF2
Loading / Inference + Use either hub.load +
m = hub.load(handle)
+outputs = m(inputs)
+ or hub.KerasLayer +
m = hub.KerasLayer(handle)
+outputs = m(inputs)
+
Fully supported (complete TF2 SavedModel loading guide). Use either hub.load +
m = hub.load(handle)
+outputs = m(inputs)
+ or hub.KerasLayer +
m = hub.KerasLayer(handle)
+outputs = m(inputs)
+
Fine-tuning + Supported for a hub.KerasLayer used in tf.keras.Model when trained with + Model.fit() or trained in an Estimator whose model_fn wraps the Model per the custom model_fn guide. +
+ Note: hub.KerasLayer does not + fill in graph collections like the old tf.compat.v1.layers or hub.Module + APIs did. +
+
+ Fully supported (complete TF2 SavedModel fine-tuning guide). + Use either hub.load: +
m = hub.load(handle)
+outputs = m(inputs, training=is_training)
+ or hub.KerasLayer: +
m =  hub.KerasLayer(handle, trainable=True)
+outputs = m(inputs)
+
Creation + The TF2 API + tf.saved_model.save() can be called from within compat mode. + Fully supported (see complete TF2 SavedModel creation guide)
+ +

[1] "TF1 compat mode in TF2" refers to the combined + effect of importing TF2 with + import tensorflow.compat.v1 as tf + and running + tf.disable_v2_behavior() + as described in the + TensorFlow Migration guide + .

diff --git a/site/en/hub/model_formats.md b/site/en/hub/model_formats.md new file mode 100644 index 00000000000..73ae7c247a1 --- /dev/null +++ b/site/en/hub/model_formats.md @@ -0,0 +1,79 @@ + +# Model formats + +[tfhub.dev](https://tfhub.dev) hosts the following model +formats: TF2 SavedModel, TF1 Hub format, TF.js and TFLite. This page provides an +overview of each model format. + +Content published to tfhub.dev can be automatically mirrored to other model +hubs, provided it follows a specified format and is permitted by our Terms +(https://tfhub.dev/terms). See [our publishing documentation](publish.md) for +more details, and [our contribution documentation](contribute_a_model.md) if +you'd like to opt-out of mirroring. + +## TensorFlow formats + +[tfhub.dev](https://tfhub.dev) hosts TensorFlow models in the TF2 SavedModel +format and TF1 Hub format. We recommend using models in the standardized TF2 +SavedModel format instead of the deprecated TF1 Hub format when possible. + +### SavedModel + +TF2 SavedModel is the recommended format for sharing TensorFlow models. You can +learn more about the SavedModel format in the +[TensorFlow SavedModel](https://www.tensorflow.org/guide/saved_model) guide. + +You can browse SavedModels on tfhub.dev by using the TF2 version filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following +[this link](https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf2). + +You can use SavedModels from tfhub.dev without depending on the `tensorflow_hub` +library, since this format is a part of core TensorFlow. + +Learn more about SavedModels on TF Hub: + +* [Using TF2 SavedModels](tf2_saved_model.md) +* [Exporting a TF2 SavedModel](exporting_tf2_saved_model.md) +* [TF1/TF2 compatibility of TF2 SavedModels](model_compatibility.md) + +### TF1 Hub format + +The TF1 Hub format is a custom serialization format used in by TF Hub library. +The TF1 Hub format is similar to the SavedModel format of TensorFlow 1 on a +syntactic level (same file names and protocol messages) but semantically +different to allow for module reuse, composition and re-training (e.g., +different storage of resource initializers, different tagging conventions for +metagraphs). The easiest way to tell them apart on disk is the presence or +absence of the `tfhub_module.pb` file. + +You can browse models in the TF1 Hub format on tfhub.dev by using the TF1 +version filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following +[this link](https://tfhub.dev/s?subtype=module,placeholder&tf-version=tf1). + +Learn more about models in TF1 Hub format on TF Hub: + +* [Using TF1 Hub format models](tf1_hub_module.md) +* [Exporting a model in the TF1 Hub format](exporting_hub_format.md) +* [TF1/TF2 compatibility of TF1 Hub format](model_compatibility.md) + +## TFLite format + +The TFLite format is used for on-device inference. You can learn more at the +[TFLite documentation](https://www.tensorflow.org/lite). + +You can browse TF Lite models on tfhub.dev by using the TF Lite model format +filter on the +[tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) or by +following [this link](https://tfhub.dev/lite). + +## TFJS format + +The TF.js format is used for in-browser ML. You can learn more at the +[TF.js documentation](https://www.tensorflow.org/js). 
+ +You can browse TF.js models on tfhub.dev by using the TF.js model format filter +on the [tfhub.dev browse page](https://tfhub.dev/s?subtype=module,placeholder) +or by following [this link](https://tfhub.dev/js). diff --git a/site/en/hub/overview.md b/site/en/hub/overview.md new file mode 100644 index 00000000000..b6d814eba73 --- /dev/null +++ b/site/en/hub/overview.md @@ -0,0 +1,31 @@ + +# TensorFlow Hub + +TensorFlow Hub is an open repository and library for reusable machine learning. +The [tfhub.dev](https://tfhub.dev) repository provides many pre-trained models: +text embeddings, image classification models, TF.js/TFLite models and much more. +The repository is open to +[community contributors](https://tfhub.dev/s?subtype=publisher). + +The [`tensorflow_hub`](https://github.com/tensorflow/hub) library lets you +download and reuse them in your TensorFlow program with a minimum amount of +code. + +```python +import tensorflow_hub as hub + +model = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim128/2") +embeddings = model(["The rain in Spain.", "falls", + "mainly", "In the plain!"]) + +print(embeddings.shape) #(4,128) +``` + +## Next Steps + +- [Find models on tfhub.dev](https://tfhub.dev) +- [Publish models on tfhub.dev](publish.md) +- TensorFlow Hub library + - [Install TensorFlow Hub](installation.md) + - [Library overview](lib_overview.md) +- [Follow tutorials](tutorials) diff --git a/site/en/hub/portability_and_deletion.md b/site/en/hub/portability_and_deletion.md new file mode 100644 index 00000000000..67fa401d161 --- /dev/null +++ b/site/en/hub/portability_and_deletion.md @@ -0,0 +1,18 @@ + +## I want to see what I’ve uploaded to TensorFlow Hub. Can I get a copy of my data? + +Yes. If you’d like the Kaggle Team to **send you a copy** of all of the +data you have uploaded, please send us an email at [support@kaggle.com](mailto:support@kaggle.com) +and we’ll respond as soon as possible. + +## How do I delete what I’ve uploaded to TensorFlow Hub? + +Similarly, if you’d like us to **delete or remove content**, please send us an +email at [support@kaggle.com](mailto:support@kaggle.com) and we’ll delete +all copies that we have and stop serving it on tfhub.dev. Please note: + +* Because TensorFlow Hub is an open-source platform, copies of your assets may +still be retained by members of the public. +* Deletion is permanent and cannot be undone. +* Deletion can cause downstream breakages if users are not caching your model +locally and/or are not properly warned prior to deletion. diff --git a/site/en/hub/publish.md b/site/en/hub/publish.md new file mode 100644 index 00000000000..7fc5e7c1751 --- /dev/null +++ b/site/en/hub/publish.md @@ -0,0 +1,19 @@ + +# Publishing Process + +Thank you for considering to publish your models! 
+
+**Please join the Early Access Model Publishing (EAP) on
+[Kaggle Models](https://www.kaggle.com/models):**
+
+-   Email [kaggle-models@google.com](mailto:kaggle-models@google.com) and
+    provide the following to get access to EAP:
+    -   (1) Your Kaggle username
+    -   (2) Your desired organization slug
+    -   (3) A URL to a square-shaped profile image (which is needed to create
+        the organization)
+-   Follow the
+    [documentation instructions](https://www.kaggle.com/model-publishing-instructions)
+    to create and publish your model
+-   Feel free to raise any questions and get support from the
+    [Kaggle Discord channel](https://discord.gg/rKEyxj9WF)
diff --git a/site/en/hub/reusable_saved_models.md b/site/en/hub/reusable_saved_models.md
new file mode 100644
index 00000000000..b2114135d77
--- /dev/null
+++ b/site/en/hub/reusable_saved_models.md
@@ -0,0 +1,208 @@
+
+# Reusable SavedModels
+
+## Introduction
+
+TensorFlow Hub hosts SavedModels for TensorFlow 2, among other assets.
+They can be loaded back into a Python program with `obj = hub.load(url)`
+[[learn more](tf2_saved_model)]. The returned `obj` is the result
+of `tf.saved_model.load()` (see TensorFlow's
+[SavedModel guide](https://www.tensorflow.org/guide/saved_model)).
+This object can have arbitrary attributes that are tf.functions,
+tf.Variables (initialized from their pre-trained values), other resources
+and, recursively, more such objects.
+
+This page describes an interface to be implemented by the loaded `obj`
+in order to be *reused* in a TensorFlow Python program.
+SavedModels conforming to this interface are called *Reusable SavedModels*.
+
+Reusing means building a larger model around `obj`, including the ability
+to fine-tune it. Fine-tuning means further training of the weights in the loaded
+`obj` as part of the surrounding model. The loss function and the
+optimizer are determined by the surrounding model; `obj` only defines
+the mapping of input to output activations (the "forward pass"), possibly
+including techniques such as dropout or batch normalization.
+
+**The TensorFlow Hub team recommends implementing the Reusable SavedModel
+interface** in all SavedModels that are meant to be reused in the above sense.
+Many utilities from the `tensorflow_hub` library, notably `hub.KerasLayer`,
+require SavedModels to implement it.
+
+### Relation to SignatureDefs
+
+This interface, expressed in terms of tf.functions and other TF2 features,
+is separate from the SavedModel's signatures, which have been
+available since TF1 and continue to be used in TF2 for inference
+(such as deploying SavedModels to TF Serving or TF Lite).
+Signatures for inference are not expressive enough to support fine-tuning,
+and [`tf.function`](https://www.tensorflow.org/api_docs/python/tf/function)
+provides a more natural and expressive
+[Python API](https://www.tensorflow.org/tutorials/customization/performance)
+for the reused model.
+
+### Relation to model-building libraries
+
+A Reusable SavedModel uses only TensorFlow 2 primitives, independent of any
+particular model-building library like Keras or Sonnet. This facilitates reuse
+across model-building libraries, free from dependencies on the original
+model-building code.
+
+Some amount of adaptation will be needed to load Reusable SavedModels into or
+save them from any given model-building library. 
For Keras,
+[hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer)
+provides the loading, and Keras's built-in saving in the SavedModel format has
+been redesigned for TF2 with the goal of providing a superset of this interface
+(see the
+[RFC](https://github.com/tensorflow/community/blob/master/rfcs/20190509-keras-saved-model.md)
+from May 2019).
+
+### Relation to task-specific "Common SavedModel APIs"
+
+The interface definition on this page allows for any number and type of inputs
+and outputs. The
+[Common SavedModel APIs for TF Hub](common_saved_model_apis/index.md) refine
+this general interface with usage conventions for specific tasks to make models
+easily interchangeable.
+
+## Interface definition
+
+### Attributes
+
+A Reusable SavedModel is a TensorFlow 2 SavedModel such that
+`obj = tf.saved_model.load(...)` returns an object that has the following
+attributes:
+
+  * `__call__`. Required. A tf.function implementing the model's computation
+    (the "forward pass") subject to the specification below.
+
+  * `variables`: A list of tf.Variable objects, listing all the variables
+    used by any possible invocation of `__call__`, including both
+    trainable and non-trainable ones.
+
+    This list can be omitted if empty.
+
+    Note: Conveniently, this name coincides with the attribute synthesized by
+    `tf.saved_model.load(...)` when loading a TF1 SavedModel to represent
+    its `GLOBAL_VARIABLES` collection.
+
+  * `trainable_variables`: A list of tf.Variable objects such that
+    `v.trainable` is true for all elements.
+    These variables must be a subset of `variables`.
+    These are the variables to be trained when fine-tuning the object.
+    The SavedModel creator may choose to omit some variables here that were
+    originally trainable to indicate that these should not be modified during
+    fine-tuning.
+
+    This list can be omitted if empty, in particular, if the SavedModel does not
+    support fine-tuning.
+
+  * `regularization_losses`: A list of tf.functions, each taking zero inputs
+    and returning a single scalar float tensor. For fine-tuning, the
+    SavedModel user is advised to include these as additional regularization
+    terms into the loss (in the simplest case without further scaling).
+    Typically, these are used to represent weight regularizers.
+    (For lack of inputs, these tf.functions cannot express
+    activity regularizers.)
+
+    This list can be omitted if empty, in particular, if the SavedModel does not
+    support fine-tuning or does not wish to prescribe weight regularization.
+
+### The `__call__` function
+
+A restored SavedModel `obj` has an `obj.__call__` attribute that is
+a restored tf.function and allows `obj` to be called as follows.
+
+Synopsis (pseudo-code):
+
+```python
+outputs = obj(inputs, training=..., **kwargs)
+```
+
+#### Arguments
+
+The arguments are as follows.
+
+  * There is one positional, required argument with a batch of input activations
+    of the SavedModel. Its type is one of:
+
+      * a single Tensor for a single input,
+      * a list of Tensors for an ordered sequence of unnamed inputs,
+      * a dict of Tensors keyed by a particular set of input names.
+
+    (Future revisions of this interface may allow more general nests.)
+    The SavedModel creator chooses one of those and the tensor shapes
+    and dtypes. Where useful, some dimensions of the shape should be
+    undefined (notably batch size).
+
+  * There may be an optional keyword argument `training` that accepts a Python
+    boolean, `True` or `False`. The default is `False`. 
+ If the model supports fine-tuning, and if its computation differs between + the two (e.g., as in dropout and batch normalization), that distinction + is implemented with this argument. Otherwise, this argument may be absent. + + It is not required that `__call__` accept a Tensor-valued `training` + argument. It falls on the caller to use `tf.cond()` if necessary + to dispatch between them. + + * The SavedModel creator may choose to accept more optional `kwargs` + of particular names. + + * For Tensor-valued arguments, the SavedModel creator defines their + permissible dtypes and shapes. `tf.function` accepts a Python default + value on an argument that is traced with a tf.TensorSpec input. + Such arguments can be used to allow customization of numeric + hyperparameters involved in `__call__` (e.g., dropout rate). + + * For Python-valued arguments, the SavedModel creator defines their + permissible values. Such arguments can be used as flags to make + discrete choices in the traced function (but mind the combinatorial + explosion of traces). + +The restored `__call__` function must provide traces for all permissible +combinations of arguments. Flipping `training` between `True` and `False` +must not change the permissibility of arguments. + +#### Result + +The `outputs` from calling `obj` can be + + * a single Tensor for a single output, + * a list of Tensors for an ordered sequence of unnamed outputs, + * a dict of Tensors keyed by a particular set of output names. + +(Future revisions of this interface may allow more general nests.) +The return type may vary depending on the Python-valued kwargs. +This allows for flags producing extra outputs. +The SavedModel creator defines the output dtypes and shapes and their +dependency on inputs. + + +### Named callables + +A Reusable SavedModel can provide multiple model pieces in the way +described above by putting them into named subobjects, for example, +`obj.foo`, `obj.bar` and so on. +Each subobject provides a `__call__` method and supporting attributes +about the variables etc. specific to that model piece. +For the example above, there would be `obj.foo.__call__`, +`obj.foo.variables` and so on. + +Note that this interface does *not* cover the approach of adding +a bare tf.function directly as `tf.foo`. + +Users of Reusable SavedModels are only expected to handle one level of nesting +(`obj.bar` but not `obj.bar.baz`). (Future revisions of this interface may allow +deeper nesting, and may waive the requirement that the top-level object be +callable itself.) + +## Closing remarks + +### Relation to in-process APIs + +This document describes an interface of a Python class which consists +of primitives like tf.function and tf.Variable that survive a +round-trip through serialization via `tf.saved_model.save()` +and `tf.saved_model.load()`. However, the interface was already present +on the original object that was passed to `tf.saved_model.save()`. +Adaptation to that interface enables the exchange of model pieces +across model-building APIs within a single TensorFlow program. diff --git a/site/en/hub/tf1_hub_module.md b/site/en/hub/tf1_hub_module.md new file mode 100644 index 00000000000..7601878dc1b --- /dev/null +++ b/site/en/hub/tf1_hub_module.md @@ -0,0 +1,198 @@ + +# TF1 Hub format + +At its launch in 2018, TensorFlow Hub offered a single type of asset: TF1 Hub +format for import into TensorFlow 1 programs. 
+ +This page explains how to use TF1 Hub format in TF1 (or the TF1 compatibility +mode of TF2) with the `hub.Module` class and associated APIs. (The typical use +is to build a `tf.Graph`, possibly inside a TF1 `Estimator`, by combining one or +more models in TF1 Hub format with `tf.compat.layers` or `tf.layers`). + +Users of TensorFlow 2 (outside TF1 compatibility mode) must use +[the new API with `hub.load()` or `hub.KerasLayer`](tf2_saved_model.md). The new +API loads the new TF2 SavedModel asset type, but also has limited +[support for loading TF1 Hub format into TF2](migration_tf2.md). + +## Using a model in TF1 Hub format + +### Instantiating a model in TF1 Hub format + +A model in TF1 Hub format is imported into a TensorFlow program by creating a +`hub.Module` object from a string with its URL or filesystem path, such as: + +```python +m = hub.Module("path/to/a/module_dir") +``` +**Note:** See more information regarding other valid handle types [here](tf2_saved_model.md#model_handles). + +This adds the module's variables to the current TensorFlow graph. +Running their initializers will read their pre-trained values from disk. +Likewise, tables and other state is added to the graph. + +### Caching Modules + +When creating a module from a URL, the module content is downloaded and cached +in the local system temporary directory. The location where modules are cached +can be overridden using `TFHUB_CACHE_DIR` environment variable. For details, see +[Caching](caching.md). + +### Applying a Module + +Once instantiated, a module `m` can be called zero or more times like a Python +function from tensor inputs to tensor outputs: + +```python +y = m(x) +``` + +Each such call adds operations to the current TensorFlow graph to compute +`y` from `x`. If this involves variables with trained weights, these are +shared between all applications. + +Modules can define multiple named *signatures* in order to allow being applied +in more than one way (similar to how Python objects have *methods*). +A module's documentation should describe the available +signatures. The call above applies the signature named `"default"`. Any +signature can be selected by passing its name to the optional `signature=` +argument. + +If a signature has multiple inputs, they must be passed as a dict, with the keys +defined by the signature. Likewise, if a signature has multiple outputs, these +can be retrieved as a dict by passing `as_dict=True`, under the keys defined by +the signature (the key `"default"` is for the single output returned if +`as_dict=False`). So the most general form of applying a Module looks like: + +```python +outputs = m(dict(apples=x1, oranges=x2), signature="fruit_to_pet", as_dict=True) +y1 = outputs["cats"] +y2 = outputs["dogs"] +``` + +A caller must supply all inputs defined by a signature, but there is no +requirement to use all of a module's outputs. +TensorFlow will run only those parts of the module that end up +as dependencies of a target in `tf.Session.run()`. Indeed, module publishers may +choose to provide various outputs for advanced uses (like activations of +intermediate layers) along with the main outputs. Module consumers should +handle additional outputs gracefully. + +### Trying out alternative modules + +Whenever there are multiple modules for the same task, TensorFlow Hub +encourages to equip them with compatible signatures (interfaces) +such that trying different ones is as easy as varying the module handle +as a string-valued hyperparameter. 
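+
+In practice, that can look like the following sketch, where the handles are
+hypothetical placeholders and only the handle string needs to change in order
+to try a different module:
+
+```python
+import tensorflow_hub as hub
+
+# A string-valued hyperparameter, e.g. read from a command-line flag.
+module_handle = "https://tfhub.dev/some/text-embedding-a/1"
+# module_handle = "https://tfhub.dev/some/text-embedding-b/1"  # drop-in alternative
+
+embed = hub.Module(module_handle)  # any module with the same signature works
+embeddings = embed(["The quick brown fox"])
+```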
+ +To this end, we maintain a collection of recommended +[Common Signatures](common_signatures/index.md) for popular tasks. + + +## Creating a New Module + +### Compatibility note + +The TF1 Hub format is geared towards TensorFlow 1. It is only partially +supported by TF Hub in TensorFlow 2. Please do consider publishing in the new +[TF2 SavedModel](tf2_saved_model.md) format instead. + +The TF1 Hub format is similar to the SavedModel format of TensorFlow 1 on a +syntactic level (same file names and protocol messages) but semantically +different to allow for module reuse, composition and re-training (e.g., +different storage of resource initializers, different tagging conventions for +metagraphs). The easiest way to tell them apart on disk is the presence or +absence of the `tfhub_module.pb` file. + +### General approach + +To define a new module, a publisher calls `hub.create_module_spec()` with a +function `module_fn`. This function constructs a graph representing the module's +internal structure, using `tf.placeholder()` for inputs to be supplied by +the caller. Then it defines signatures by calling +`hub.add_signature(name, inputs, outputs)` one or more times. + +For example: + +```python +def module_fn(): + inputs = tf.placeholder(dtype=tf.float32, shape=[None, 50]) + layer1 = tf.layers.dense(inputs, 200) + layer2 = tf.layers.dense(layer1, 100) + outputs = dict(default=layer2, hidden_activations=layer1) + # Add default signature. + hub.add_signature(inputs=inputs, outputs=outputs) + +... +spec = hub.create_module_spec(module_fn) +``` + +The result of `hub.create_module_spec()` can be used, instead of a path, +to instantiate a module object within a particular TensorFlow graph. In +such case, there is no checkpoint, and the module instance will use the +variable initializers instead. + +Any module instance can be serialized to disk via its `export(path, session)` +method. Exporting a module serializes its definition together with the current +state of its variables in `session` into the passed path. This can be used +when exporting a module for the first time, as well as when exporting a fine +tuned module. + +For compatibility with TensorFlow Estimators, `hub.LatestModuleExporter` exports +modules from the latest checkpoint, just like `tf.estimator.LatestExporter` +exports the entire model from the latest checkpoint. + +Module publishers should implement a [common +signature](common_signatures/index.md) when possible, so that consumers can +easily exchange modules and find the best one for their problem. + +### Real example + +Take a look at our [text embedding module exporter](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings/export.py) +for a real-world example of how to create a module from a common text embedding +format. + + +## Fine-Tuning + +Training the variables of an imported module together with those of the model +around it is called *fine-tuning*. Fine-tuning can result in better quality, but +adds new complications. We advise consumers to look into fine-tuning only after +exploring simpler quality tweaks, and only if the module publisher recommends +it. + +### For Consumers + +To enable fine-tuning, instantiate the module with +`hub.Module(..., trainable=True)` to make its variables trainable and +import TensorFlow's `REGULARIZATION_LOSSES`. If the module has multiple +graph variants, make sure to pick the one appropriate for training. +Usually, that's the one with tags `{"train"}`. 
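+
+Putting those pieces together, a consumer-side fine-tuning graph might be set up
+roughly as follows. This is only a sketch: the module path, its text-input
+signature and the existence of a `{"train"}` graph variant are assumptions for
+illustration.
+
+```python
+import tensorflow.compat.v1 as tf
+import tensorflow_hub as hub
+
+tf.disable_v2_behavior()  # only needed when running on a TF2 installation
+
+is_training = True  # build the graph variant meant for training
+m = hub.Module("path/to/a/module_dir", trainable=True,
+               tags={"train"} if is_training else None)
+
+text = tf.placeholder(tf.string, shape=[None])
+labels = tf.placeholder(tf.int64, shape=[None])
+logits = tf.layers.dense(m(text), 2)
+loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
+# Include the module's exported REGULARIZATION_LOSSES in the training loss.
+loss += tf.losses.get_regularization_loss()
+train_op = tf.train.AdamOptimizer(learning_rate=1e-5).minimize(loss)
+```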
+ +Choose a training regime that does not ruin the pre-trained weights, +for example, a lower learning rate than for training from scratch. + +### For Publishers + +To make fine-tuning easier for consumers, please be mindful of the following: + +* Fine-tuning needs regularization. Your module is exported with the + `REGULARIZATION_LOSSES` collection, which is what puts your choice of + `tf.layers.dense(..., kernel_regularizer=...)` etc. into what the consumer + gets from `tf.losses.get_regularization_losses()`. Prefer this way of + defining L1/L2 regularization losses. + +* In the publisher model, avoid defining L1/L2 regularization via the `l1_` + and `l2_regularization_strength` parameters of `tf.train.FtrlOptimizer`, + `tf.train.ProximalGradientDescentOptimizer`, and other proximal optimizers. + These are not exported alongside the module, and setting regularization + strengths globally may not be appropriate for the consumer. Except for L1 + regularization in wide (i.e. sparse linear) or wide & deep models, it should + be possible to use individual regularization losses instead. + +* If you use dropout, batch normalization, or similar training techniques, set + their hyperparameters to values that make sense across many expected uses. + The dropout rate may have to be adjusted to the target problem's propensity + to overfitting. In batch normalization, the momentum (a.k.a. decay + coefficient) should be small enough to enable fine-tuning with small + datasets and/or large batches. For advanced consumers, consider adding a + signature that exposes control over critical hyperparameters. diff --git a/site/en/hub/tf2_saved_model.md b/site/en/hub/tf2_saved_model.md new file mode 100644 index 00000000000..e41337b2548 --- /dev/null +++ b/site/en/hub/tf2_saved_model.md @@ -0,0 +1,289 @@ + +# SavedModels from TF Hub in TensorFlow 2 + +The +[SavedModel format of TensorFlow 2](https://www.tensorflow.org/guide/saved_model) +is the recommended way to share pre-trained models and model pieces on +TensorFlow Hub. It replaces the older [TF1 Hub format](tf1_hub_module.md) and +comes with a new set of APIs. + +This page explains how to reuse TF2 SavedModels in a TensorFlow 2 program with +the low-level `hub.load()` API and its `hub.KerasLayer` wrapper. (Typically, +`hub.KerasLayer` is combined with other `tf.keras.layers` to build a Keras model +or the `model_fn` of a TF2 Estimator.) These APIs can also load the legacy +models in TF1 Hub format, within limits, see the +[compatibility guide](model_compatibility.md). + +Users of TensorFlow 1 can update to TF 1.15 and then use the same APIs. +Older versions of TF1 do not work. + +## Using SavedModels from TF Hub + +### Using a SavedModel in Keras + +[Keras](https://www.tensorflow.org/guide/keras/) is TensorFlow's high-level API +for building deep learning models by composing Keras Layer objects. +The `tensorflow_hub` library provides the class `hub.KerasLayer` that gets +initialized with the URL (or filesystem path) of a SavedModel and then +provides the computation from the SavedModel, including its pre-trained +weights. 
+
+Here is an example of using a pre-trained text embedding:
+
+```python
+import tensorflow as tf
+import tensorflow_hub as hub
+
+hub_url = "https://tfhub.dev/google/nnlm-en-dim128/2"
+embed = hub.KerasLayer(hub_url)
+embeddings = embed(["A long sentence.", "single-word", "http://example.com/"])
+print(embeddings.shape, embeddings.dtype)
+```
+
+From this, a text classifier can be built in the usual Keras way:
+
+```python
+model = tf.keras.Sequential([
+    embed,
+    tf.keras.layers.Dense(16, activation="relu"),
+    tf.keras.layers.Dense(1, activation="sigmoid"),
+])
+```
+
+The [Text classification
+colab](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_text_classification.ipynb)
+is a complete example of how to train and evaluate such a classifier.
+
+The model weights in a `hub.KerasLayer` are set to non-trainable by default.
+See the section on fine-tuning below for how to change that. Weights are
+shared between all applications of the same layer object, as usual in Keras.
+
+
+### Using a SavedModel in an Estimator
+
+Users of TensorFlow's
+[Estimator](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_estimator)
+API for distributed training can use SavedModels from TF Hub by
+writing their `model_fn` in terms of `hub.KerasLayer` among other
+`tf.keras.layers`.
+
+
+### Behind the scenes: SavedModel downloading and caching
+
+Using a SavedModel from TensorFlow Hub (or other HTTPS servers that implement
+its [hosting](hosting.md) protocol) downloads and decompresses it to the local
+filesystem if not already present. The environment variable `TFHUB_CACHE_DIR`
+can be set to override the default temporary location for caching the downloaded
+and uncompressed SavedModels. For details, see [Caching](caching.md).
+
+### Using a SavedModel in low-level TensorFlow
+
+#### Model Handles
+
+SavedModels can be loaded from a specified `handle`, where the `handle` is a
+filesystem path or a valid tfhub.dev model URL (e.g., "https://tfhub.dev/...").
+Kaggle Models URLs mirror tfhub.dev handles in accordance with our Terms and the
+license associated with the model assets, e.g., "https://www.kaggle.com/...".
+Handles from Kaggle Models are equivalent to their corresponding tfhub.dev
+handle.
+
+The function `hub.load(handle)` downloads and decompresses a SavedModel
+(unless `handle` is already a filesystem path) and then returns the result
+of loading it with TensorFlow's built-in function `tf.saved_model.load()`.
+Therefore, `hub.load()` can handle any valid SavedModel (unlike its
+predecessor `hub.Module` for TF1).
+
+#### Advanced topic: what to expect from the SavedModel after loading
+
+Depending on the contents of the SavedModel, the result of
+`obj = hub.load(...)` can be invoked in various ways (as explained in
+much greater detail in TensorFlow's [SavedModel
+Guide](https://www.tensorflow.org/guide/saved_model)):
+
+  * The serving signatures of the SavedModel (if any) are represented as a
+    dictionary of concrete functions and can be called like
+    `tensors_out = obj.signatures["serving_default"](**tensors_in)`,
+    with dictionaries of tensors keyed by the respective input and output
+    names and subject to the signature's shape and dtype constraints. 
+
+  * The
+    [`@tf.function`](https://www.tensorflow.org/api_docs/python/tf/function)-decorated
+    methods of the saved object (if any) are restored as tf.function objects
+    that can be called by all combinations of Tensor and non-Tensor arguments
+    for which the tf.function had been
+    [traced](https://www.tensorflow.org/tutorials/customization/performance#tracing)
+    prior to saving. In particular, if there is an `obj.__call__` method
+    with suitable traces, `obj` itself can be called like a Python function.
+    A simple example could look like
+    `output_tensor = obj(input_tensor, training=False)`.
+
+This leaves enormous liberty in the interfaces that SavedModels can
+implement. The [Reusable SavedModels interface](reusable_saved_models.md)
+for `obj` establishes conventions such that client code, including adapters
+like `hub.KerasLayer`, knows how to use the SavedModel.
+
+Some SavedModels may not follow that convention, especially whole models
+not meant to be reused in larger models, and just provide serving signatures.
+
+The trainable variables in a SavedModel are reloaded as trainable,
+and `tf.GradientTape` will watch them by default. See the section on
+fine-tuning below for some caveats, and consider avoiding this for starters.
+Even if you want to fine-tune, you may want to see if `obj.trainable_variables`
+advises re-training only a subset of the originally trainable variables.
+
+
+## Creating SavedModels for TF Hub
+
+### Overview
+
+SavedModel is TensorFlow's standard serialization format for trained models
+or model pieces.
+It stores the model's trained weights together with the exact TensorFlow
+operations to perform its computation. It can be used independently from
+the code that created it. In particular, it can be reused across different
+high-level model-building APIs like Keras, because TensorFlow operations
+are their common basic language.
+
+### Saving from Keras
+
+Starting with TensorFlow 2, `tf.keras.Model.save()` and
+`tf.keras.models.save_model()` default to the SavedModel format (not HDF5).
+The resulting SavedModels can be used with `hub.load()`,
+`hub.KerasLayer` and similar adapters for other high-level APIs
+as they become available.
+
+To share a complete Keras Model, just save it with `include_optimizer=False`.
+
+To share a piece of a Keras Model, make the piece a Model in itself and then
+save that. You can either lay out the code like that from the start....
+
+```python
+piece_to_share = tf.keras.Model(...)
+full_model = tf.keras.Sequential([piece_to_share, ...])
+full_model.fit(...)
+piece_to_share.save(...)
+```
+
+...or cut out the piece to share after the fact (if it aligns with the
+layering of your full model):
+
+```python
+full_model = tf.keras.Model(...)
+sharing_input = full_model.get_layer(...).get_output_at(0)
+sharing_output = full_model.get_layer(...).get_output_at(0)
+piece_to_share = tf.keras.Model(sharing_input, sharing_output)
+piece_to_share.save(..., include_optimizer=False)
+```
+
+[TensorFlow Models](https://github.com/tensorflow/models) on GitHub uses the
+former approach for BERT (see
+[nlp/tools/export_tfhub_lib.py](https://github.com/tensorflow/models/blob/master/official/nlp/tools/export_tfhub_lib.py),
+note the split between `core_model` for export and the `pretrainer` for
+restoring the checkpoint) and the latter approach for ResNet (see
+[legacy/image_classification/tfhub_export.py](https://github.com/tensorflow/models/blob/master/official/legacy/image_classification/resnet/tfhub_export.py)).
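+
+As a runnable toy version of the former approach, the following sketch uses
+made-up layer sizes, random training data and a local export path purely for
+illustration:
+
+```python
+import tensorflow as tf
+import tensorflow_hub as hub
+
+piece_to_share = tf.keras.Sequential(
+    [tf.keras.layers.Dense(16, activation="relu", input_shape=[4])])
+full_model = tf.keras.Sequential(
+    [piece_to_share, tf.keras.layers.Dense(1, activation="sigmoid")])
+full_model.compile(optimizer="adam", loss="binary_crossentropy")
+full_model.fit(tf.random.normal([32, 4]), tf.zeros([32, 1]), epochs=1, verbose=0)
+
+# Save only the piece meant for sharing, without optimizer state.
+piece_to_share.save("/tmp/piece_to_share", include_optimizer=False)
+
+# Consumers can reload it like any other TF2 SavedModel.
+layer = hub.KerasLayer("/tmp/piece_to_share", trainable=True)
+print(layer(tf.random.normal([2, 4])).shape)  # (2, 16)
+```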
+ +### Saving from low-level TensorFlow + +This requires good familiarity with TensorFlow's [SavedModel +Guide](https://www.tensorflow.org/guide/saved_model). + +If you want to provide more than just a serving signature, you should +implement the [Reusable SavedModel interface](reusable_saved_models.md). +Conceptually, this looks like + +```python +class MyMulModel(tf.train.Checkpoint): + def __init__(self, v_init): + super().__init__() + self.v = tf.Variable(v_init) + self.variables = [self.v] + self.trainable_variables = [self.v] + self.regularization_losses = [ + tf.function(input_signature=[])(lambda: 0.001 * self.v**2), + ] + + @tf.function(input_signature=[tf.TensorSpec(shape=None, dtype=tf.float32)]) + def __call__(self, inputs): + return tf.multiply(inputs, self.v) + +tf.saved_model.save(MyMulModel(2.0), "/tmp/my_mul") + +layer = hub.KerasLayer("/tmp/my_mul") +print(layer([10., 20.])) # [20., 40.] +layer.trainable = True +print(layer.trainable_weights) # [2.] +print(layer.losses) # 0.004 +``` + + +## Fine-Tuning + +Training the already-trained variables of an imported SavedModel together with +those of the model around it is called *fine-tuning* the SavedModel. +This can result in better quality, but often makes the training more +demanding (may take more time, depend more on the optimizer and its +hyperparameters, increase the risk of overfitting and require dataset +augmentation, esp. for CNNs). We advise SavedModel consumers to look into +fine-tuning only after having established a good training regime, +and only if the SavedModel publisher recommends it. + +Fine-tuning changes the "continuous" model parameters that are trained. +It does not change hard-coded transformations, such as tokenizing text +input and mapping tokens to their corresponding entries in an embedding matrix. + +### For SavedModel consumers + +Creating a `hub.KerasLayer` like + +```python +layer = hub.KerasLayer(..., trainable=True) +``` + +enables fine-tuning of the SavedModel loaded by the layer. It adds the +trainable weights and weight regularizers declared in the SavedModel +to the Keras model, and runs the SavedModel's computation in training +mode (think of dropout etc.). + +The [image classification +colab](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_image_retraining.ipynb) +contains an end-to-end example with optional fine-tuning. + +#### Re-exporting the fine-tuning result + +Advanced users may want to save the results of fine-tuning back into +a SavedModel that can be used instead of the originally loaded one. +This can be done with code like + +```python +loaded_obj = hub.load("https://tfhub.dev/...") +hub_layer = hub.KerasLayer(loaded_obj, trainable=True, ...) + +model = keras.Sequential([..., hub_layer, ...]) +model.compile(...) +model.fit(...) + +export_module_dir = os.path.join(os.getcwd(), "finetuned_model_export") +tf.saved_model.save(loaded_obj, export_module_dir) +``` + +### For SavedModel creators + +When creating a SavedModel for sharing on TensorFlow Hub, +think ahead if and how its consumers should fine-tune it, +and provide guidance in the documentation. + +Saving from a Keras Model should make all the mechanics of fine-tuning work +(saving weight regularization losses, declaring trainable variables, tracing +`__call__` for both `training=True` and `training=False`, etc.) + +Choose a model interface that plays well with gradient flow, +e.g., output logits instead of softmax probabilities or top-k predictions. 
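+
+When saving with the low-level API instead of Keras, the creator has to provide
+the `training=True` and `training=False` traces of `__call__` explicitly before
+export. A minimal sketch with a toy module (the module, shapes and path are
+illustrative assumptions):
+
+```python
+import tensorflow as tf
+
+class MyPiece(tf.Module):
+  def __init__(self):
+    super().__init__()
+    self.v = tf.Variable(tf.ones([4, 4]))
+
+  @tf.function
+  def __call__(self, inputs, training=False):
+    outputs = tf.matmul(inputs, self.v)
+    if training:
+      outputs = tf.nn.dropout(outputs, rate=0.2)  # training-only behavior
+    return outputs
+
+piece = MyPiece()
+spec = tf.TensorSpec([None, 4], tf.float32)
+# Trace __call__ for both values of the Python-valued `training` flag.
+piece.__call__.get_concrete_function(spec, training=True)
+piece.__call__.get_concrete_function(spec, training=False)
+tf.saved_model.save(piece, "/tmp/my_piece")
+```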
+ +If the model use dropout, batch normalization, or similar training techniques +that involve hyperparameters, set them to values that make sense across many +expected target problems and batch sizes. (As of this writing, saving from +Keras does not make it easy to let consumers adjust them.) + +Weight regularizers on individual layers are saved (with their regularization +strength coefficients), but weight regularization from within the optimizer +(like `tf.keras.optimizers.Ftrl.l1_regularization_strength=...)`) +is lost. Advise consumers of your SavedModel accordingly. diff --git a/site/en/hub/tutorials/_index.yaml b/site/en/hub/tutorials/_index.yaml new file mode 100644 index 00000000000..deb98108393 --- /dev/null +++ b/site/en/hub/tutorials/_index.yaml @@ -0,0 +1,174 @@ +book_path: /hub/_book.yaml +project_path: /hub/_project.yaml +title: Tutorials +landing_page: + custom_css_path: /site-assets/css/style.css + nav: left + meta_tags: + - name: description + content: > + TensorFlow Hub tutorials to help you get started with using and adapting pre-trained + machine learning models to your needs. + rows: + # Getting started + - classname: devsite-landing-row-100 + items: + - description: > + +

TensorFlow Hub is a comprehensive repository of pre-trained + models ready for fine-tuning and deployable anywhere. Download the latest trained models + with a minimal amount of code with the tensorflow_hub library.

+

The following tutorials should help you getting + started with using and applying models from TF Hub for your needs. Interactive tutorials let you + modify them and execute them with your changes. Click the Run in Google Colab + button at the top of an interactive tutorial to tinker with it.

+ + # For beginners + - classname: devsite-landing-row-100 + items: + - description: > + +

If you are unfamiliar with machine learning and TensorFlow, you can start by getting + an overview of how to classify images and text, detecting objects in images, or by stylizing your own pictures like famous artwork:

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Build a Keras model on top of a pre-trained image classifier to distinguish flowers. + path: /hub/tutorials/tf2_image_retraining + image_path: /hub/images/image_classification.png + - classname: tfo-landing-page-card + description: > + + Use BERT to build a Keras model to solve a text classificaton sentiment analysis task. + path: /tutorials/text/classify_text_with_bert + image_path: /hub/images/bert_preprocess.png + - classname: tfo-landing-page-card + description: > + + + + Let a neural network redraw an image in the style of Picasso, van Gogh or like your own style image. + path: /hub/tutorials/tf2_arbitrary_image_stylization + image_path: /hub/images/style_transfer.png + - classname: tfo-landing-page-card + description: > + + Detect objects in images using models like FasterRCNN or SSD. + path: /hub/tutorials/tf2_object_detection + image_path: /hub/images/object_detection.png + + # More advanced users + - classname: devsite-landing-row-100 + items: + - description: > + +

Check out more advanced tutorials for how to use NLP, images, audio, and video models from TensorFlow Hub.

+ + # NLP tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Solve common NLP tasks with models from TensorFlow Hub. View all available NLP tutorials in the left nav.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Classify and semantically compare sentences with the Universal Sentence Encoder. + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder + image_path: /hub/images/similarity.png + - classname: tfo-landing-page-card + description: > + + Use BERT to solve GLUE benchmark tasks running on TPU. + path: /tutorials/text/solve_glue_tasks_using_bert_on_tpu + image_path: /hub/images/bert.png + - classname: tfo-landing-page-card + description: > + + Answer cross-lingual questions from the SQuAD dataset using the multilingual universal sentence encoder Q&A model. + path: /hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa + image_path: /hub/images/colab_logo.svg + + # Image tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Explore how to use GANs, super resolution models and more. View all available image tutorials in the left nav.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Generate artificial faces and interpolate between them using GANs. + path: /hub/tutorials/tf_hub_generative_image_module + image_path: /hub/images/gan_faces.gif + - classname: tfo-landing-page-card + description: > + + Enhance the resolution of downsampled images. + path: /hub/tutorials/image_enhancing + image_path: /hub/images/super_resolution.png + - classname: tfo-landing-page-card + description: > + + Fill the masked part of given images. + path: /hub/tutorials/boundless + image_path: /hub/images/boundless.png + + # Audio tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Explore tutorials using trained models for audio data including pitch recognition and sound classification.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Record yourself singing and detect the pitch of your voice using the SPICE model. + path: /hub/tutorials/spice + image_path: /hub/images/spice_color.png + - classname: tfo-landing-page-card + description: > + + Use the YAMNet model to classify sounds as 521 audio event classes from the AudioSet-YouTube corpus. + path: /hub/tutorials/yamnet + image_path: /hub/images/yamnet.png + + # Video tutorials + - classname: devsite-landing-row-100 + items: + - description: > + +

Try out trained ML models for video data for action recognition, video interpolation, and more.

+ + - classname: devsite-landing-row-100 + items: + - classname: tfo-landing-page-card + description: > + + Detect one of 400 actions in a video using the Inflated 3D ConvNet model. + path: /hub/tutorials/action_recognition_with_tf_hub + image_path: /hub/images/action_recognition.gif + - classname: tfo-landing-page-card + description: > + + Interpolate between video frames using Inbetweening with 3D Convolutions. + path: /hub/tutorials/tweening_conv3d + image_path: /hub/images/interpolation.png + - classname: tfo-landing-page-card + description: > + + Find videos that are the most related to a text query. + path: /hub/tutorials/text_to_video_retrieval_with_s3d_milnce + image_path: /hub/images/text_video.gif diff --git a/site/en/hub/tutorials/_toc.yaml b/site/en/hub/tutorials/_toc.yaml new file mode 100644 index 00000000000..04d95a267d7 --- /dev/null +++ b/site/en/hub/tutorials/_toc.yaml @@ -0,0 +1,118 @@ +toc: +- heading: "Getting started" + style: divider +- title: Overview + path: /hub/tutorials/_index.yaml + +- heading: "NLP Tutorials" + style: divider +- title: Text classification + path: /hub/tutorials/tf2_text_classification +- title: Classify text with BERT + path: /tutorials/text/classify_text_with_bert + status: external +- title: BERT on TPU + path: /tutorials/text/solve_glue_tasks_using_bert_on_tpu + status: external +- title: Real-time semantic search + path: /hub/tutorials/tf2_semantic_approximate_nearest_neighbors +- title: Multilingual question answering + path: /hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa +- title: "Additional NLP tutorials" + style: accordion + section: + - title: BERT Experts + path: /hub/tutorials/bert_experts + - title: Semantic similarity + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder + - title: Text classification on Kaggle + path: /hub/tutorials/text_classification_with_tf_hub_on_kaggle + - title: Bangla article classifier + path: /hub/tutorials/bangla_article_classifier + - title: Explore CORD-19 text embeddings + path: /hub/tutorials/cord_19_embeddings_keras + - title: Multilingual universal sentence encoder + path: /hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder + - title: Text cookbook + path: /hub/tutorials/text_cookbook + - title: SentEval for Universal Sentence Encoder CMLM model. 
+ path: /hub/tutorials/senteval_for_universal_sentence_encoder_cmlm + +- heading: "Image Tutorials" + style: divider +- title: Image classification + path: /hub/tutorials/image_classification +- title: Transfer Learning for Image classification + path: /hub/tutorials/tf2_image_retraining +- title: Style transfer + path: /hub/tutorials/tf2_arbitrary_image_stylization +- title: Large-scale image retrieval with DELF + path: /hub/tutorials/tf_hub_delf_module +- title: Object detection + path: /hub/tutorials/tf2_object_detection +- title: GANs for image generation + path: /hub/tutorials/tf_hub_generative_image_module +- title: Human Pose Estimation + path: /hub/tutorials/movenet +- title: "Additional image tutorials" + style: accordion + section: + - title: "CropNet: Cassava Disease Detection" + path: /hub/tutorials/cropnet_cassava + - title: "CropNet: Fine tuning models for on-device inference" + path: /hub/tutorials/cropnet_on_device + - title: Boundless GAN + path: /hub/tutorials/boundless + - title: Super resolution + path: /hub/tutorials/image_enhancing + - title: HRNet model inference for semantic segmentation + path: /hub/tutorials/hrnet_semantic_segmentation + status: new + +- heading: "Audio Tutorials" + style: divider +- title: Pitch recognition + path: /hub/tutorials/spice +- title: Sound classification + path: /hub/tutorials/yamnet +- title: Automatic speech recognition with Wav2Vec2 + path: /hub/tutorials/wav2vec2_saved_model_finetuning + +- heading: "Video Tutorials" + style: divider +- title: Frame interpolation with FILM + path: /hub/tutorials/tf_hub_film_example + status: new +- title: Action recognition + path: /hub/tutorials/action_recognition_with_tf_hub +- title: Streaming action recognition + path: /hub/tutorials/movinet +- title: Video interpolation + path: /hub/tutorials/tweening_conv3d +- title: Text-to-video retrieval + path: /hub/tutorials/text_to_video_retrieval_with_s3d_milnce + +- title: "Tutorials (TF1)" + style: accordion + status: deprecated + section: + - heading: "Image Tutorials" + - title: Image classification + path: /hub/tutorials/image_feature_vector + - title: Object detection + path: /hub/tutorials/object_detection + - title: BigGAN image generation + path: /hub/tutorials/biggan_generation_with_tf_hub + - title: BigBiGAN image generation + path: /hub/tutorials/bigbigan_with_tf_hub + - title: S3 GAN image generation + path: /hub/tutorials/s3gan_generation_with_tf_hub + - heading: "NLP Tutorials" + - title: Semantic similarity lite + path: /hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite + - title: Nearest neighbor index for real-time semantic search + path: /hub/tutorials/semantic_approximate_nearest_neighbors + - title: Explore CORD-19 text embeddings + path: /hub/tutorials/cord_19_embeddings + - title: Wiki40B Language Models + path: /hub/tutorials/wiki40b_lm diff --git a/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb b/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb new file mode 100644 index 00000000000..3f586991ba9 --- /dev/null +++ b/site/en/hub/tutorials/action_recognition_with_tf_hub.ipynb @@ -0,0 +1,438 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "x8Q7Un821X1A" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1W4rIAFt1Ui3" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The 
TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cDq0CIKc1vO_" + }, + "source": [ + "# Action Recognition with an Inflated 3D CNN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h6W3FhoP3TxC" + }, + "source": [ + "This Colab demonstrates recognizing actions in video data using the\n", + "[tfhub.dev/deepmind/i3d-kinetics-400/1](https://tfhub.dev/deepmind/i3d-kinetics-400/1) module. More models to detect actions in videos can be found [here](https://tfhub.dev/s?module-type=video-classification).\n", + "\n", + "The underlying model is described in the paper \"[Quo Vadis, Action Recognition? A New\n", + "Model and the Kinetics Dataset](https://arxiv.org/abs/1705.07750)\" by Joao\n", + "Carreira and Andrew Zisserman. The paper was posted on arXiv in May 2017, and\n", + "was published as a CVPR 2017 conference paper.\n", + "The source code is publicly available on\n", + "[github](https://github.com/deepmind/kinetics-i3d).\n", + "\n", + "\"Quo Vadis\" introduced a new architecture for video classification, the Inflated\n", + "3D Convnet or I3D. This architecture achieved state-of-the-art results on the UCF101\n", + "and HMDB51 datasets from fine-tuning these models. I3D models pre-trained on Kinetics\n", + "also placed first in the CVPR 2017 [Charades challenge](http://vuchallenge.org/charades.html).\n", + "\n", + "The original module was trained on the [kinetics-400 dateset](https://www.deepmind.com/open-source/kinetics)\n", + "and knows about 400 different actions.\n", + "Labels for these actions can be found in the\n", + "[label map file](https://github.com/deepmind/kinetics-i3d/blob/master/data/label_map.txt).\n", + "\n", + "In this Colab we will use it recognize activities in videos from a UCF101 dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "R_0xc2jyNGRp" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mOHMWsFnITdi" + }, + "outputs": [], + "source": [ + "!pip install -q imageio\n", + "!pip install -q opencv-python\n", + "!pip install -q git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "USf0UvkYIlKo" + }, + "outputs": [], + "source": [ + "#@title Import the necessary modules\n", + "# TensorFlow and TF-Hub modules.\n", + "from absl import logging\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "# Some modules to help with reading the UCF101 dataset.\n", + "import random\n", + "import re\n", + "import os\n", + "import tempfile\n", + "import ssl\n", + "import cv2\n", + "import numpy as np\n", + "\n", + "# Some modules to display an animation using imageio.\n", + "import imageio\n", + "from IPython import display\n", + "\n", + "from urllib import request # requires python3" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "IuMMS3TGdws7" + }, + "outputs": [], + "source": [ + "#@title Helper functions for the UCF101 dataset\n", + "\n", + "# Utilities to fetch videos from UCF101 dataset\n", + "UCF_ROOT = \"https://www.crcv.ucf.edu/THUMOS14/UCF101/UCF101//"\n", + "_VIDEO_LIST = None\n", + "_CACHE_DIR = tempfile.mkdtemp()\n", + "# As of July 2020, crcv.ucf.edu doesn't use a certificate accepted by the\n", + "# default Colab environment anymore.\n", + "unverified_context = ssl._create_unverified_context()\n", + "\n", + "def list_ucf_videos():\n", + " \"\"\"Lists videos available in UCF101 dataset.\"\"\"\n", + " 
global _VIDEO_LIST\n", + " if not _VIDEO_LIST:\n", + " index = request.urlopen(UCF_ROOT, context=unverified_context).read().decode(\"utf-8\")\n", + " videos = re.findall(\"(v_[\\w_]+\\.avi)\", index)\n", + " _VIDEO_LIST = sorted(set(videos))\n", + " return list(_VIDEO_LIST)\n", + "\n", + "def fetch_ucf_video(video):\n", + " \"\"\"Fetches a video and cache into local filesystem.\"\"\"\n", + " cache_path = os.path.join(_CACHE_DIR, video)\n", + " if not os.path.exists(cache_path):\n", + " urlpath = request.urljoin(UCF_ROOT, video)\n", + " print(\"Fetching %s => %s\" % (urlpath, cache_path))\n", + " data = request.urlopen(urlpath, context=unverified_context).read()\n", + " open(cache_path, \"wb\").write(data)\n", + " return cache_path\n", + "\n", + "# Utilities to open video files using CV2\n", + "def crop_center_square(frame):\n", + " y, x = frame.shape[0:2]\n", + " min_dim = min(y, x)\n", + " start_x = (x // 2) - (min_dim // 2)\n", + " start_y = (y // 2) - (min_dim // 2)\n", + " return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]\n", + "\n", + "def load_video(path, max_frames=0, resize=(224, 224)):\n", + " cap = cv2.VideoCapture(path)\n", + " frames = []\n", + " try:\n", + " while True:\n", + " ret, frame = cap.read()\n", + " if not ret:\n", + " break\n", + " frame = crop_center_square(frame)\n", + " frame = cv2.resize(frame, resize)\n", + " frame = frame[:, :, [2, 1, 0]]\n", + " frames.append(frame)\n", + " \n", + " if len(frames) == max_frames:\n", + " break\n", + " finally:\n", + " cap.release()\n", + " return np.array(frames) / 255.0\n", + "\n", + "def to_gif(images):\n", + " converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)\n", + " imageio.mimsave('./animation.gif', converted_images, duration=40)\n", + " return embed.embed_file('./animation.gif')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pIKTs-KneUfz" + }, + "outputs": [], + "source": [ + "#@title Get the kinetics-400 labels\n", + "# Get the kinetics-400 action labels from the GitHub repository.\n", + "KINETICS_URL = \"https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt/"\n", + "with request.urlopen(KINETICS_URL) as obj:\n", + " labels = [line.decode(\"utf-8\").strip() for line in obj.readlines()]\n", + "print(\"Found %d labels.\" % len(labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GBvmjVICIp3W" + }, + "source": [ + "# Using the UCF101 dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V-QcxdhLIfi2" + }, + "outputs": [], + "source": [ + "# Get the list of videos in the dataset.\n", + "ucf_videos = list_ucf_videos()\n", + " \n", + "categories = {}\n", + "for video in ucf_videos:\n", + " category = video[2:-12]\n", + " if category not in categories:\n", + " categories[category] = []\n", + " categories[category].append(video)\n", + "print(\"Found %d videos in %d categories.\" % (len(ucf_videos), len(categories)))\n", + "\n", + "for category, sequences in categories.items():\n", + " summary = \", \".join(sequences[:2])\n", + " print(\"%-20s %4d videos (%s, ...)\" % (category, len(sequences), summary))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c0ZvVDruN2nU" + }, + "outputs": [], + "source": [ + "# Get a sample cricket video.\n", + "video_path = fetch_ucf_video(\"v_CricketShot_g04_c02.avi\")\n", + "sample_video = load_video(video_path)" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "id": "hASLA90YFPTO" + }, + "outputs": [], + "source": [ + "sample_video.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "POf5XgffvXlD" + }, + "outputs": [], + "source": [ + "i3d = hub.load(\"https://tfhub.dev/deepmind/i3d-kinetics-400/1/").signatures['default']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mDXgaOD1zhMP" + }, + "source": [ + "Run the id3 model and print the top-5 action predictions." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3mTbqA5JGYUx" + }, + "outputs": [], + "source": [ + "def predict(sample_video):\n", + " # Add a batch axis to the sample video.\n", + " model_input = tf.constant(sample_video, dtype=tf.float32)[tf.newaxis, ...]\n", + "\n", + " logits = i3d(model_input)['default'][0]\n", + " probabilities = tf.nn.softmax(logits)\n", + "\n", + " print(\"Top 5 actions:\")\n", + " for i in np.argsort(probabilities)[::-1][:5]:\n", + " print(f\" {labels[i]:22}: {probabilities[i] * 100:5.2f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ykaXQcGRvK4E" + }, + "outputs": [], + "source": [ + "predict(sample_video)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PHsq0lHXCsD4" + }, + "source": [ + "Now try a new video, from: https://commons.wikimedia.org/wiki/Category:Videos_of_sports\n", + "\n", + "How about [this video](https://commons.wikimedia.org/wiki/File:End_of_a_jam.ogv) by Patrick Gillett: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p-mZ9fFPCoNq" + }, + "outputs": [], + "source": [ + "!curl -O https://upload.wikimedia.org/wikipedia/commons/8/86/End_of_a_jam.ogv" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lpLmE8rjEbAF" + }, + "outputs": [], + "source": [ + "video_path = \"End_of_a_jam.ogv\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CHZJ9qTLErhV" + }, + "outputs": [], + "source": [ + "sample_video = load_video(video_path)[:100]\n", + "sample_video.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2ZNLkEZ9Er-c" + }, + "outputs": [], + "source": [ + "to_gif(sample_video)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yskHIRbxEtjS" + }, + "outputs": [], + "source": [ + "predict(sample_video)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "x8Q7Un821X1A" + ], + "name": "action_recognition_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bangla_article_classifier.ipynb b/site/en/hub/tutorials/bangla_article_classifier.ipynb new file mode 100644 index 00000000000..988a68c4023 --- /dev/null +++ b/site/en/hub/tutorials/bangla_article_classifier.ipynb @@ -0,0 +1,646 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "IDdZSPcLtKx4" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-g5By3P4tavy" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS, \n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vpaLrN0mteAS" + }, + "source": [ + "# Bangla Article Classification With TF-Hub" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GhN2WtIrBQ4y" + }, + "source": [ + "Caution: In addition to installing Python packages with pip, this notebook uses\n", + "`sudo apt install` to install system packages: `unzip`.\n", + "\n", + "This Colab is a demonstration of using [Tensorflow Hub](https://www.tensorflow.org/hub/) for text classification in non-English/local languages. Here we choose [Bangla](https://en.wikipedia.org/wiki/Bengali_language) as the local language and use pretrained word embeddings to solve a multiclass classification task where we classify Bangla news articles in 5 categories. The pretrained embeddings for Bangla comes from [fastText](https://fasttext.cc/docs/en/crawl-vectors.html) which is a library by Facebook with released pretrained word vectors for 157 languages. \n", + "\n", + "We'll use TF-Hub's pretrained embedding exporter for converting the word embeddings to a text embedding module first and then use the module to train a classifier with [tf.keras](https://www.tensorflow.org/api_docs/python/tf/keras), Tensorflow's high level user friendly API to build deep learning models. Even if we are using fastText embeddings here, it's possible to export any other embeddings pretrained from other tasks and quickly get results with Tensorflow hub. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9Vt-StAAZguA" + }, + "outputs": [], + "source": [ + "%%bash\n", + "# https://github.com/pypa/setuptools/issues/1694#issuecomment-466010982\n", + "pip install gdown --no-use-pep517" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WcBA19FlDPZO" + }, + "outputs": [], + "source": [ + "%%bash\n", + "sudo apt-get install -y unzip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zSeyZMq-BYsu" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import gdown\n", + "import numpy as np\n", + "from sklearn.metrics import classification_report\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9FB7gLU4F54l" + }, + "source": [ + "# Dataset\n", + "\n", + "We will use [BARD](https://www.researchgate.net/publication/328214545_BARD_Bangla_Article_Classification_Using_a_New_Comprehensive_Dataset) (Bangla Article Dataset) which has around 376,226 articles collected from different Bangla news portals and labelled with 5 categories: economy, state, international, sports, and entertainment. 
We download the file from Google Drive using this ([bit.ly/BARD_DATASET](https://bit.ly/BARD_DATASET)) link, which is referenced from [this](https://github.com/tanvirfahim15/BARD-Bangla-Article-Classifier) GitHub repository.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zdQrL_rwa-1K" + }, + "outputs": [], + "source": [ + "gdown.download(\n", + " url='https://drive.google.com/uc?id=1Ag0jd21oRwJhVFIBohmX_ogeojVtapLy',\n", + " output='bard.zip',\n", + " quiet=True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "P2YW4GGa9Y5o" + }, + "outputs": [], + "source": [ + "%%bash\n", + "unzip -qo bard.zip" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "js75OARBF_B8" + }, + "source": [ + "# Export pretrained word vectors to TF-Hub module" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-uAicYA6vLsf" + }, + "source": [ + "TF-Hub provides some useful scripts for converting word embeddings to TF-Hub text embedding modules [here](https://github.com/tensorflow/hub/tree/master/examples/text_embeddings_v2). To make the module for Bangla or any other language, we simply have to download the word embedding `.txt` or `.vec` file to the same directory as `export_v2.py` and run the script.\n", + "\n", + "\n", + "The exporter reads the embedding vectors and exports them to a TensorFlow [SavedModel](https://www.tensorflow.org/beta/guide/saved_model). A SavedModel contains a complete TensorFlow program including weights and graph. TF-Hub can load the SavedModel as a [module](https://www.tensorflow.org/hub/api_docs/python/hub/Module), which we will use to build the model for text classification. Since we are using `tf.keras` to build the model, we will use [hub.KerasLayer](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer), which provides a wrapper for a TF-Hub module to use as a Keras layer.\n", + "\n", + "First, we will get our word embeddings from fastText and the embedding exporter from the TF-Hub [repo](https://github.com/tensorflow/hub).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5DY5Ze6pO1G5" + }, + "outputs": [], + "source": [ + "%%bash\n", + "curl -O https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.bn.300.vec.gz\n", + "curl -O https://raw.githubusercontent.com/tensorflow/hub/master/examples/text_embeddings_v2/export_v2.py\n", + "gunzip -qf cc.bn.300.vec.gz --keep" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PAzdNZaHmdl1" + }, + "source": [ + "Then, we will run the exporter script on our embedding file. Since fastText embeddings have a header line and are pretty large (around 3.3 GB for Bangla after converting to a module), we ignore the first line and export only the first 100,000 tokens to the text embedding module." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Tkv5acr_Q9UU" + }, + "outputs": [], + "source": [ + "%%bash\n", + "python export_v2.py --embedding_file=cc.bn.300.vec --export_path=text_module --num_lines_to_ignore=1 --num_lines_to_use=100000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k9WEpmedF_3_" + }, + "outputs": [], + "source": [ + "module_path = \"text_module\"\n", + "embedding_layer = hub.KerasLayer(module_path, trainable=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fQHbmS_D4YIo" + }, + "source": [ + "The text embedding module takes a batch of sentences in a 1D tensor of strings as input and outputs the embedding vectors of shape (batch_size, embedding_dim) corresponding to the sentences. It preprocesses the input by splitting on spaces. Word embeddings are combined into sentence embeddings with the `sqrtn` combiner (see [here](https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup_sparse)). For demonstration, we pass a list of Bangla words as input and get the corresponding embedding vectors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z1MBnaBUihWn" + }, + "outputs": [], + "source": [ + "embedding_layer(['বাস', 'বসবাস', 'ট্রেন', 'যাত্রী', 'ট্রাক']) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4KY8LiFOHmcd" + }, + "source": [ + "# Convert to TensorFlow Dataset \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pNguCDNe6bvz" + }, + "source": [ + "Since the dataset is really large, instead of loading the entire dataset in memory, we will use a generator to yield samples at run time in batches using [TensorFlow Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) functions. The dataset is also very imbalanced, so, before using the generator, we will shuffle the dataset. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bYv6LqlEChO1" + }, + "outputs": [], + "source": [ + "dir_names = ['economy', 'sports', 'entertainment', 'state', 'international']\n", + "\n", + "file_paths = []\n", + "labels = []\n", + "for i, dir in enumerate(dir_names):\n", + " file_names = [\"/\".join([dir, name]) for name in os.listdir(dir)]\n", + " file_paths += file_names\n", + " labels += [i] * len(os.listdir(dir))\n", + " \n", + "np.random.seed(42)\n", + "permutation = np.random.permutation(len(file_paths))\n", + "\n", + "file_paths = np.array(file_paths)[permutation]\n", + "labels = np.array(labels)[permutation]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8b-UtAP5TL-W" + }, + "source": [ + "We can check the distribution of labels in the training and validation examples after shuffling." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mimhWVSzzAmS" + }, + "outputs": [], + "source": [ + "train_frac = 0.8\n", + "train_size = int(len(file_paths) * train_frac)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4BNXFrkotAYu" + }, + "outputs": [], + "source": [ + "# plot training vs validation distribution\n", + "plt.subplot(1, 2, 1)\n", + "plt.hist(labels[0:train_size])\n", + "plt.title(\"Train labels\")\n", + "plt.subplot(1, 2, 2)\n", + "plt.hist(labels[train_size:])\n", + "plt.title(\"Validation labels\")\n", + "plt.tight_layout()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RVbHb2I3TUNA" + }, + "source": [ + "To create a [Dataset](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) using a generator, we first write a generator function which reads each of the articles from `file_paths` and the labels from the label array, and yields one training example at each step. We pass this generator function to the [`tf.data.Dataset.from_generator`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_generator) method and specify the output types. Each training example is a tuple containing an article of `tf.string` data type and one-hot encoded label. We split the dataset with a train-validation split of 80-20 using [`tf.data.Dataset.skip`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#skip) and [`tf.data.Dataset.take`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#take) methods." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eZRGTzEhUi7Q" + }, + "outputs": [], + "source": [ + "def load_file(path, label):\n", + " return tf.io.read_file(path), label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2g4nRflB7fbF" + }, + "outputs": [], + "source": [ + "def make_datasets(train_size):\n", + " batch_size = 256\n", + "\n", + " train_files = file_paths[:train_size]\n", + " train_labels = labels[:train_size]\n", + " train_ds = tf.data.Dataset.from_tensor_slices((train_files, train_labels))\n", + " train_ds = train_ds.map(load_file).shuffle(5000)\n", + " train_ds = train_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)\n", + "\n", + " test_files = file_paths[train_size:]\n", + " test_labels = labels[train_size:]\n", + " test_ds = tf.data.Dataset.from_tensor_slices((test_files, test_labels))\n", + " test_ds = test_ds.map(load_file)\n", + " test_ds = test_ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)\n", + "\n", + "\n", + " return train_ds, test_ds" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8PuuN6el8tv9" + }, + "outputs": [], + "source": [ + "train_data, validation_data = make_datasets(train_size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MrdZI6FqPJNP" + }, + "source": [ + "# Model Training and Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jgr7YScGVS58" + }, + "source": [ + "Since we have already added a wrapper around our module to use it as any other layer in Keras, we can create a small [Sequential](https://www.tensorflow.org/api_docs/python/tf/keras/Sequential) model which is a linear stack of layers. We can add our text embedding module with `model.add` just like any other layer. We compile the model by specifying the loss and optimizer and train it for 10 epochs. 
The `tf.keras` API can handle Tensorflow Datasets as input, so we can pass a Dataset instance to the fit method for model training. Since we are using the generator function, `tf.data` will handle generating the samples, batching them and feeding them to the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WhCqbDK2uUV5" + }, + "source": [ + "## Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nHUw807XPPM9" + }, + "outputs": [], + "source": [ + "def create_model():\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Input(shape=[], dtype=tf.string),\n", + " embedding_layer,\n", + " tf.keras.layers.Dense(64, activation=\"relu\"),\n", + " tf.keras.layers.Dense(16, activation=\"relu\"),\n", + " tf.keras.layers.Dense(5),\n", + " ])\n", + " model.compile(loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=\"adam\", metrics=['accuracy'])\n", + " return model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5J4EXJUmPVNG" + }, + "outputs": [], + "source": [ + "model = create_model()\n", + "# Create earlystopping callback\n", + "early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZZ7XJLg2u2No" + }, + "source": [ + "## Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OoBkN2tAaXWD" + }, + "outputs": [], + "source": [ + "history = model.fit(train_data, \n", + " validation_data=validation_data, \n", + " epochs=5, \n", + " callbacks=[early_stopping_callback])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XoDk8otmMoT7" + }, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G5ZRKGOsXEh4" + }, + "source": [ + "We can visualize the accuracy and loss curves for training and validation data using the `tf.keras.callbacks.History` object returned by the `tf.keras.Model.fit` method, which contains the loss and accuracy value for each epoch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "V6tOnByIOeGn" + }, + "outputs": [], + "source": [ + "# Plot training & validation accuracy values\n", + "plt.plot(history.history['accuracy'])\n", + "plt.plot(history.history['val_accuracy'])\n", + "plt.title('Model accuracy')\n", + "plt.ylabel('Accuracy')\n", + "plt.xlabel('Epoch')\n", + "plt.legend(['Train', 'Test'], loc='upper left')\n", + "plt.show()\n", + "\n", + "# Plot training & validation loss values\n", + "plt.plot(history.history['loss'])\n", + "plt.plot(history.history['val_loss'])\n", + "plt.title('Model loss')\n", + "plt.ylabel('Loss')\n", + "plt.xlabel('Epoch')\n", + "plt.legend(['Train', 'Test'], loc='upper left')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D54IXLqcG8Cq" + }, + "source": [ + "## Prediction\n", + "\n", + "We can get the predictions for the validation data and check the confusion matrix to see the model's performance for each of the 5 classes. Because `tf.keras.Model.predict` method returns an n-d array for probabilities for each class, they can be converted to class labels using `np.argmax`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dptEywzZJk4l" + }, + "outputs": [], + "source": [ + "y_pred = model.predict(validation_data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7Dzeml6Pk0ub" + }, + "outputs": [], + "source": [ + "y_pred = np.argmax(y_pred, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T4M3Lzg8jHcB" + }, + "outputs": [], + "source": [ + "samples = file_paths[0:3]\n", + "for i, sample in enumerate(samples):\n", + " f = open(sample)\n", + " text = f.read()\n", + " print(text[0:100])\n", + " print(\"True Class: \", sample.split(\"/\")[0])\n", + " print(\"Predicted Class: \", dir_names[y_pred[i]])\n", + " f.close()\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PlDTIpMBu6h-" + }, + "source": [ + "## Compare Performance\n", + "\n", + "Now we can take the correct labels for the validation data from `labels` and compare them with our predictions to get a [classification_report](http://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html). " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mqrERUCS1Xn7" + }, + "outputs": [], + "source": [ + "y_true = np.array(labels[train_size:])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NX5w-NuTKuVP" + }, + "outputs": [], + "source": [ + "print(classification_report(y_true, y_pred, target_names=dir_names))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p5e9m3bV6oXK" + }, + "source": [ + "We can also compare our model's performance with the published results obtained in the original [paper](https://www.researchgate.net/publication/328214545_BARD_Bangla_Article_Classification_Using_a_New_Comprehensive_Dataset), which reported a precision of 0.96. The original authors described many preprocessing steps performed on the dataset, such as dropping punctuation and digits, and removing the 25 most frequent stop words. As we can see in the `classification_report`, we also manage to obtain a 0.96 precision and accuracy after training for only 5 epochs without any preprocessing! \n", + "\n", + "In this example, when we created the Keras layer from our embedding module, we set the parameter `trainable=False`, which means the embedding weights will not be updated during training. Try setting it to `True` to reach around 97% accuracy using this dataset after only 2 epochs. " + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "IDdZSPcLtKx4" + ], + "name": "bangla_article_classifier.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bert_experts.ipynb b/site/en/hub/tutorials/bert_experts.ipynb new file mode 100644 index 00000000000..5440909f7cb --- /dev/null +++ b/site/en/hub/tutorials/bert_experts.ipynb @@ -0,0 +1,286 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "-1vOMEXIhMQt" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pRfq9ZU5hQhg" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mTL0TERThT6z" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FkthMlVk8bHp" + }, + "source": [ + "# BERT Experts from TF-Hub\n", + "\n", + "This colab demonstrates how to:\n", + "* Load BERT models from [TensorFlow Hub](https://tfhub.dev) that have been trained on different tasks including MNLI, SQuAD, and PubMed\n", + "* Use a matching preprocessing model to tokenize raw text and convert it to ids\n", + "* Generate the pooled and sequence output from the token input ids using the loaded model\n", + "* Look at the semantic similarity of the pooled outputs of different sentences\n", + "\n", + "#### Note: This colab should be run with a GPU runtime" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jspO02jDPfPG" + }, + "source": [ + "## Set up and imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r-ed8zj-dbwm" + }, + "outputs": [], + "source": [ + "!pip install --quiet \"tensorflow-text==2.11.*\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "czDmtrGKYw_5" + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "from sklearn.metrics import pairwise\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text as text # Imports TF ops for preprocessing." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GSuDcPSaY5aB" + }, + "outputs": [], + "source": [ + "#@title Configure the model { run: \"auto\" }\n", + "BERT_MODEL = \"https://tfhub.dev/google/experts/bert/wiki_books/2/" # @param {type: \"string\"} [\"https://tfhub.dev/google/experts/bert/wiki_books/2/", \"https://tfhub.dev/google/experts/bert/wiki_books/mnli/2/", \"https://tfhub.dev/google/experts/bert/wiki_books/qnli/2/", \"https://tfhub.dev/google/experts/bert/wiki_books/qqp/2/", \"https://tfhub.dev/google/experts/bert/wiki_books/squad2/2/", \"https://tfhub.dev/google/experts/bert/wiki_books/sst2/2/", \"https://tfhub.dev/google/experts/bert/pubmed/2/", \"https://tfhub.dev/google/experts/bert/pubmed/squad2/2/"]\n", + "# Preprocessing must match the model, but all the above use the same.\n", + "PREPROCESS_MODEL = \"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3/"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pvaZiGVgwtqw" + }, + "source": [ + "## Sentences\n", + "\n", + "Let's take some sentences from Wikipedia to run through the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tytu-rSpeDNG" + }, + "outputs": [], + "source": [ + "sentences = [\n", + " \"Here We Go Then, You And I is a 1999 album by Norwegian pop artist Morten Abel. 
It was Abel's second CD as a solo artist.\",\n", + " \"The album went straight to number one on the Norwegian album chart, and sold to double platinum.\",\n", + " \"Among the singles released from the album were the songs \\\"Be My Lover\\\" and \\\"Hard To Stay Awake\\\".\",\n", + " \"Riccardo Zegna is an Italian jazz musician.\",\n", + " \"Rajko Maksimović is a composer, writer, and music pedagogue.\",\n", + " \"One of the most significant Serbian composers of our time, Maksimović has been and remains active in creating works for different ensembles.\",\n", + " \"Ceylon spinach is a common name for several plants and may refer to: Basella alba Talinum fruticosum\",\n", + " \"A solar eclipse occurs when the Moon passes between Earth and the Sun, thereby totally or partly obscuring the image of the Sun for a viewer on Earth.\",\n", + " \"A partial solar eclipse occurs in the polar regions of the Earth when the center of the Moon's shadow misses the Earth.\",\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zI39475kxCKh" + }, + "source": [ + "## Run the model\n", + "\n", + "We'll load the BERT model from TF-Hub, tokenize our sentences using the matching preprocessing model from TF-Hub, then feed in the tokenized sentences to the model. To keep this colab fast and simple, we recommend running on GPU.\n", + "\n", + "Go to **Runtime** → **Change runtime type** to make sure that **GPU** is selected" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x4t6r22ErQg0" + }, + "outputs": [], + "source": [ + "preprocess = hub.load(PREPROCESS_MODEL)\n", + "bert = hub.load(BERT_MODEL)\n", + "inputs = preprocess(sentences)\n", + "outputs = bert(inputs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gItjCg4315Cv" + }, + "outputs": [], + "source": [ + "print(\"Sentences:\")\n", + "print(sentences)\n", + "\n", + "print(\"\\nBERT inputs:\")\n", + "print(inputs)\n", + "\n", + "print(\"\\nPooled embeddings:\")\n", + "print(outputs[\"pooled_output\"])\n", + "\n", + "print(\"\\nPer token embeddings:\")\n", + "print(outputs[\"sequence_output\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ptiW2mgw6x-l" + }, + "source": [ + "## Semantic similarity\n", + "\n", + "Now let's take a look at the `pooled_output` embeddings of our sentences and compare how similar they are across sentences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GXrSO2Vc1Qtr" + }, + "outputs": [], + "source": [ + "#@title Helper functions\n", + "\n", + "def plot_similarity(features, labels):\n", + " \"\"\"Plot a similarity matrix of the embeddings.\"\"\"\n", + " cos_sim = pairwise.cosine_similarity(features)\n", + " sns.set(font_scale=1.2)\n", + " cbar_kws=dict(use_gridspec=False, location=\"left\")\n", + " g = sns.heatmap(\n", + " cos_sim, xticklabels=labels, yticklabels=labels,\n", + " vmin=0, vmax=1, cmap=\"Blues\", cbar_kws=cbar_kws)\n", + " g.tick_params(labelright=True, labelleft=False)\n", + " g.set_yticklabels(labels, rotation=0)\n", + " g.set_title(\"Semantic Textual Similarity\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "td6jcT0pJMZ5" + }, + "outputs": [], + "source": [ + "plot_similarity(outputs[\"pooled_output\"], sentences)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tJ4QCyzhSL7B" + }, + "source": [ + "## Learn more\n", + "\n", + "* Find more BERT models on [TensorFlow Hub](https://tfhub.dev)\n", + "* This notebook demonstrates simple inference with BERT, you can find a more advanced tutorial about fine-tuning BERT at [tensorflow.org/official_models/fine_tuning_bert](https://www.tensorflow.org/official_models/fine_tuning_bert)\n", + "* We used just one GPU chip to run the model, you can learn more about how to load models using tf.distribute at [tensorflow.org/tutorials/distribute/save_and_load](https://www.tensorflow.org/tutorials/distribute/save_and_load)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "bert_experts.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb b/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb new file mode 100644 index 00000000000..919abc7e354 --- /dev/null +++ b/site/en/hub/tutorials/bigbigan_with_tf_hub.ipynb @@ -0,0 +1,713 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pLOYL1PJAAtK" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3fJWQ8WSAFhh" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-1NTVIH6ABK-" + }, + "source": [ + "# Generating Images with BigBiGAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AVvOoEhswyZg" + }, + "source": [ + "This notebook is a demo for the *BigBiGAN* models available on [TF Hub](https://tfhub.dev/s?publisher=deepmind&q=bigbigan).\n", + "\n", + "BigBiGAN extends standard (Big)GANs by adding an *encoder* module which can be used for unsupervised representation learning. Roughly speaking, the encoder inverts the generator by predicting latents `z` given real data `x`. See the [BigBiGAN paper on arXiv](https://arxiv.org/abs/1907.02544) [1] for more information about these models.\n", + "\n", + "After connecting to a runtime, get started by following these instructions:\n", + "\n", + "1. (Optional) Update the selected **`module_path`** in the first code cell below to load a BigBiGAN generator for a different encoder architecture.\n", + "2. Click **Runtime > Run all** to run each cell in order. Afterwards, the outputs, including visualizations of BigBiGAN samples and reconstructions, should automatically appear below.\n", + "\n", + "Note: if you run into any issues, it can help to click **Runtime > Restart and run all...** to restart your runtime and rerun all cells from scratch.\n", + "\n", + "[1] Jeff Donahue and Karen Simonyan. [Large Scale Adversarial Representation Learning](https://arxiv.org/abs/1907.02544). *arxiv:1907.02544*, 2019." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DtGFwUKOA9jt" + }, + "source": [ + "First, set the module path.\n", + "By default, we load the BigBiGAN model with the smaller ResNet-50-based encoder from **`https://tfhub.dev/deepmind/bigbigan-resnet50/1`**.\n", + "To load the larger RevNet-50-x4 based model used to achieve the best representation learning results, comment out the active **`module_path`** setting and uncomment the other." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xoY9pl0FBoUS" + }, + "outputs": [], + "source": [ + "module_path = 'https://tfhub.dev/deepmind/bigbigan-resnet50/1' # ResNet-50\n", + "# module_path = 'https://tfhub.dev/deepmind/bigbigan-revnet50x4/1' # RevNet-50 x4" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lr01cszC_vcC" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TPdT-hYj1XXQ" + }, + "outputs": [], + "source": [ + "import io\n", + "import IPython.display\n", + "import PIL.Image\n", + "from pprint import pformat\n", + "\n", + "import numpy as np\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ouePZy6-CFJl" + }, + "source": [ + "## Define some functions to display images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MBQPtmrY2N91" + }, + "outputs": [], + "source": [ + "def imgrid(imarray, cols=4, pad=1, padval=255, row_major=True):\n", + " \"\"\"Lays out a [N, H, W, C] image array as a single image grid.\"\"\"\n", + " pad = int(pad)\n", + " if pad < 0:\n", + " raise ValueError('pad must be non-negative')\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = N // cols + int(N % cols != 0)\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant', constant_values=padval)\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " if pad:\n", + " grid = grid[:-pad, :-pad]\n", + " return grid\n", + "\n", + "def interleave(*args):\n", + " \"\"\"Interleaves input arrays of the same shape along the batch axis.\"\"\"\n", + " if not args:\n", + " raise ValueError('At least one argument is required.')\n", + " a0 = args[0]\n", + " if any(a.shape != a0.shape for a in args):\n", + " raise ValueError('All inputs must have the same shape.')\n", + " if not a0.shape:\n", + " raise ValueError('Inputs must have at least one axis.')\n", + " out = np.transpose(args, [1, 0] + list(range(2, len(a0.shape) + 1)))\n", + " out = out.reshape(-1, *a0.shape[1:])\n", + " return out\n", + "\n", + "def imshow(a, format='png', jpeg_fallback=True):\n", + " \"\"\"Displays an image in the given format.\"\"\"\n", + " a = a.astype(np.uint8)\n", + " data = io.BytesIO()\n", + " PIL.Image.fromarray(a).save(data, format)\n", + " im_data = data.getvalue()\n", + " try:\n", + " disp = IPython.display.display(IPython.display.Image(im_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print ('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format)\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp\n", + "\n", + "def image_to_uint8(x):\n", + " \"\"\"Converts [-1, 1] float array to [0, 255] uint8.\"\"\"\n", + " x = np.asarray(x)\n", + " x = (256. / 2.) 
* (x + 1.)\n", + " x = np.clip(x, 0, 255)\n", + " x = x.astype(np.uint8)\n", + " return x" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8ASXPMb6CaXR" + }, + "source": [ + "## Load a BigBiGAN TF Hub module and display its available functionality" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IuG7G1ToCtaf" + }, + "outputs": [], + "source": [ + "# module = hub.Module(module_path, trainable=True, tags={'train'}) # training\n", + "module = hub.Module(module_path) # inference\n", + "\n", + "for signature in module.get_signature_names():\n", + " print('Signature:', signature)\n", + " print('Inputs:', pformat(module.get_input_info_dict(signature)))\n", + " print('Outputs:', pformat(module.get_output_info_dict(signature)))\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sAY-AmcNCj9_" + }, + "source": [ + "## Define a wrapper class for convenient access to various functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aTKHkxfx1dAL" + }, + "outputs": [], + "source": [ + "class BigBiGAN(object):\n", + "\n", + " def __init__(self, module):\n", + " \"\"\"Initialize a BigBiGAN from the given TF Hub module.\"\"\"\n", + " self._module = module\n", + "\n", + " def generate(self, z, upsample=False):\n", + " \"\"\"Run a batch of latents z through the generator to generate images.\n", + "\n", + " Args:\n", + " z: A batch of 120D Gaussian latents, shape [N, 120].\n", + "\n", + " Returns: a batch of generated RGB images, shape [N, 128, 128, 3], range\n", + " [-1, 1].\n", + " \"\"\"\n", + " outputs = self._module(z, signature='generate', as_dict=True)\n", + " return outputs['upsampled' if upsample else 'default']\n", + "\n", + " def make_generator_ph(self):\n", + " \"\"\"Creates a tf.placeholder with the dtype & shape of generator inputs.\"\"\"\n", + " info = self._module.get_input_info_dict('generate')['z']\n", + " return tf.placeholder(dtype=info.dtype, shape=info.get_shape())\n", + "\n", + " def gen_pairs_for_disc(self, z):\n", + " \"\"\"Compute generator input pairs (G(z), z) for discriminator, given z.\n", + "\n", + " Args:\n", + " z: A batch of latents (120D standard Gaussians), shape [N, 120].\n", + "\n", + " Returns: a tuple (G(z), z) of discriminator inputs.\n", + " \"\"\"\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " x = self.generate(z)\n", + " return x, z\n", + "\n", + " def encode(self, x, return_all_features=False):\n", + " \"\"\"Run a batch of images x through the encoder.\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " return_all_features: If True, return all features computed by the encoder.\n", + " Otherwise (default) just return a sample z_hat.\n", + "\n", + " Returns: the sample z_hat of shape [N, 120] (or a dict of all features if\n", + " return_all_features).\n", + " \"\"\"\n", + " outputs = self._module(x, signature='encode', as_dict=True)\n", + " return outputs if return_all_features else outputs['z_sample']\n", + "\n", + " def make_encoder_ph(self):\n", + " \"\"\"Creates a tf.placeholder with the dtype & shape of encoder inputs.\"\"\"\n", + " info = self._module.get_input_info_dict('encode')['x']\n", + " return tf.placeholder(dtype=info.dtype, shape=info.get_shape())\n", + "\n", + " def enc_pairs_for_disc(self, x):\n", + " \"\"\"Compute encoder input pairs (x, E(x)) for discriminator, given x.\n", + "\n", + " Args:\n", + " x: A batch of data 
(256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + "\n", + " Returns: a tuple (downsample(x), E(x)) of discriminator inputs.\n", + " \"\"\"\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " x_down = tf.nn.avg_pool(x, ksize=2, strides=2, padding='SAME')\n", + " z = self.encode(x)\n", + " return x_down, z\n", + "\n", + " def discriminate(self, x, z):\n", + " \"\"\"Compute the discriminator scores for pairs of data (x, z).\n", + "\n", + " (x, z) must be batches with the same leading batch dimension, and joint\n", + " scores are computed on corresponding pairs x[i] and z[i].\n", + "\n", + " Args:\n", + " x: A batch of data (128x128 RGB images), shape [N, 128, 128, 3], range\n", + " [-1, 1].\n", + " z: A batch of latents (120D standard Gaussians), shape [N, 120].\n", + "\n", + " Returns:\n", + " A dict of scores:\n", + " score_xz: the joint scores for the (x, z) pairs.\n", + " score_x: the unary scores for x only.\n", + " score_z: the unary scores for z only.\n", + " \"\"\"\n", + " inputs = dict(x=x, z=z)\n", + " return self._module(inputs, signature='discriminate', as_dict=True)\n", + "\n", + " def reconstruct_x(self, x, use_sample=True, upsample=False):\n", + " \"\"\"Compute BigBiGAN reconstructions of images x via G(E(x)).\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " use_sample: takes a sample z_hat ~ E(x). Otherwise, deterministically\n", + " use the mean. (Though a sample z_hat may be far from the mean z,\n", + " typically the resulting recons G(z_hat) and G(z) are very\n", + " similar.\n", + " upsample: if set, upsample the reconstruction to the input resolution\n", + " (256x256). Otherwise return the raw lower resolution generator output\n", + " (128x128).\n", + "\n", + " Returns: a batch of recons G(E(x)), shape [N, 256, 256, 3] if\n", + " `upsample`, otherwise [N, 128, 128, 3].\n", + " \"\"\"\n", + " if use_sample:\n", + " z = self.encode(x)\n", + " else:\n", + " z = self.encode(x, return_all_features=True)['z_mean']\n", + " recons = self.generate(z, upsample=upsample)\n", + " return recons\n", + "\n", + " def losses(self, x, z):\n", + " \"\"\"Compute per-module BigBiGAN losses given data & latent sample batches.\n", + "\n", + " Args:\n", + " x: A batch of data (256x256 RGB images), shape [N, 256, 256, 3], range\n", + " [-1, 1].\n", + " z: A batch of latents (120D standard Gaussians), shape [M, 120].\n", + "\n", + " For the original BigBiGAN losses, pass batches of size N=M=2048, with z's\n", + " sampled from a 120D standard Gaussian (e.g., np.random.randn(2048, 120)),\n", + " and x's sampled from the ImageNet (ILSVRC2012) training set with the\n", + " \"ResNet-style\" preprocessing from:\n", + "\n", + " https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_preprocessing.py\n", + "\n", + " Returns:\n", + " A dict of per-module losses:\n", + " disc: loss for the discriminator.\n", + " enc: loss for the encoder.\n", + " gen: loss for the generator.\n", + " \"\"\"\n", + " # Compute discriminator scores on (x, E(x)) pairs.\n", + " # Downsample 256x256 image x for 128x128 discriminator input.\n", + " scores_enc_x_dict = self.discriminate(*self.enc_pairs_for_disc(x))\n", + " scores_enc_x = tf.concat([scores_enc_x_dict['score_xz'],\n", + " scores_enc_x_dict['score_x'],\n", + " scores_enc_x_dict['score_z']], axis=0)\n", + "\n", + " # Compute discriminator scores on (G(z), z) pairs.\n", + " scores_gen_z_dict = 
self.discriminate(*self.gen_pairs_for_disc(z))\n", + " scores_gen_z = tf.concat([scores_gen_z_dict['score_xz'],\n", + " scores_gen_z_dict['score_x'],\n", + " scores_gen_z_dict['score_z']], axis=0)\n", + "\n", + " disc_loss_enc_x = tf.reduce_mean(tf.nn.relu(1. - scores_enc_x))\n", + " disc_loss_gen_z = tf.reduce_mean(tf.nn.relu(1. + scores_gen_z))\n", + " disc_loss = disc_loss_enc_x + disc_loss_gen_z\n", + "\n", + " enc_loss = tf.reduce_mean(scores_enc_x)\n", + " gen_loss = tf.reduce_mean(-scores_gen_z)\n", + "\n", + " return dict(disc=disc_loss, enc=enc_loss, gen=gen_loss)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5L5SFfH4C9gu" + }, + "source": [ + "## Create tensors to be used later for computing samples, reconstructions, discriminator scores, and losses" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "goxtzcb-19NA" + }, + "outputs": [], + "source": [ + "bigbigan = BigBiGAN(module)\n", + "\n", + "# Make input placeholders for x (`enc_ph`) and z (`gen_ph`).\n", + "enc_ph = bigbigan.make_encoder_ph()\n", + "gen_ph = bigbigan.make_generator_ph()\n", + "\n", + "# Compute samples G(z) from encoder input z (`gen_ph`).\n", + "gen_samples = bigbigan.generate(gen_ph)\n", + "\n", + "# Compute reconstructions G(E(x)) of encoder input x (`enc_ph`).\n", + "recon_x = bigbigan.reconstruct_x(enc_ph, upsample=True)\n", + "\n", + "# Compute encoder features used for representation learning evaluations given\n", + "# encoder input x (`enc_ph`).\n", + "enc_features = bigbigan.encode(enc_ph, return_all_features=True)\n", + "\n", + "# Compute discriminator scores for encoder pairs (x, E(x)) given x (`enc_ph`)\n", + "# and generator pairs (G(z), z) given z (`gen_ph`).\n", + "disc_scores_enc = bigbigan.discriminate(*bigbigan.enc_pairs_for_disc(enc_ph))\n", + "disc_scores_gen = bigbigan.discriminate(*bigbigan.gen_pairs_for_disc(gen_ph))\n", + "\n", + "# Compute losses.\n", + "losses = bigbigan.losses(enc_ph, gen_ph)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ly7LWnSUDQ_P" + }, + "source": [ + "## Create a TensorFlow session and initialize variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CPnzCHDWFJwx" + }, + "outputs": [], + "source": [ + "init = tf.global_variables_initializer()\n", + "sess = tf.Session()\n", + "sess.run(init)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gcEVS26D-ues" + }, + "source": [ + "# Generator samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LYSA8Zvb-w7S" + }, + "source": [ + "First, we'll visualize samples from the pretrained BigBiGAN generator by sampling generator inputs `z` from a standard Gaussian (via `np.random.randn`) and displaying the images it produces. So far we're not going beyond the capabilites of a standard GAN -- we're just using the generator (and ignoring the encoder) for now." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9zfpvw8fGNMr" + }, + "outputs": [], + "source": [ + "feed_dict = {gen_ph: np.random.randn(32, 120)}\n", + "_out_samples = sess.run(gen_samples, feed_dict=feed_dict)\n", + "print('samples shape:', _out_samples.shape)\n", + "imshow(imgrid(image_to_uint8(_out_samples), cols=4))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9v58CTfl8jTc" + }, + "source": [ + "# Load `test_images` from the TF-Flowers dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o0kmzQ4EqKJt" + }, + "source": [ + "BigBiGAN is trained on ImageNet, but as it's too large to work with in this demo, we use the smaller TF-Flowers [1] dataset as our inputs for visualizing reconstructions and computing encoder features.\n", + "\n", + "In this cell we load TF-Flowers (downloading the dataset if needed) and store a fixed batch of 256x256 RGB image samples in a NumPy array `test_images`.\n", + "\n", + "[1] https://www.tensorflow.org/datasets/catalog/tf_flowers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OBgpkMdkUjL-" + }, + "outputs": [], + "source": [ + "def get_flowers_data():\n", + " \"\"\"Returns a [32, 256, 256, 3] np.array of preprocessed TF-Flowers samples.\"\"\"\n", + " import tensorflow_datasets as tfds\n", + " ds, info = tfds.load('tf_flowers', split='train', with_info=True)\n", + "\n", + " # Just get the images themselves as we don't need labels for this demo.\n", + " ds = ds.map(lambda x: x['image'])\n", + "\n", + " # Filter out small images (with minor edge length <256).\n", + " ds = ds.filter(lambda x: tf.reduce_min(tf.shape(x)[:2]) >= 256)\n", + "\n", + " # Take the center square crop of the image and resize to 256x256.\n", + " def crop_and_resize(image):\n", + " imsize = tf.shape(image)[:2]\n", + " minor_edge = tf.reduce_min(imsize)\n", + " start = (imsize - minor_edge) // 2\n", + " stop = start + minor_edge\n", + " cropped_image = image[start[0] : stop[0], start[1] : stop[1]]\n", + " resized_image = tf.image.resize_bicubic([cropped_image], [256, 256])[0]\n", + " return resized_image\n", + " ds = ds.map(crop_and_resize)\n", + "\n", + " # Convert images from [0, 255] uint8 to [-1, 1] float32.\n", + " ds = ds.map(lambda image: tf.cast(image, tf.float32) / (255. / 2.) - 1)\n", + "\n", + " # Take the first 32 samples.\n", + " ds = ds.take(32)\n", + "\n", + " return np.array(list(tfds.as_numpy(ds)))\n", + "\n", + "test_images = get_flowers_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QAFJQU597n2A" + }, + "source": [ + "# Reconstructions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EmCQ9N9b7ptM" + }, + "source": [ + "Now we visualize BigBiGAN reconstructions by passing real images through the encoder and back through the generator, computing `G(E(x))` given images `x`.\n", + "Below, input images `x` are shown in the left column, and corresponding reconstructions are shown on the right.\n", + "\n", + "Note that reconstructions are not pixel-perfect matches to the input images; rather, they tend to capture the higher level semantic content of the input while \"forgetting\" most of the low-level detail. 
This suggests the BigBiGAN encoder may learn to capture the types of high level semantic information about images that we'd like to see in a representation learning approach.\n", + "\n", + "Also note that the raw reconstructions of the 256x256 input images are at the lower resolution produced by our generator -- 128x128. We upsample them for visualization purposes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R2F3eq8aFRle" + }, + "outputs": [], + "source": [ + "test_images_batch = test_images[:16]\n", + "_out_recons = sess.run(recon_x, feed_dict={enc_ph: test_images_batch})\n", + "print('reconstructions shape:', _out_recons.shape)\n", + "\n", + "inputs_and_recons = interleave(test_images_batch, _out_recons)\n", + "print('inputs_and_recons shape:', inputs_and_recons.shape)\n", + "imshow(imgrid(image_to_uint8(inputs_and_recons), cols=2))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zPpW3qdbEpXL" + }, + "source": [ + "# Encoder features" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2gAW76YxEsZa" + }, + "source": [ + "We now demonstrate how to compute features from the encoder used for standard representation learning evaluations.\n", + "\n", + "These features could be used in a linear or nearest neighbors-based classifier. We include the standard feature taken after the global average pooling (key `avepool_feat`) as well as the larger \"BN+CReLU\" feature (key `bn_crelu_feat`) used to achieve the best results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hpZYe5S_FQEw" + }, + "outputs": [], + "source": [ + "_out_features = sess.run(enc_features, feed_dict={enc_ph: test_images_batch})\n", + "print('AvePool features shape:', _out_features['avepool_feat'].shape)\n", + "print('BN+CReLU features shape:', _out_features['bn_crelu_feat'].shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TGzahsms2w9a" + }, + "source": [ + "# Discriminator scores and losses" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B2_5BIBN21Hr" + }, + "source": [ + "Finally, we'll compute the discriminator scores and losses on batches of encoder and generator pairs. These losses could be passed into an optimizer to train BigBiGAN.\n", + "\n", + "We use our batch of images above as the encoder inputs `x`, computing the encoder score as `D(x, E(x))`. For the generator inputs we sample `z` from a 120D standard Gaussian via `np.random.randn`, computing the generator score as `D(G(z), z)`.\n", + "\n", + "The discriminator predicts a joint score `score_xz` for the `(x, z)` pairs as well as unary scores `score_x` and `score_z` for `x` and `z` alone, respectively. It's trained to give high (positive) scores to encoder pairs and low (negative) scores to generator pairs. This mostly holds below, though the unary `score_z` is negative in both cases, indicating that the encoder outputs `E(x)` resemble actual samples from a Gaussian." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8JJ8Go0dr22-" + }, + "outputs": [], + "source": [ + "feed_dict = {enc_ph: test_images, gen_ph: np.random.randn(32, 120)}\n", + "_out_scores_enc, _out_scores_gen, _out_losses = sess.run(\n", + " [disc_scores_enc, disc_scores_gen, losses], feed_dict=feed_dict)\n", + "print('Encoder scores:', {k: v.mean() for k, v in _out_scores_enc.items()})\n", + "print('Generator scores:', {k: v.mean() for k, v in _out_scores_gen.items()})\n", + "print('Losses:', _out_losses)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "9v58CTfl8jTc" + ], + "name": "bigbigan_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb b/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb new file mode 100644 index 00000000000..e388f91fbcc --- /dev/null +++ b/site/en/hub/tutorials/biggan_generation_with_tf_hub.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pLOYL1PJAAtK" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3fJWQ8WSAFhh" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cd1dhL4Ykbm7" + }, + "source": [ + "# Generating Images with BigGAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-1NTVIH6ABK-" + }, + "source": [ + "This notebook is a demo for the *BigGAN* image generators available on [TF Hub](https://tfhub.dev/s?publisher=deepmind&q=biggan).\n", + "\n", + "See the [BigGAN paper on arXiv](https://arxiv.org/abs/1809.11096) [1] for more information about these models.\n", + "\n", + "After connecting to a runtime, get started by following these instructions:\n", + "\n", + "1. (Optional) Update the selected **`module_path`** in the first code cell below to load a BigGAN generator for a different image resolution.\n", + "2. Click **Runtime > Run all** to run each cell in order.\n", + " * Afterwards, the interactive visualizations should update automatically when you modify the settings using the sliders and dropdown menus.\n", + " * If not, press the **Play** button by the cell to re-render outputs manually.\n", + "\n", + "Note: if you run into any issues, it can help to click **Runtime > Restart and run all...** to restart your runtime and rerun all cells from scratch.\n", + "\n", + "[1] Andrew Brock, Jeff Donahue, and Karen Simonyan. [Large Scale GAN Training for High Fidelity Natural Image Synthesis](https://arxiv.org/abs/1809.11096). *arxiv:1809.11096*, 2018." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XS1_N6hKj8cz" + }, + "source": [ + "First, set the module path.\n", + "By default, we load the BigGAN-deep generator for 256x256 images from **`https://tfhub.dev/deepmind/biggan-deep-256/1`**.\n", + "To generate 128x128 or 512x512 images or to use the original BigGAN generators, comment out the active **`module_path`** setting and uncomment one of the others." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OJCIhQPClKJ1" + }, + "outputs": [], + "source": [ + "# BigGAN-deep models\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-deep-128/1' # 128x128 BigGAN-deep\n", + "module_path = 'https://tfhub.dev/deepmind/biggan-deep-256/1' # 256x256 BigGAN-deep\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-deep-512/1' # 512x512 BigGAN-deep\n", + "\n", + "# BigGAN (original) models\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-128/2' # 128x128 BigGAN\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-256/2' # 256x256 BigGAN\n", + "# module_path = 'https://tfhub.dev/deepmind/biggan-512/2' # 512x512 BigGAN" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JJrTM6hAi0CJ" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lOZnst2jeWDL" + }, + "outputs": [], + "source": [ + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import os\n", + "import io\n", + "import IPython.display\n", + "import numpy as np\n", + "import PIL.Image\n", + "from scipy.stats import truncnorm\n", + "import tensorflow_hub as hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stWb21nlcyCm" + }, + "source": [ + "## Load a BigGAN generator module from TF Hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tVgwgJiCH3PV" + }, + "outputs": [], + "source": [ + "tf.reset_default_graph()\n", + "print('Loading BigGAN module from:', module_path)\n", + "module = hub.Module(module_path)\n", + "inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in module.get_input_info_dict().items()}\n", + "output = 
module(inputs)\n", + "\n", + "print()\n", + "print('Inputs:\\n', '\\n'.join(\n", + " ' {}: {}'.format(*kv) for kv in inputs.items()))\n", + "print()\n", + "print('Output:', output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ry62-8SWfuds" + }, + "source": [ + "## Define some functions for sampling and displaying BigGAN images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "46M8prJPDEsV" + }, + "outputs": [], + "source": [ + "input_z = inputs['z']\n", + "input_y = inputs['y']\n", + "input_trunc = inputs['truncation']\n", + "\n", + "dim_z = input_z.shape.as_list()[1]\n", + "vocab_size = input_y.shape.as_list()[1]\n", + "\n", + "def truncated_z_sample(batch_size, truncation=1., seed=None):\n", + " state = None if seed is None else np.random.RandomState(seed)\n", + " values = truncnorm.rvs(-2, 2, size=(batch_size, dim_z), random_state=state)\n", + " return truncation * values\n", + "\n", + "def one_hot(index, vocab_size=vocab_size):\n", + " index = np.asarray(index)\n", + " if len(index.shape) == 0:\n", + " index = np.asarray([index])\n", + " assert len(index.shape) == 1\n", + " num = index.shape[0]\n", + " output = np.zeros((num, vocab_size), dtype=np.float32)\n", + " output[np.arange(num), index] = 1\n", + " return output\n", + "\n", + "def one_hot_if_needed(label, vocab_size=vocab_size):\n", + " label = np.asarray(label)\n", + " if len(label.shape) <= 1:\n", + " label = one_hot(label, vocab_size)\n", + " assert len(label.shape) == 2\n", + " return label\n", + "\n", + "def sample(sess, noise, label, truncation=1., batch_size=8,\n", + " vocab_size=vocab_size):\n", + " noise = np.asarray(noise)\n", + " label = np.asarray(label)\n", + " num = noise.shape[0]\n", + " if len(label.shape) == 0:\n", + " label = np.asarray([label] * num)\n", + " if label.shape[0] != num:\n", + " raise ValueError('Got # noise samples ({}) != # label samples ({})'\n", + " .format(noise.shape[0], label.shape[0]))\n", + " label = one_hot_if_needed(label, vocab_size)\n", + " ims = []\n", + " for batch_start in range(0, num, batch_size):\n", + " s = slice(batch_start, min(num, batch_start + batch_size))\n", + " feed_dict = {input_z: noise[s], input_y: label[s], input_trunc: truncation}\n", + " ims.append(sess.run(output, feed_dict=feed_dict))\n", + " ims = np.concatenate(ims, axis=0)\n", + " assert ims.shape[0] == num\n", + " ims = np.clip(((ims + 1) / 2.0) * 256, 0, 255)\n", + " ims = np.uint8(ims)\n", + " return ims\n", + "\n", + "def interpolate(A, B, num_interps):\n", + " if A.shape != B.shape:\n", + " raise ValueError('A and B must have the same shape to interpolate.')\n", + " alphas = np.linspace(0, 1, num_interps)\n", + " return np.array([(1-a)*A + a*B for a in alphas])\n", + "\n", + "def imgrid(imarray, cols=5, pad=1):\n", + " if imarray.dtype != np.uint8:\n", + " raise ValueError('imgrid input imarray must be uint8')\n", + " pad = int(pad)\n", + " assert pad >= 0\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = N // cols + int(N % cols != 0)\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant', constant_values=255)\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " if pad:\n", + " grid = grid[:-pad, :-pad]\n", + " return 
grid\n", + "\n", + "def imshow(a, format='png', jpeg_fallback=True):\n", + " a = np.asarray(a, dtype=np.uint8)\n", + " data = io.BytesIO()\n", + " PIL.Image.fromarray(a).save(data, format)\n", + " im_data = data.getvalue()\n", + " try:\n", + " disp = IPython.display.display(IPython.display.Image(im_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print(('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format))\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uCeCg3Sdf8Nv" + }, + "source": [ + "## Create a TensorFlow session and initialize variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rYJor5bOaVn1" + }, + "outputs": [], + "source": [ + "initializer = tf.global_variables_initializer()\n", + "sess = tf.Session()\n", + "sess.run(initializer)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SeZ7u3rWd9jz" + }, + "source": [ + "# Explore BigGAN samples of a particular category\n", + "\n", + "Try varying the **`truncation`** value.\n", + "\n", + "(Double-click on the cell to view code.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HuCO9tv3IKT2" + }, + "outputs": [], + "source": [ + "#@title Category-conditional sampling { display-mode: \"form\", run: \"auto\" }\n", + "\n", + "num_samples = 10 #@param {type:\"slider\", min:1, max:20, step:1}\n", + "truncation = 0.4 #@param {type:\"slider\", min:0.02, max:1, step:0.02}\n", + "noise_seed = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category = \"933) cheeseburger\"\n", + "\n", + "z = truncated_z_sample(num_samples, truncation, noise_seed)\n", + "y = int(category.split(')')[0])\n", + "\n", + "ims = sample(sess, z, y, truncation=truncation)\n", + "imshow(imgrid(ims, cols=min(num_samples, 5)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hHNXtvuQgKwa" + }, + "source": [ + "# Interpolate between BigGAN samples\n", + "\n", + "Try setting different **`category`**s with the same **`noise_seed`**s, or the same **`category`**s with different **`noise_seed`**s. 
Or go wild and set both any way you like!\n", + "\n", + "(Double-click on the cell to view code.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dSAyfDfnVugs" + }, + "outputs": [], + "source": [ + "#@title Interpolation { display-mode: \"form\", run: \"auto\" }\n", + "\n", + "num_samples = 2 #@param {type:\"slider\", min:1, max:5, step:1}\n", + "num_interps = 5 #@param {type:\"slider\", min:2, max:10, step:1}\n", + "truncation = 0.2 #@param {type:\"slider\", min:0.02, max:1, step:0.02}\n", + "noise_seed_A = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category_A = \"207) golden retriever\"\n", + "noise_seed_B = 0 #@param {type:\"slider\", min:0, max:100, step:1}\n", + "category_B = \"8) hen\"\n", + "\n", + "def interpolate_and_shape(A, B, num_interps):\n", + " interps = interpolate(A, B, num_interps)\n", + " return (interps.transpose(1, 0, *range(2, len(interps.shape)))\n", + " .reshape(num_samples * num_interps, *interps.shape[2:]))\n", + "\n", + "z_A, z_B = [truncated_z_sample(num_samples, truncation, noise_seed)\n", + " for noise_seed in [noise_seed_A, noise_seed_B]]\n", + "y_A, y_B = [one_hot([int(category.split(')')[0])] * num_samples)\n", + " for category in [category_A, category_B]]\n", + "\n", + "z_interp = interpolate_and_shape(z_A, z_B, num_interps)\n", + "y_interp = interpolate_and_shape(y_A, y_B, num_interps)\n", + "\n", + "ims = sample(sess, z_interp, y_interp, truncation=truncation)\n", + "imshow(imgrid(ims, cols=num_interps))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "pLOYL1PJAAtK" + ], + "name": "biggan_generation_with_tf_hub.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/bird_vocalization_classifier.ipynb b/site/en/hub/tutorials/bird_vocalization_classifier.ipynb new file mode 100644 index 00000000000..563be9b425a --- /dev/null +++ b/site/en/hub/tutorials/bird_vocalization_classifier.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "QD3FvutQsaqc" + }, + "source": [ + "##### Copyright 2023 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-5fm9kVRsfuG" + }, + "outputs": [], + "source": [ + "#@title Copyright 2023 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QNDQZiSGtXMu" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1JAO_rv_QEBr" + }, + "source": [ + "# Using Google Bird Vocalization model\n", + "\n", + "The Google Bird Vocalization is a global bird embedding and classification model.\n", + "\n", + "This model expects as input a 5-second audio segment sampled at 32kHz\n", + "\n", + "The model outputs both the logits and the embeddigs for each input window of audio.\n", + "\n", + "On this notebook you'll learn how to feed the audio properly to the model and how to use the logits for inference.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bytIYq0MjEKT" + }, + "outputs": [], + "source": [ + "!pip install -q \"tensorflow_io==0.28.*\"\n", + "!pip install -q librosa" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aXXTdq-eq6lk" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_io as tfio\n", + "\n", + "import numpy as np\n", + "import librosa\n", + "\n", + "import csv\n", + "import io\n", + "\n", + "from IPython.display import Audio" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "B6mFpgMWQjgk" + }, + "source": [ + "Loading the Model from TFHub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CQ1P3IkpQiya" + }, + "outputs": [], + "source": [ + "model_handle = \"https://tfhub.dev/google/bird-vocalization-classifier/1/"\n", + "model = hub.load(model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3OOw23B3fZT6" + }, + "source": [ + "Lets load the labels that the model was trained on.\n", + "\n", + "The labels file is in the assets forlder under label.csv. Each line is an ebird id." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5i-R4k9ZhwN" + }, + "outputs": [], + "source": [ + "# Find the name of the class with the top score when mean-aggregated across frames.\n", + "def class_names_from_csv(class_map_csv_text):\n", + " \"\"\"Returns list of class names corresponding to score vector.\"\"\"\n", + " with open(class_map_csv_text) as csv_file:\n", + " csv_reader = csv.reader(csv_file, delimiter=',')\n", + " class_names = [mid for mid, desc in csv_reader]\n", + " return class_names[1:]\n", + "\n", + "labels_path = hub.resolve(model_handle) + \"/assets/label.csv\"\n", + "classes = class_names_from_csv(labels_path)\n", + "print(classes)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "b2JYPafeRRi_" + }, + "source": [ + "The ```frame_audio``` function is based on the [Chirp lib](https://github.com/google-research/chirp/blob/10c5faa325a3c3468fa6f18a736fc1aeb9bf8129/chirp/inference/interface.py#L128) version but uses tf.signal instead of librosa.\n", + "\n", + "The `ensure_sample_rate` function makes sure that any audio used with the model has the expected sample rate of 32kHz." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t65gi_DTrRaa" + }, + "outputs": [], + "source": [ + "def frame_audio(\n", + " audio_array: np.ndarray,\n", + " window_size_s: float = 5.0,\n", + " hop_size_s: float = 5.0,\n", + " sample_rate = 32000,\n", + " ) -> np.ndarray:\n", + " \"\"\"Helper function for framing audio for inference.\"\"\"\n", + " if window_size_s is None or window_size_s < 0:\n", + " return audio_array[np.newaxis, :]\n", + " frame_length = int(window_size_s * sample_rate)\n", + " hop_length = int(hop_size_s * sample_rate)\n", + " framed_audio = tf.signal.frame(audio_array, frame_length, hop_length, pad_end=True)\n", + " return framed_audio\n", + "\n", + "def ensure_sample_rate(waveform, original_sample_rate,\n", + " desired_sample_rate=32000):\n", + " \"\"\"Resample waveform if required.\"\"\"\n", + " if original_sample_rate != desired_sample_rate:\n", + " waveform = tfio.audio.resample(waveform, original_sample_rate, desired_sample_rate)\n", + " return desired_sample_rate, waveform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G7uAuI4f6ehb" + }, + "source": [ + "Let's load a file from Wikipedia.\n", + "\n", + "To be more precise, the audio of a [Common Blackbird](https://es.wikipedia.org/wiki/Turdus_merula).\n", + "\n", + "| Common Blackbird |\n", + "|:--:|\n", + "| *By Andreas Trepte - Own work, CC BY-SA 2.5* |
|\n", + "\n", + "\n", + "The audio was contributed by Oona Räisänen (Mysid) under the public domain license." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "whkmGeJ9lmyd" + }, + "outputs": [], + "source": [ + "!curl -O \"https://upload.wikimedia.org/wikipedia/commons/7/7c/Turdus_merula_2.ogg/"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ff6nOV2EurAO" + }, + "outputs": [], + "source": [ + "turdus_merula = \"Turdus_merula_2.ogg\"\n", + "\n", + "audio, sample_rate = librosa.load(turdus_merula)\n", + "\n", + "sample_rate, wav_data_turdus = ensure_sample_rate(audio, sample_rate)\n", + "Audio(wav_data_turdus, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sjpKLk9K7TTV" + }, + "source": [ + "The audio has 24 seconds and the model expects chunks of 5 seconds.\n", + "\n", + "The `frame_audio` function can fix that and split the audio in proper frames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VzgK0xWw9g8X" + }, + "outputs": [], + "source": [ + "fixed_tm = frame_audio(wav_data_turdus)\n", + "fixed_tm.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rU5-UqaCAVZ7" + }, + "source": [ + "Let's apply the model only on the first frame:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0zveWSOU9QBC" + }, + "outputs": [], + "source": [ + "logits, embeddings = model.infer_tf(fixed_tm[:1])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "osmRNWciEEuG" + }, + "source": [ + "The label.csv file contains ebirds ids.\n", + "The ebird id for Turdus Merula is eurbla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E-UehjA6Acn_" + }, + "outputs": [], + "source": [ + "probabilities = tf.nn.softmax(logits)\n", + "argmax = np.argmax(probabilities)\n", + "print(f\"The audio is from the class {classes[argmax]} (element:{argmax} in the label.csv file), with probability of {probabilities[0][argmax]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bGK84egXBg2f" + }, + "source": [ + "Lets apply the model on all the frames now:\n", + "\n", + "*note*: this code is also based on the [Chirp library](https://github.com/google-research/chirp/blob/d6ff5e7cee3865940f31697bf4b70176c1072572/chirp/inference/models.py#L174)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UT_Im9i50EGy" + }, + "outputs": [], + "source": [ + "all_logits, all_embeddings = model.infer_tf(fixed_tm[:1])\n", + "for window in fixed_tm[1:]:\n", + " logits, embeddings = model.infer_tf(window[np.newaxis, :])\n", + " all_logits = np.concatenate([all_logits, logits], axis=0)\n", + "\n", + "all_logits.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kKuJWq4SxyR1" + }, + "outputs": [], + "source": [ + "frame = 0\n", + "for frame_logits in all_logits:\n", + " probabilities = tf.nn.softmax(frame_logits)\n", + " argmax = np.argmax(probabilities)\n", + " print(f\"For frame {frame}, the audio is from the class {classes[argmax]} (element:{argmax} in the label.csv file), with probability of {probabilities[argmax]}\")\n", + " frame += 1" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "bird_vocalization_classifier.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + 
"nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/boundless.ipynb b/site/en/hub/tutorials/boundless.ipynb new file mode 100644 index 00000000000..f53fc5bb004 --- /dev/null +++ b/site/en/hub/tutorials/boundless.ipynb @@ -0,0 +1,306 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "9veUEV0CfmHX" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "BlCInyRifxHS" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_LRMeRxCfzC4" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QOjczJJ4gWHS" + }, + "source": [ + "# Boundless Colab\n", + "\n", + "Welcome to the Boundless model Colab! This notebook will take you through the steps of running the model on images and visualize the results.\n", + "\n", + "## Overview\n", + "\n", + "Boundless is a model for image extrapolation. This model takes an image, internally masks a portion of it ([1/2](https://tfhub.dev/google/boundless/half/1), [1/4](https://tfhub.dev/google/boundless/quarter/1), [3/4](https://tfhub.dev/google/boundless/three_quarter/1)) and completes the masked part. For more details refer to [Boundless: Generative Adversarial Networks for Image Extension](https://arxiv.org/pdf/1908.07007.pdf) or the model documentation on TensorFlow Hub." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hDKbpAEZf8Lt" + }, + "source": [ + "## Imports and setup\n", + "\n", + "Start with the base imports:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xJMFtTqPr7lf" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from io import BytesIO\n", + "from PIL import Image as PilImage\n", + "import numpy as np\n", + "from matplotlib import pyplot as plt\n", + "from six.moves.urllib.request import urlopen" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pigUDIXtciQO" + }, + "source": [ + "## Create a function for reading an image\n", + "\n", + "Create a utility function to help load an image and format it for the model (257x257x3). This method will also crop the image to a square to avoid distortion and you can use it with local images or from the internet." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KTEVPgXH6rtV" + }, + "outputs": [], + "source": [ + " def read_image(filename):\n", + " fd = None\n", + " if(filename.startswith('http')):\n", + " fd = urlopen(filename)\n", + " else:\n", + " fd = tf.io.gfile.GFile(filename, 'rb')\n", + "\n", + " pil_image = PilImage.open(fd)\n", + " width, height = pil_image.size\n", + " # crop to make the image square\n", + " pil_image = pil_image.crop((0, 0, height, height))\n", + " pil_image = pil_image.resize((257,257),PilImage.LANCZOS)\n", + " image_unscaled = np.array(pil_image)\n", + " image_np = np.expand_dims(\n", + " image_unscaled.astype(np.float32) / 255., axis=0)\n", + " return image_np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lonrLxuKcsL0" + }, + "source": [ + "## Create a visualization function\n", + "\n", + "Create a visualization function to show the original image side-by-side with the masked version and the \"filled\" version, both generated by the model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j7AkoMFG7r-O" + }, + "outputs": [], + "source": [ + "def visualize_output_comparison(img_original, img_masked, img_filled):\n", + " plt.figure(figsize=(24,12))\n", + " plt.subplot(131)\n", + " plt.imshow((np.squeeze(img_original)))\n", + " plt.title(\"Original\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.subplot(132)\n", + " plt.imshow((np.squeeze(img_masked)))\n", + " plt.title(\"Masked\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.subplot(133)\n", + " plt.imshow((np.squeeze(img_filled)))\n", + " plt.title(\"Generated\", fontsize=24)\n", + " plt.axis('off')\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8rwaCWmxdJGH" + }, + "source": [ + "## Load an image\n", + "\n", + "Now you can load a sample image. Feel free to use your own image by uploading it to the Colab notebook. Remember that the model may have some limitations regarding human images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "92w-Jfbm60XA" + }, + "outputs": [], + "source": [ + "wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/3/31/Nusfjord_road%2C_2010_09.jpg/800px-Nusfjord_road%2C_2010_09.jpg/"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/Beech_forest_M%C3%A1tra_in_winter.jpg/640px-Beech_forest_M%C3%A1tra_in_winter.jpg/"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/b/b2/Marmolada_Sunset.jpg/640px-Marmolada_Sunset.jpg/"\n", + "# wikimedia = \"https://upload.wikimedia.org/wikipedia/commons/thumb/9/9d/Aegina_sunset.jpg/640px-Aegina_sunset.jpg/"\n", + "\n", + "input_img = read_image(wikimedia)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4lIkmZL_dtyX" + }, + "source": [ + "## Select a model from TensorFlow Hub\n", + "\n", + "On TensorFlow Hub there are three versions of the Boundless model: Half, Quarter and Three Quarters.\n", + "In the following cell you can choose any of the models and apply them on your image. If you want to pick another model, select it below and then run the following cells." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B3myNctEQ5GE" + }, + "outputs": [], + "source": [ + "#@title Model Selection { display-mode: \"form\" }\n", + "model_name = 'Boundless Quarter' # @param ['Boundless Half', 'Boundless Quarter', 'Boundless Three Quarters']\n", + "model_handle_map = {\n", + " 'Boundless Half' : 'https://tfhub.dev/google/boundless/half/1',\n", + " 'Boundless Quarter' : 'https://tfhub.dev/google/boundless/quarter/1', \n", + " 'Boundless Three Quarters' : 'https://tfhub.dev/google/boundless/three_quarter/1'\n", + "}\n", + "\n", + "model_handle = model_handle_map[model_name]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aSJFeNNSeOn8" + }, + "source": [ + "After choosing your model, you can load it from TensorFlow Hub.\n", + "\n", + "**Note**: You can point to a model handle to read the model's documentation." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0IDKMNyYSWsj" + }, + "outputs": [], + "source": [ + "print(\"Loading model {} ({})\".format(model_name, model_handle))\n", + "model = hub.load(model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L4G7CPOaeuQb" + }, + "source": [ + "## Perform inference\n", + "\n", + "The boundless model has two outputs:\n", + "\n", + "* The input image with a mask applied\n", + "* The masked image with the extrapolation to complete it\n", + "\n", + "You can compare these two images with a visualization as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W7uCAuKxSd-M" + }, + "outputs": [], + "source": [ + "result = model.signatures['default'](tf.constant(input_img))\n", + "generated_image = result['default']\n", + "masked_image = result['masked_image']\n", + "\n", + "visualize_output_comparison(input_img, masked_image, generated_image)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "boundless.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cord_19_embeddings.ipynb b/site/en/hub/tutorials/cord_19_embeddings.ipynb new file mode 100644 index 00000000000..01f43e5f9a9 --- /dev/null +++ b/site/en/hub/tutorials/cord_19_embeddings.ipynb @@ -0,0 +1,537 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "5wFF5JFyD2Ki" + }, + "source": [ + "#### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uf6NouXxDqGk" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "# Exploring the TF-Hub CORD-19 Swivel Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9VusdTAH0isl" + }, + "source": [ + "The CORD-19 Swivel text embedding module from TF-Hub (https://tfhub.dev/tensorflow/cord-19/swivel-128d/1)\n", + " was built to support researchers analyzing natural languages text related to COVID-19.\n", + "These embeddings were trained on the titles, authors, abstracts, body texts, and\n", + "reference titles of articles in the [CORD-19 dataset](https://api.semanticscholar.org/CorpusID:216056360).\n", + "\n", + "In this colab we will:\n", + "- Analyze semantically similar words in the embedding space\n", + "- Train a classifier on the SciCite dataset using the CORD-19 embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L69VQv2Z0isl" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ym2nXOPuPV__" + }, + "outputs": [], + "source": [ + "import functools\n", + "import itertools\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_eager_execution()\n", + "tf.logging.set_verbosity('ERROR')\n", + "\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub\n", + "\n", + "try:\n", + " from google.colab import data_table\n", + " def display_df(df):\n", + " return data_table.DataTable(df, include_index=False)\n", + "except ModuleNotFoundError:\n", + " # If google-colab is not available, just display the raw DataFrame\n", + " def display_df(df):\n", + " return df" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_VgRRf2I7tER" + }, + "source": [ + "# Analyze the embeddings\n", + "\n", + "Let's start off by analyzing the embedding by calculating and plotting a correlation matrix between different terms. If the embedding learned to successfully capture the meaning of different words, the embedding vectors of semantically similar words should be close together. Let's take a look at some COVID-19 related terms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HNN_9bBKSLHU" + }, + "outputs": [], + "source": [ + "# Use the inner product between two embedding vectors as the similarity measure\n", + "def plot_correlation(labels, features):\n", + " corr = np.inner(features, features)\n", + " corr /= np.max(corr)\n", + " sns.heatmap(corr, xticklabels=labels, yticklabels=labels)\n", + "\n", + "\n", + "with tf.Graph().as_default():\n", + " # Load the module\n", + " query_input = tf.placeholder(tf.string)\n", + " module = hub.Module('https://tfhub.dev/tensorflow/cord-19/swivel-128d/1')\n", + " embeddings = module(query_input)\n", + "\n", + " with tf.train.MonitoredTrainingSession() as sess:\n", + "\n", + " # Generate embeddings for some terms\n", + " queries = [\n", + " # Related viruses\n", + " \"coronavirus\", \"SARS\", \"MERS\",\n", + " # Regions\n", + " \"Italy\", \"Spain\", \"Europe\",\n", + " # Symptoms\n", + " \"cough\", \"fever\", \"throat\"\n", + " ]\n", + "\n", + " features = sess.run(embeddings, feed_dict={query_input: queries})\n", + " plot_correlation(queries, features)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bg-PGqtm8B7K" + }, + "source": [ + "We can see that the embedding successfully captured the meaning of the different terms. Each word is similar to the other words of its cluster (i.e. 
\"coronavirus\" highly correlates with \"SARS\" and \"MERS\"), while they are different from terms of other clusters (i.e. the similarity between \"SARS\" and \"Spain\" is close to 0).\n", + "\n", + "Now let's see how we can use these embeddings to solve a specific task." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "idJ1jFmH7xMa" + }, + "source": [ + "## SciCite: Citation Intent Classification\n", + "\n", + "This section shows how one can use the embedding for downstream tasks such as text classification. We'll use the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) from TensorFlow Datasets to classify citation intents in academic papers. Given a sentence with a citation from an academic paper, classify whether the main intent of the citation is as background information, use of methods, or comparing results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-FB19HLfVp2V" + }, + "outputs": [], + "source": [ + "#@title Set up the dataset from TFDS\n", + "\n", + "class Dataset:\n", + " \"\"\"Build a dataset from a TFDS dataset.\"\"\"\n", + " def __init__(self, tfds_name, feature_name, label_name):\n", + " self.dataset_builder = tfds.builder(tfds_name)\n", + " self.dataset_builder.download_and_prepare()\n", + " self.feature_name = feature_name\n", + " self.label_name = label_name\n", + " \n", + " def get_data(self, for_eval):\n", + " splits = THE_DATASET.dataset_builder.info.splits\n", + " if tfds.Split.TEST in splits:\n", + " split = tfds.Split.TEST if for_eval else tfds.Split.TRAIN\n", + " else:\n", + " SPLIT_PERCENT = 80\n", + " split = \"train[{}%:]\".format(SPLIT_PERCENT) if for_eval else \"train[:{}%]\".format(SPLIT_PERCENT)\n", + " return self.dataset_builder.as_dataset(split=split)\n", + "\n", + " def num_classes(self):\n", + " return self.dataset_builder.info.features[self.label_name].num_classes\n", + "\n", + " def class_names(self):\n", + " return self.dataset_builder.info.features[self.label_name].names\n", + "\n", + " def preprocess_fn(self, data):\n", + " return data[self.feature_name], data[self.label_name]\n", + "\n", + " def example_fn(self, data):\n", + " feature, label = self.preprocess_fn(data)\n", + " return {'feature': feature, 'label': label}, label\n", + "\n", + "\n", + "def get_example_data(dataset, num_examples, **data_kw):\n", + " \"\"\"Show example data\"\"\"\n", + " with tf.Session() as sess:\n", + " batched_ds = dataset.get_data(**data_kw).take(num_examples).map(dataset.preprocess_fn).batch(num_examples)\n", + " it = tf.data.make_one_shot_iterator(batched_ds).get_next()\n", + " data = sess.run(it)\n", + " return data\n", + "\n", + "\n", + "TFDS_NAME = 'scicite' #@param {type: \"string\"}\n", + "TEXT_FEATURE_NAME = 'string' #@param {type: \"string\"}\n", + "LABEL_NAME = 'label' #@param {type: \"string\"}\n", + "THE_DATASET = Dataset(TFDS_NAME, TEXT_FEATURE_NAME, LABEL_NAME)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "CVjyBD0ZPh4Z" + }, + "outputs": [], + "source": [ + "#@title Let's take a look at a few labeled examples from the training set\n", + "NUM_EXAMPLES = 20 #@param {type:\"integer\"}\n", + "data = get_example_data(THE_DATASET, NUM_EXAMPLES, for_eval=False)\n", + "display_df(\n", + " pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [ex.decode('utf8') for ex in data[0]],\n", + " LABEL_NAME: [THE_DATASET.class_names()[x] for x in data[1]]\n", + " }))" + ] + }, + { + "cell_type": "markdown", + 
"metadata": { + "id": "65s9UpYJ_1ct" + }, + "source": [ + "## Training a citaton intent classifier\n", + "\n", + "We'll train a classifier on the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) using an Estimator. Let's set up the input_fns to read the dataset into the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "OldapWmKSGsW" + }, + "outputs": [], + "source": [ + "def preprocessed_input_fn(for_eval):\n", + " data = THE_DATASET.get_data(for_eval=for_eval)\n", + " data = data.map(THE_DATASET.example_fn, num_parallel_calls=1)\n", + " return data\n", + "\n", + "\n", + "def input_fn_train(params):\n", + " data = preprocessed_input_fn(for_eval=False)\n", + " data = data.repeat(None)\n", + " data = data.shuffle(1024)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data\n", + "\n", + "\n", + "def input_fn_eval(params):\n", + " data = preprocessed_input_fn(for_eval=True)\n", + " data = data.repeat(1)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data\n", + "\n", + "\n", + "def input_fn_predict(params):\n", + " data = preprocessed_input_fn(for_eval=True)\n", + " data = data.batch(batch_size=params['batch_size'])\n", + " return data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KcrmWUkVKg2u" + }, + "source": [ + "Let's build a model which use the CORD-19 embeddings with a classification layer on top." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ff0uKqJCA9zh" + }, + "outputs": [], + "source": [ + "def model_fn(features, labels, mode, params):\n", + " # Embed the text\n", + " embed = hub.Module(params['module_name'], trainable=params['trainable_module'])\n", + " embeddings = embed(features['feature'])\n", + "\n", + " # Add a linear layer on top\n", + " logits = tf.layers.dense(\n", + " embeddings, units=THE_DATASET.num_classes(), activation=None)\n", + " predictions = tf.argmax(input=logits, axis=1)\n", + "\n", + " if mode == tf.estimator.ModeKeys.PREDICT:\n", + " return tf.estimator.EstimatorSpec(\n", + " mode=mode,\n", + " predictions={\n", + " 'logits': logits,\n", + " 'predictions': predictions,\n", + " 'features': features['feature'],\n", + " 'labels': features['label']\n", + " })\n", + " \n", + " # Set up a multi-class classification head\n", + " loss = tf.nn.sparse_softmax_cross_entropy_with_logits(\n", + " labels=labels, logits=logits)\n", + " loss = tf.reduce_mean(loss)\n", + "\n", + " if mode == tf.estimator.ModeKeys.TRAIN:\n", + " optimizer = tf.train.GradientDescentOptimizer(learning_rate=params['learning_rate'])\n", + " train_op = optimizer.minimize(loss, global_step=tf.train.get_or_create_global_step())\n", + " return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)\n", + "\n", + " elif mode == tf.estimator.ModeKeys.EVAL:\n", + " accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions)\n", + " precision = tf.metrics.precision(labels=labels, predictions=predictions)\n", + " recall = tf.metrics.recall(labels=labels, predictions=predictions)\n", + "\n", + " return tf.estimator.EstimatorSpec(\n", + " mode=mode,\n", + " loss=loss,\n", + " eval_metric_ops={\n", + " 'accuracy': accuracy,\n", + " 'precision': precision,\n", + " 'recall': recall,\n", + " })\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yZUclu8xBYlj" + }, + "outputs": [], + "source": [ + "#@title Hyperparmeters { run: 
\"auto\" }\n", + "\n", + "EMBEDDING = 'https://tfhub.dev/tensorflow/cord-19/swivel-128d/1' #@param {type: \"string\"}\n", + "TRAINABLE_MODULE = False #@param {type: \"boolean\"}\n", + "STEPS = 8000#@param {type: \"integer\"}\n", + "EVAL_EVERY = 200 #@param {type: \"integer\"}\n", + "BATCH_SIZE = 10 #@param {type: \"integer\"}\n", + "LEARNING_RATE = 0.01 #@param {type: \"number\"}\n", + "\n", + "params = {\n", + " 'batch_size': BATCH_SIZE,\n", + " 'learning_rate': LEARNING_RATE,\n", + " 'module_name': EMBEDDING,\n", + " 'trainable_module': TRAINABLE_MODULE\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "weZKWK-pLBll" + }, + "source": [ + "## Train and evaluate the model\n", + "\n", + "Let's train and evaluate the model to see the performance on the SciCite task" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cO1FWkZW2WS9" + }, + "outputs": [], + "source": [ + "estimator = tf.estimator.Estimator(functools.partial(model_fn, params=params))\n", + "metrics = []\n", + "\n", + "for step in range(0, STEPS, EVAL_EVERY):\n", + " estimator.train(input_fn=functools.partial(input_fn_train, params=params), steps=EVAL_EVERY)\n", + " step_metrics = estimator.evaluate(input_fn=functools.partial(input_fn_eval, params=params))\n", + " print('Global step {}: loss {:.3f}, accuracy {:.3f}'.format(step, step_metrics['loss'], step_metrics['accuracy']))\n", + " metrics.append(step_metrics)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RUNGAeyf1ygC" + }, + "outputs": [], + "source": [ + "global_steps = [x['global_step'] for x in metrics]\n", + "fig, axes = plt.subplots(ncols=2, figsize=(20,8))\n", + "\n", + "for axes_index, metric_names in enumerate([['accuracy', 'precision', 'recall'],\n", + " ['loss']]):\n", + " for metric_name in metric_names:\n", + " axes[axes_index].plot(global_steps, [x[metric_name] for x in metrics], label=metric_name)\n", + " axes[axes_index].legend()\n", + " axes[axes_index].set_xlabel(\"Global Step\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1biWylvB6ayg" + }, + "source": [ + "We can see that the loss quickly decreases while especially the accuracy rapidly increases. Let's plot some examples to check how the prediction relates to the true labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zK_NJXtoyG2o" + }, + "outputs": [], + "source": [ + "predictions = estimator.predict(functools.partial(input_fn_predict, params))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nlxFER_Oriam" + }, + "outputs": [], + "source": [ + "first_10_predictions = list(itertools.islice(predictions, 10))\n", + "\n", + "display_df(\n", + " pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [pred['features'].decode('utf8') for pred in first_10_predictions],\n", + " LABEL_NAME: [THE_DATASET.class_names()[pred['labels']] for pred in first_10_predictions],\n", + " 'prediction': [THE_DATASET.class_names()[pred['predictions']] for pred in first_10_predictions]\n", + " }))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OSGcrkE069_Q" + }, + "source": [ + "We can see that for this random sample, the model predicts the correct label most of the times, indicating that it can embed scientific sentences pretty well." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLE0kCfO5CIA" + }, + "source": [ + "# What's next?\n", + "\n", + "Now that you've gotten to know a bit more about the CORD-19 Swivel embeddings from TF-Hub, we encourage you to participate in the CORD-19 Kaggle competition to contribute to gaining scientific insights from COVID-19 related academic texts.\n", + "\n", + "* Participate in the [CORD-19 Kaggle Challenge](https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge)\n", + "* Learn more about the [COVID-19 Open Research Dataset (CORD-19)](https://api.semanticscholar.org/CorpusID:216056360)\n", + "* See documentation and more about the TF-Hub embeddings at https://tfhub.dev/tensorflow/cord-19/swivel-128d/1\n", + "* Explore the CORD-19 embedding space with the [TensorFlow Embedding Projector](http://projector.tensorflow.org/?config=https://storage.googleapis.com/tfhub-examples/tensorflow/cord-19/swivel-128d/1/tensorboard/full_projector_config.json)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "5wFF5JFyD2Ki" + ], + "name": "cord_19_embeddings.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb b/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb new file mode 100644 index 00000000000..388de741e34 --- /dev/null +++ b/site/en/hub/tutorials/cord_19_embeddings_keras.ipynb @@ -0,0 +1,421 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "5wFF5JFyD2Ki" + }, + "source": [ + "#### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uf6NouXxDqGk" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "# Exploring the TF-Hub CORD-19 Swivel Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yI6Mh3-P0_Pk" + }, + "source": [ + "The CORD-19 Swivel text embedding module from TF-Hub (https://tfhub.dev/tensorflow/cord-19/swivel-128d/3)\n", + " was built to support researchers analyzing natural languages text related to COVID-19.\n", + "These embeddings were trained on the titles, authors, abstracts, body texts, and\n", + "reference titles of articles in the [CORD-19 dataset](https://api.semanticscholar.org/CorpusID:216056360).\n", + "\n", + "In this colab we will:\n", + "- Analyze semantically similar words in the embedding space\n", + "- Train a classifier on the SciCite dataset using the CORD-19 embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gVWOrccw0_Pl" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ym2nXOPuPV__" + }, + "outputs": [], + "source": [ + "import functools\n", + "import itertools\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import pandas as pd\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub\n", + "\n", + "from tqdm import trange" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_VgRRf2I7tER" + }, + "source": [ + "# Analyze the embeddings\n", + "\n", + "Let's start off by analyzing the embedding by calculating and plotting a correlation matrix between different terms. If the embedding learned to successfully capture the meaning of different words, the embedding vectors of semantically similar words should be close together. Let's take a look at some COVID-19 related terms." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HNN_9bBKSLHU" + }, + "outputs": [], + "source": [ + "# Use the inner product between two embedding vectors as the similarity measure\n", + "def plot_correlation(labels, features):\n", + " corr = np.inner(features, features)\n", + " corr /= np.max(corr)\n", + " sns.heatmap(corr, xticklabels=labels, yticklabels=labels)\n", + "\n", + "# Generate embeddings for some terms\n", + "queries = [\n", + " # Related viruses\n", + " 'coronavirus', 'SARS', 'MERS',\n", + " # Regions\n", + " 'Italy', 'Spain', 'Europe',\n", + " # Symptoms\n", + " 'cough', 'fever', 'throat'\n", + "]\n", + "\n", + "module = hub.load('https://tfhub.dev/tensorflow/cord-19/swivel-128d/3')\n", + "embeddings = module(queries)\n", + "\n", + "plot_correlation(queries, embeddings)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Bg-PGqtm8B7K" + }, + "source": [ + "We can see that the embedding successfully captured the meaning of the different terms. Each word is similar to the other words of its cluster (i.e. \"coronavirus\" highly correlates with \"SARS\" and \"MERS\"), while they are different from terms of other clusters (i.e. the similarity between \"SARS\" and \"Spain\" is close to 0).\n", + "\n", + "Now let's see how we can use these embeddings to solve a specific task." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "idJ1jFmH7xMa" + }, + "source": [ + "## SciCite: Citation Intent Classification\n", + "\n", + "This section shows how one can use the embedding for downstream tasks such as text classification. We'll use the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) from TensorFlow Datasets to classify citation intents in academic papers. 
Given a sentence with a citation from an academic paper, classify whether the main intent of the citation is as background information, use of methods, or comparing results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ghc-CzT8DDaZ" + }, + "outputs": [], + "source": [ + "builder = tfds.builder(name='scicite')\n", + "builder.download_and_prepare()\n", + "train_data, validation_data, test_data = builder.as_dataset(\n", + " split=('train', 'validation', 'test'),\n", + " as_supervised=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CVjyBD0ZPh4Z" + }, + "outputs": [], + "source": [ + "#@title Let's take a look at a few labeled examples from the training set\n", + "NUM_EXAMPLES = 10#@param {type:\"integer\"}\n", + "\n", + "TEXT_FEATURE_NAME = builder.info.supervised_keys[0]\n", + "LABEL_NAME = builder.info.supervised_keys[1]\n", + "\n", + "def label2str(numeric_label):\n", + " m = builder.info.features[LABEL_NAME].names\n", + " return m[numeric_label]\n", + "\n", + "data = next(iter(train_data.batch(NUM_EXAMPLES)))\n", + "\n", + "\n", + "pd.DataFrame({\n", + " TEXT_FEATURE_NAME: [ex.numpy().decode('utf8') for ex in data[0]],\n", + " LABEL_NAME: [label2str(x) for x in data[1]]\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "65s9UpYJ_1ct" + }, + "source": [ + "## Training a citation intent classifier\n", + "\n", + "We'll train a classifier on the [SciCite dataset](https://www.tensorflow.org/datasets/catalog/scicite) using Keras. Let's build a model which uses the CORD-19 embeddings with a classification layer on top." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yZUclu8xBYlj" + }, + "outputs": [], + "source": [ + "#@title Hyperparameters { run: \"auto\" }\n", + "\n", + "EMBEDDING = 'https://tfhub.dev/tensorflow/cord-19/swivel-128d/3' #@param {type: \"string\"}\n", + "TRAINABLE_MODULE = False #@param {type: \"boolean\"}\n", + "\n", + "hub_layer = hub.KerasLayer(EMBEDDING, input_shape=[], \n", + " dtype=tf.string, trainable=TRAINABLE_MODULE)\n", + "\n", + "model = tf.keras.Sequential()\n", + "model.add(hub_layer)\n", + "model.add(tf.keras.layers.Dense(3))\n", + "model.summary()\n", + "model.compile(optimizer='adam',\n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "weZKWK-pLBll" + }, + "source": [ + "## Train and evaluate the model\n", + "\n", + "Let's train and evaluate the model to see the performance on the SciCite task." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cO1FWkZW2WS9" + }, + "outputs": [], + "source": [ + "EPOCHS = 35#@param {type: \"integer\"}\n", + "BATCH_SIZE = 32#@param {type: \"integer\"}\n", + "\n", + "history = model.fit(train_data.shuffle(10000).batch(BATCH_SIZE),\n", + " epochs=EPOCHS,\n", + " validation_data=validation_data.batch(BATCH_SIZE),\n", + " verbose=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2sKE7kEyLJQZ" + }, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "def display_training_curves(training, validation, title, subplot):\n", + " if subplot%10==1: # set up the subplots on the first call\n", + " plt.subplots(figsize=(10,10), facecolor='#F0F0F0')\n", + " plt.tight_layout()\n", + " ax = plt.subplot(subplot)\n", + " ax.set_facecolor('#F8F8F8')\n", + " 
ax.plot(training)\n", + " ax.plot(validation)\n", + " ax.set_title('model '+ title)\n", + " ax.set_ylabel(title)\n", + " ax.set_xlabel('epoch')\n", + " ax.legend(['train', 'valid.'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nnQfxevhLKld" + }, + "outputs": [], + "source": [ + "display_training_curves(history.history['accuracy'], history.history['val_accuracy'], 'accuracy', 211)\n", + "display_training_curves(history.history['loss'], history.history['val_loss'], 'loss', 212)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BjvtOw72Lpyw" + }, + "source": [ + "## Evaluate the model\n", + "\n", + "Now let's see how the model performs. Two values will be returned: the loss (a number representing our error; lower values are better) and the accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y0ExC8D0LX8m" + }, + "outputs": [], + "source": [ + "results = model.evaluate(test_data.batch(512), verbose=2)\n", + "\n", + "for name, value in zip(model.metrics_names, results):\n", + " print('%s: %.3f' % (name, value))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dWp5OWeTL2EW" + }, + "source": [ + "We can see that the loss quickly decreases while the accuracy rapidly increases. Let's plot some examples to check how the prediction relates to the true labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VzHzAOaaOVC0" + }, + "outputs": [], + "source": [ + "prediction_dataset = next(iter(test_data.batch(20)))\n", + "\n", + "prediction_texts = [ex.numpy().decode('utf8') for ex in prediction_dataset[0]]\n", + "prediction_labels = [label2str(x) for x in prediction_dataset[1]]\n", + "\n", + "predictions = [\n", + " label2str(x) for x in np.argmax(model.predict(prediction_texts), axis=-1)]\n", + "\n", + "\n", + "pd.DataFrame({\n", + " TEXT_FEATURE_NAME: prediction_texts,\n", + " LABEL_NAME: prediction_labels,\n", + " 'prediction': predictions\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OSGcrkE069_Q" + }, + "source": [ + "We can see that for this random sample, the model predicts the correct label most of the time, indicating that it can embed scientific sentences pretty well."
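To go one step beyond the sampled table above, a short sketch (again an addition, not original notebook code) can tally a confusion matrix over the whole test split, reusing names already defined in this notebook (`model`, `test_data`, `builder`, `LABEL_NAME`):

```python
# Sketch: confusion matrix over the full test split.
y_true, y_pred = [], []
for texts, labels in test_data.batch(512):
  y_pred.extend(np.argmax(model.predict(texts, verbose=0), axis=-1))
  y_true.extend(labels.numpy())

print(builder.info.features[LABEL_NAME].names)     # row/column order
print(tf.math.confusion_matrix(y_true, y_pred).numpy())
```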
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLE0kCfO5CIA" + }, + "source": [ + "# What's next?\n", + "\n", + "Now that you've gotten to know a bit more about the CORD-19 Swivel embeddings from TF-Hub, we encourage you to participate in the CORD-19 Kaggle competition to contribute to gaining scientific insights from COVID-19 related academic texts.\n", + "\n", + "* Participate in the [CORD-19 Kaggle Challenge](https://www.kaggle.com/allen-institute-for-ai/CORD-19-research-challenge)\n", + "* Learn more about the [COVID-19 Open Research Dataset (CORD-19)](https://api.semanticscholar.org/CorpusID:216056360)\n", + "* See documentation and more about the TF-Hub embeddings at https://tfhub.dev/tensorflow/cord-19/swivel-128d/3\n", + "* Explore the CORD-19 embedding space with the [TensorFlow Embedding Projector](http://projector.tensorflow.org/?config=https://storage.googleapis.com/tfhub-examples/tensorflow/cord-19/swivel-128d/3/tensorboard/projector_config.json)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "cord_19_embeddings_keras.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cropnet_cassava.ipynb b/site/en/hub/tutorials/cropnet_cassava.ipynb new file mode 100644 index 00000000000..926b5395e41 --- /dev/null +++ b/site/en/hub/tutorials/cropnet_cassava.ipynb @@ -0,0 +1,413 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vtNtfcHHoHNP" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jZwnHZ70oUIM" + }, + "source": [ + "# CropNet: Cassava Disease Detection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6sg9wHP9oR3q" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "grEgSWu2iTxm" + }, + "source": [ + "This notebook shows how to use the CropNet [cassava disease classifier](https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2) model from **TensorFlow Hub**. The model classifies images of cassava leaves into one of 6 classes: *bacterial blight, brown streak disease, green mite, mosaic disease, healthy, or unknown*.\n", + "\n", + "This colab demonstrates how to:\n", + " * Load the https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2 model from **TensorFlow Hub**\n", + " * Load the [cassava](https://www.tensorflow.org/datasets/catalog/cassava) dataset from **TensorFlow Datasets (TFDS)**\n", + " * Classify images of cassava leaves into 4 distinct cassava disease categories or as healthy or unknown.\n", + " * Evaluate the *accuracy* of the classifier and look at how *robust* the model is when applied to out of domain images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bKn4Fiq2OD7u" + }, + "source": [ + "## Imports and setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LMgjpSoYqJIz" + }, + "outputs": [], + "source": [ + "!pip install matplotlib==3.2.2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "FIP4rkjp45MG" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "mIqmq_qmWw78" + }, + "outputs": [], + "source": [ + "#@title Helper function for displaying examples\n", + "def plot(examples, predictions=None):\n", + " # Get the images, labels, and optionally predictions\n", + " images = examples['image']\n", + " labels = examples['label']\n", + " batch_size = len(images)\n", + " if predictions is None:\n", + " predictions = batch_size * [None]\n", + "\n", + " # Configure the layout of the grid\n", + " x = np.ceil(np.sqrt(batch_size))\n", + " y = np.ceil(batch_size / x)\n", + " fig = plt.figure(figsize=(x * 6, y * 7))\n", + "\n", + " for i, (image, label, prediction) in enumerate(zip(images, labels, predictions)):\n", + " # Render the image\n", + " ax = fig.add_subplot(x, y, i+1)\n", + " ax.imshow(image, aspect='auto')\n", + " ax.grid(False)\n", + " ax.set_xticks([])\n", + " ax.set_yticks([])\n", + "\n", + " # Display the label and optionally prediction\n", + " x_label = 'Label: ' + name_map[class_names[label]]\n", + " if prediction is not None:\n", + " x_label = 'Prediction: ' + name_map[class_names[prediction]] + '\\n' + x_label\n", + " ax.xaxis.label.set_color('green' if label == prediction else 'red')\n", + " ax.set_xlabel(x_label)\n", + "\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kwrg9yIlaUSb" + }, + "source": [ + "## Dataset\n", + "\n", + "Let's load the *cassava* dataset from TFDS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0rTcnxoSkp31" + }, + "outputs": [], + "source": [ + "dataset, info = tfds.load('cassava', with_info=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GpC71TFDhJFO" + }, + "source": [ + "Let's take a look at the dataset info to learn more about it, like the description and citation and information about how many examples are available" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "btJBMovmbYtR" + }, + "outputs": [], + "source": [ + "info" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QT3XWAtR6BRy" + }, + "source": [ + "The *cassava* dataset has images of cassava leaves with 4 distinct diseases as well as healthy cassava leaves. The model can predict all of these classes, as well as a sixth \"unknown\" class when the model is not confident in its prediction." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9NT9q8yyXZfX" + }, + "outputs": [], + "source": [ + "# Extend the cassava dataset classes with 'unknown'\n", + "class_names = info.features['label'].names + ['unknown']\n", + "\n", + "# Map the class names to human readable names\n", + "name_map = dict(\n", + " cmd='Mosaic Disease',\n", + " cbb='Bacterial Blight',\n", + " cgm='Green Mite',\n", + " cbsd='Brown Streak Disease',\n", + " healthy='Healthy',\n", + " unknown='Unknown')\n", + "\n", + "print(len(class_names), 'classes:')\n", + "print(class_names)\n", + "print([name_map[name] for name in class_names])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I6y_MGxgiW09" + }, + "source": [ + "Before we can feed the data to the model, we need to do a bit of preprocessing. The model expects 224 x 224 images with RGB channel values in [0, 1]. Let's normalize and resize the images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UxtxvqRjh7Nm" + }, + "outputs": [], + "source": [ + "def preprocess_fn(data):\n", + " image = data['image']\n", + "\n", + " # Normalize [0, 255] to [0, 1]\n", + " image = tf.cast(image, tf.float32)\n", + " image = image / 255.\n", + "\n", + " # Resize the images to 224 x 224\n", + " image = tf.image.resize(image, (224, 224))\n", + "\n", + " data['image'] = image\n", + " return data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qz27YrZahdvn" + }, + "source": [ + "Let's take a look at a few examples from the dataset." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "j6LkAxv3f-aJ" + }, + "outputs": [], + "source": [ + "batch = dataset['validation'].map(preprocess_fn).batch(25).as_numpy_iterator()\n", + "examples = next(batch)\n", + "plot(examples)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eHlEAhL3hq2R" + }, + "source": [ + "## Model\n", + "\n", + "Let's load the classifier from TF Hub, get some predictions, and see how the model's predictions look on a few examples." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b6eIWkTjIQhS" + }, + "outputs": [], + "source": [ + "classifier = hub.KerasLayer('https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2')\n", + "probabilities = classifier(examples['image'])\n", + "predictions = tf.argmax(probabilities, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MTQA1YAltfRZ" + }, + "outputs": [], + "source": [ + "plot(examples, predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MuFE8A5aZv9z" + }, + "source": [ + "## Evaluation & robustness\n", + "\n", + "Let's measure the *accuracy* of our classifier on a split of the dataset. We can also look at the *robustness* of the model by evaluating its performance on a non-cassava dataset. For images from other plant datasets, like iNaturalist or beans, the model should almost always return *unknown*." 
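Before running the evaluation loop below, it can help to sanity-check the classifier output on a single image. This is a hypothetical sketch, not part of the original notebook; it assumes `classifier`, `examples`, `class_names`, and `name_map` from the cells above, and that the classifier returns one probability per class in the same order as `class_names`.

```python
# Hypothetical sanity check: per-class probabilities for the first example image.
single_probs = classifier(examples['image'][:1])[0].numpy()
for class_id, prob in enumerate(single_probs):
  print('%20s: %.3f' % (name_map[class_names[class_id]], prob))
```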
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "0ERcNxs0kHd3" + }, + "outputs": [], + "source": [ + "#@title Parameters {run: \"auto\"}\n", + "\n", + "DATASET = 'cassava' #@param {type:\"string\"} ['cassava', 'beans', 'i_naturalist2017']\n", + "DATASET_SPLIT = 'test' #@param {type:\"string\"} ['train', 'test', 'validation']\n", + "BATCH_SIZE = 32 #@param {type:\"integer\"}\n", + "MAX_EXAMPLES = 1000 #@param {type:\"integer\"}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mt0-IVmZplbb" + }, + "outputs": [], + "source": [ + "def label_to_unknown_fn(data):\n", + " data['label'] = 5 # Override label to unknown.\n", + " return data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cQYvY3IvY2Nx" + }, + "outputs": [], + "source": [ + "# Preprocess the examples and map the image label to unknown for non-cassava datasets.\n", + "ds = tfds.load(DATASET, split=DATASET_SPLIT).map(preprocess_fn).take(MAX_EXAMPLES)\n", + "dataset_description = DATASET\n", + "if DATASET != 'cassava':\n", + " ds = ds.map(label_to_unknown_fn)\n", + " dataset_description += ' (labels mapped to unknown)'\n", + "ds = ds.batch(BATCH_SIZE)\n", + "\n", + "# Calculate the accuracy of the model\n", + "metric = tf.keras.metrics.Accuracy()\n", + "for examples in ds:\n", + " probabilities = classifier(examples['image'])\n", + " predictions = tf.math.argmax(probabilities, axis=-1)\n", + " labels = examples['label']\n", + " metric.update_state(labels, predictions)\n", + "\n", + "print('Accuracy on %s: %.2f' % (dataset_description, metric.result().numpy()))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rvS18sBExpdL" + }, + "source": [ + "## Learn more\n", + "\n", + "* Learn more about the model on TensorFlow Hub: https://tfhub.dev/google/cropnet/classifier/cassava_disease_V1/2\n", + "* Learn how to build a custom image classifier running on a mobile phone with [ML Kit](https://developers.google.com/ml-kit/custom-models#tfhub) with the [TensorFlow Lite version of this model](https://tfhub.dev/google/lite-model/cropnet/classifier/cassava_disease_V1/1)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "cropnet_cassava.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cropnet_on_device.ipynb b/site/en/hub/tutorials/cropnet_on_device.ipynb new file mode 100644 index 00000000000..0e1cb1e0b0d --- /dev/null +++ b/site/en/hub/tutorials/cropnet_on_device.ipynb @@ -0,0 +1,724 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "3XX46cTrh6iD" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors. \n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sKrlWr6Kh-mF" + }, + "outputs": [], + "source": [ + "#@title Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DMVmlJ0fAMkH" + }, + "source": [ + "# Fine tuning models for plant disease detection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hk5u_9KN1m-t" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OEHq-hV5sWYO" + }, + "source": [ + "This notebook shows you how to **fine-tune CropNet models from TensorFlow Hub** on a dataset from TFDS or your own crop disease detection dataset.\n", + "\n", + "You will:\n", + "- Load the TFDS cassava dataset or your own data\n", + "- Enrich the data with unknown (negative) examples to get a more robust model\n", + "- Apply image augmentations to the data\n", + "- Load and fine tune a [CropNet model](https://tfhub.dev/s?module-type=image-feature-vector&q=cropnet) from TF Hub\n", + "- Export a TFLite model, ready to be deployed on your app with [Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/image_classifier), [MLKit](https://developers.google.com/ml-kit/vision/image-labeling/custom-models/android) or [TFLite](https://www.tensorflow.org/lite/guide/inference) directly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dQvS4p807mZf" + }, + "source": [ + "## Imports and Dependencies\n", + "\n", + "Before starting, you'll need to install some of the dependencies that will be needed like [Model Maker](https://www.tensorflow.org/lite/guide/model_maker#installation) and the latest version of TensorFlow Datasets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5BDTEMtexXE3" + }, + "outputs": [], + "source": [ + "!sudo apt install -q libportaudio2\n", + "## image_classifier library requires numpy <= 1.23.5\n", + "!pip install \"numpy<=1.23.5\"\n", + "!pip install --use-deprecated=legacy-resolver tflite-model-maker-nightly\n", + "!pip install -U tensorflow-datasets\n", + "## scann library requires tensorflow < 2.9.0\n", + "!pip install \"tensorflow<2.9.0\"\n", + "!pip install \"tensorflow-datasets~=4.8.0\" # protobuf>=3.12.2\n", + "!pip install tensorflow-metadata~=1.10.0 # protobuf>=3.13\n", + "## tensorflowjs requires packaging < 20.10\n", + "!pip install \"packaging<20.10\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nekG9Iwgxbx0" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import os\n", + "import seaborn as sns\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "\n", + "from tensorflow_examples.lite.model_maker.core.export_format import ExportFormat\n", + "from tensorflow_examples.lite.model_maker.core.task import image_preprocessing\n", + "\n", + "from tflite_model_maker import image_classifier\n", + "from tflite_model_maker import ImageClassifierDataLoader\n", + "from tflite_model_maker.image_classifier import ModelSpec" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fV0k2Q4x4N_4" + }, + "source": [ + "## Load a TFDS dataset to fine-tune on\n", + "\n", + "Lets use the publicly available [Cassava Leaf Disease dataset](https://www.tensorflow.org/datasets/catalog/cassava) from TFDS." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TTaD5W_1xjUz" + }, + "outputs": [], + "source": [ + "tfds_name = 'cassava'\n", + "(ds_train, ds_validation, ds_test), ds_info = tfds.load(\n", + " name=tfds_name,\n", + " split=['train', 'validation', 'test'],\n", + " with_info=True,\n", + " as_supervised=True)\n", + "TFLITE_NAME_PREFIX = tfds_name" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xDuDGUAxyHtA" + }, + "source": [ + "## Or alternatively load your own data to fine-tune on\n", + "\n", + "Instead of using a TFDS dataset, you can also train on your own data. This code snippet shows how to load your own custom dataset. See [this](https://www.tensorflow.org/datasets/api_docs/python/tfds/folder_dataset/ImageFolder) link for the supported structure of the data. An example is provided here using the publicly available [Cassava Leaf Disease dataset](https://www.tensorflow.org/datasets/catalog/cassava)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k003tLvflHpC" + }, + "outputs": [], + "source": [ + "# data_root_dir = tf.keras.utils.get_file(\n", + "# 'cassavaleafdata.zip',\n", + "# 'https://storage.googleapis.com/emcassavadata/cassavaleafdata.zip',\n", + "# extract=True)\n", + "# data_root_dir = os.path.splitext(data_root_dir)[0] # Remove the .zip extension\n", + "\n", + "# builder = tfds.ImageFolder(data_root_dir)\n", + "\n", + "# ds_info = builder.info\n", + "# ds_train = builder.as_dataset(split='train', as_supervised=True)\n", + "# ds_validation = builder.as_dataset(split='validation', as_supervised=True)\n", + "# ds_test = builder.as_dataset(split='test', as_supervised=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hs3XCVLo4Fa1" + }, + "source": [ + "## Visualize samples from train split\n", + "\n", + "Let's take a look at some examples from the dataset including the class id and the class name for the image samples and their labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "89GkD60Eyfe0" + }, + "outputs": [], + "source": [ + "_ = tfds.show_examples(ds_train, ds_info)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-KW-n0lV4AZ-" + }, + "source": [ + "## Add images to be used as Unknown examples from TFDS datasets\n", + "\n", + "Add additional unknown (negative) examples to the training dataset and assign a new unknown class label number to them. The goal is to have a model that, when used in practice (e.g. in the field), has the option of predicting \"Unknown\" when it sees something unexpected.\n", + "\n", + "Below you can see a list of datasets that will be used to sample the additional unknown imagery. It includes 3 completely different datasets to increase diversity. 
One of them is a beans leaf disease dataset, so that the model has exposure to diseased plants other than cassava.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SYDMjRhDkDnd" + }, + "outputs": [], + "source": [ + "UNKNOWN_TFDS_DATASETS = [{\n", + " 'tfds_name': 'imagenet_v2/matched-frequency',\n", + " 'train_split': 'test[:80%]',\n", + " 'test_split': 'test[80%:]',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}, {\n", + " 'tfds_name': 'oxford_flowers102',\n", + " 'train_split': 'train',\n", + " 'test_split': 'test',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}, {\n", + " 'tfds_name': 'beans',\n", + " 'train_split': 'train',\n", + " 'test_split': 'test',\n", + " 'num_examples_ratio_to_normal': 1.0,\n", + "}]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XUM_d0evktGi" + }, + "source": [ + "The UNKNOWN datasets are also loaded from TFDS." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5DdWgBTe8uKR" + }, + "outputs": [], + "source": [ + "# Load unknown datasets.\n", + "weights = [\n", + " spec['num_examples_ratio_to_normal'] for spec in UNKNOWN_TFDS_DATASETS\n", + "]\n", + "num_unknown_train_examples = sum(\n", + " int(w * ds_train.cardinality().numpy()) for w in weights)\n", + "ds_unknown_train = tf.data.Dataset.sample_from_datasets([\n", + " tfds.load(\n", + " name=spec['tfds_name'], split=spec['train_split'],\n", + " as_supervised=True).repeat(-1) for spec in UNKNOWN_TFDS_DATASETS\n", + "], weights).take(num_unknown_train_examples)\n", + "ds_unknown_train = ds_unknown_train.apply(\n", + " tf.data.experimental.assert_cardinality(num_unknown_train_examples))\n", + "ds_unknown_tests = [\n", + " tfds.load(\n", + " name=spec['tfds_name'], split=spec['test_split'], as_supervised=True)\n", + " for spec in UNKNOWN_TFDS_DATASETS\n", + "]\n", + "ds_unknown_test = ds_unknown_tests[0]\n", + "for ds in ds_unknown_tests[1:]:\n", + " ds_unknown_test = ds_unknown_test.concatenate(ds)\n", + "\n", + "# All examples from the unknown datasets will get a new class label number.\n", + "num_normal_classes = len(ds_info.features['label'].names)\n", + "unknown_label_value = tf.convert_to_tensor(num_normal_classes, tf.int64)\n", + "ds_unknown_train = ds_unknown_train.map(lambda image, _:\n", + " (image, unknown_label_value))\n", + "ds_unknown_test = ds_unknown_test.map(lambda image, _:\n", + " (image, unknown_label_value))\n", + "\n", + "# Merge the normal train dataset with the unknown train dataset.\n", + "weights = [\n", + " ds_train.cardinality().numpy(),\n", + " ds_unknown_train.cardinality().numpy()\n", + "]\n", + "ds_train_with_unknown = tf.data.Dataset.sample_from_datasets(\n", + " [ds_train, ds_unknown_train], [float(w) for w in weights])\n", + "ds_train_with_unknown = ds_train_with_unknown.apply(\n", + " tf.data.experimental.assert_cardinality(sum(weights)))\n", + "\n", + "print((f\"Added {ds_unknown_train.cardinality().numpy()} negative examples.\"\n", + " f\"Training dataset has now {ds_train_with_unknown.cardinality().numpy()}\"\n", + " ' examples in total.'))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "am6eKbzt7raH" + }, + "source": [ + "## Apply augmentations" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sxIUP0Flk35V" + }, + "source": [ + "For all the images, to make them more diverse, you'll apply some augmentation, like changes in:\n", + "- Brightness\n", + "- Contrast\n", + "- Saturation\n", + "- Hue\n", + "- Crop\n", + "\n", + "These 
types of augmentations help make the model more robust to variations in image inputs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q_BiOkXjqRju" + }, + "outputs": [], + "source": [ + "def random_crop_and_random_augmentations_fn(image):\n", + " # preprocess_for_train does random crop and resize internally.\n", + " image = image_preprocessing.preprocess_for_train(image)\n", + " image = tf.image.random_brightness(image, 0.2)\n", + " image = tf.image.random_contrast(image, 0.5, 2.0)\n", + " image = tf.image.random_saturation(image, 0.75, 1.25)\n", + " image = tf.image.random_hue(image, 0.1)\n", + " return image\n", + "\n", + "\n", + "def random_crop_fn(image):\n", + " # preprocess_for_train does random crop and resize internally.\n", + " image = image_preprocessing.preprocess_for_train(image)\n", + " return image\n", + "\n", + "\n", + "def resize_and_center_crop_fn(image):\n", + " image = tf.image.resize(image, (256, 256))\n", + " image = image[16:240, 16:240]\n", + " return image\n", + "\n", + "\n", + "no_augment_fn = lambda image: image\n", + "\n", + "train_augment_fn = lambda image, label: (\n", + " random_crop_and_random_augmentations_fn(image), label)\n", + "eval_augment_fn = lambda image, label: (resize_and_center_crop_fn(image), label)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RUfqE1c3l6my" + }, + "source": [ + "To apply the augmentations, use the `map` method of the Dataset class." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Uq-NCtaH_h8j" + }, + "outputs": [], + "source": [ + "ds_train_with_unknown = ds_train_with_unknown.map(train_augment_fn)\n", + "ds_validation = ds_validation.map(eval_augment_fn)\n", + "ds_test = ds_test.map(eval_augment_fn)\n", + "ds_unknown_test = ds_unknown_test.map(eval_augment_fn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DvnwolLiCqYX" + }, + "source": [ + "## Wrap the data into Model Maker friendly format\n", + "\n", + "To use these datasets with Model Maker, they need to be wrapped in an ImageClassifierDataLoader class." 
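Before wrapping the datasets, it can be worth eyeballing a couple of augmented training images to confirm the augmentation pipeline behaves as expected. This is a hypothetical sketch, not part of the original notebook; it assumes `ds_train_with_unknown`, `tf`, and `plt` from the cells above.

```python
# Hypothetical preview: plot a few augmented training images.
plt.figure(figsize=(9, 3))
for i, (image, label) in enumerate(ds_train_with_unknown.take(3)):
  plt.subplot(1, 3, i + 1)
  # Color augmentations can push values slightly outside [0, 1], so clip for display.
  plt.imshow(tf.clip_by_value(image, 0.0, 1.0).numpy())
  plt.title('label: %d' % int(label.numpy()))
  plt.axis('off')
plt.show()
```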
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OXPWEDFDRlVu" + }, + "outputs": [], + "source": [ + "label_names = ds_info.features['label'].names + ['UNKNOWN']\n", + "\n", + "train_data = ImageClassifierDataLoader(ds_train_with_unknown,\n", + " ds_train_with_unknown.cardinality(),\n", + " label_names)\n", + "validation_data = ImageClassifierDataLoader(ds_validation,\n", + " ds_validation.cardinality(),\n", + " label_names)\n", + "test_data = ImageClassifierDataLoader(ds_test, ds_test.cardinality(),\n", + " label_names)\n", + "unknown_test_data = ImageClassifierDataLoader(ds_unknown_test,\n", + " ds_unknown_test.cardinality(),\n", + " label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j2iDwq2Njpb_" + }, + "source": [ + "## Run training\n", + "\n", + "[TensorFlow Hub](https://tfhub.dev) has multiple models available for Transfer Learning.\n", + "\n", + "Here you can choose one, and you can keep experimenting with other ones to try to get better results.\n", + "\n", + "If you want even more models to try, you can add them from this [collection](https://tfhub.dev/google/collections/image/1).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "5UhNpR0Ex_5-" + }, + "outputs": [], + "source": [ + "#@title Choose a base model\n", + "\n", + "model_name = 'mobilenet_v3_large_100_224' #@param ['cropnet_cassava', 'cropnet_concat', 'cropnet_imagenet', 'mobilenet_v3_large_100_224']\n", + "\n", + "map_model_name = {\n", + " 'cropnet_cassava':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/cassava_disease_V1/1',\n", + " 'cropnet_concat':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/concat/1',\n", + " 'cropnet_imagenet':\n", + " 'https://tfhub.dev/google/cropnet/feature_vector/imagenet/1',\n", + " 'mobilenet_v3_large_100_224':\n", + " 'https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5',\n", + "}\n", + "\n", + "model_handle = map_model_name[model_name]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Y1ecXlQgR5Uk" + }, + "source": [ + "To fine-tune the model, you will use Model Maker. This makes the overall solution easier because, after training the model, it also converts it to TFLite.\n", + "\n", + "Model Maker makes this conversion as good as possible and includes all the necessary information to easily deploy the model on-device later.\n", + "\n", + "The model spec is how you tell Model Maker which base model you'd like to use." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L8P-VTqJ8GaF" + }, + "outputs": [], + "source": [ + "image_model_spec = ModelSpec(uri=model_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AnWN3kk6jCHf" + }, + "source": [ + "One important detail here is setting `train_whole_model`, which fine-tunes the base model during training. This makes the process slower, but the final model has a higher accuracy. Setting `shuffle` makes sure the model sees the data in a randomly shuffled order, which is a best practice for model learning." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KRbSDbnA6Xap" + }, + "outputs": [], + "source": [ + "model = image_classifier.create(\n", + " train_data,\n", + " model_spec=image_model_spec,\n", + " batch_size=128,\n", + " learning_rate=0.03,\n", + " epochs=5,\n", + " shuffle=True,\n", + " train_whole_model=True,\n", + " validation_data=validation_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "buFDW0izBqIQ" + }, + "source": [ + "## Evaluate model on test split" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OYIZ1rlV7lxm" + }, + "outputs": [], + "source": [ + "model.evaluate(test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YJaReZ_OVU71" + }, + "source": [ + "To have an even better understanding of the fine tuned model, it's good to analyse the confusion matrix. This will show how often one class is predicted as another." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o9_vs1nNKOLF" + }, + "outputs": [], + "source": [ + "def predict_class_label_number(dataset):\n", + " \"\"\"Runs inference and returns predictions as class label numbers.\"\"\"\n", + " rev_label_names = {l: i for i, l in enumerate(label_names)}\n", + " return [\n", + " rev_label_names[o[0][0]]\n", + " for o in model.predict_top_k(dataset, batch_size=128)\n", + " ]\n", + "\n", + "def show_confusion_matrix(cm, labels):\n", + " plt.figure(figsize=(10, 8))\n", + " sns.heatmap(cm, xticklabels=labels, yticklabels=labels, \n", + " annot=True, fmt='g')\n", + " plt.xlabel('Prediction')\n", + " plt.ylabel('Label')\n", + " plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7BWZCKerCNF_" + }, + "outputs": [], + "source": [ + "confusion_mtx = tf.math.confusion_matrix(\n", + " list(ds_test.map(lambda x, y: y)),\n", + " predict_class_label_number(test_data),\n", + " num_classes=len(label_names))\n", + "\n", + "show_confusion_matrix(confusion_mtx, label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ksu9BFULBvmj" + }, + "source": [ + "## Evaluate model on unknown test data\n", + "\n", + "In this evaluation we expect the model to have accuracy of almost 1. All images the model is tested on are not related to the normal dataset and hence we expect the model to predict the \"Unknown\" class label." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5wvZwliZcJP" + }, + "outputs": [], + "source": [ + "model.evaluate(unknown_test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jm47Odo5Vaiq" + }, + "source": [ + "Print the confusion matrix." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E_gEX3oWH1YT" + }, + "outputs": [], + "source": [ + "unknown_confusion_mtx = tf.math.confusion_matrix(\n", + " list(ds_unknown_test.map(lambda x, y: y)),\n", + " predict_class_label_number(unknown_test_data),\n", + " num_classes=len(label_names))\n", + "\n", + "show_confusion_matrix(unknown_confusion_mtx, label_names)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "o2agDx2fCHyd" + }, + "source": [ + "## Export the model as TFLite and SavedModel\n", + "\n", + "Now we can export the trained models in TFLite and SavedModel formats for deploying on-device and using for inference in TensorFlow." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bAFvBmMr7owW" + }, + "outputs": [], + "source": [ + "tflite_filename = f'{TFLITE_NAME_PREFIX}_model_{model_name}.tflite'\n", + "model.export(export_dir='.', tflite_filename=tflite_filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Pz0-6To2C4yM" + }, + "outputs": [], + "source": [ + "# Export saved model version.\n", + "model.export(export_dir='.', export_format=ExportFormat.SAVED_MODEL)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4V4GdQqxjEU7" + }, + "source": [ + "## Next steps\n", + "\n", + "The model that you've just trained can be used on mobile devices and even deployed in the field!\n", + "\n", + "**To download the model, click the folder icon for the Files menu on the left side of the colab, and choose the download option.**\n", + "\n", + "The same technique used here could be applied to other plant diseases tasks that might be more suitable for your use case or any other type of image classification task. If you want to follow up and deploy on an Android app, you can continue on this [Android quickstart guide](https://www.tensorflow.org/lite/android/quickstart)." + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "3XX46cTrh6iD", + "xDuDGUAxyHtA" + ], + "name": "cropnet_on_device.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb b/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb new file mode 100644 index 00000000000..920d197811e --- /dev/null +++ b/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb @@ -0,0 +1,4463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "**Copyright 2019 The TensorFlow Hub Authors.**\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "co7MV6sX7Xto" + }, + "source": [ + "# Cross-Lingual Similarity and Semantic Search Engine with Multilingual Universal Sentence Encoder\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eAVQGidpL8v5" + }, + "source": [ + "This notebook illustrates how to access the Multilingual Universal Sentence Encoder module and use it for sentence similarity across multiple languages. This module is an extension of the [original Universal Encoder module](https://tfhub.dev/google/universal-sentence-encoder/2).\n", + "\n", + "The notebook is divided as follows:\n", + "\n", + "* The first section shows a visualization of sentences between pair of languages. This is a more academic exercise. \n", + "* In the second section, we show how to build a semantic search engine from a sample of a Wikipedia corpus in multiple languages." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UvNRbHGarYeR" + }, + "source": [ + "## Citation\n", + "\n", + "*Research papers that make use of the models explored in this colab should cite:*\n", + "\n", + "### [Multilingual universal sentence encoder for semantic retrieval](https://arxiv.org/abs/1907.04307)\n", + "Yinfei Yang, Daniel Cer, Amin Ahmad, Mandy Guo, Jax Law, Noah Constant, Gustavo Hernandez Abrego, Steve Yuan, Chris Tar, Yun-Hsuan Sung, Brian Strope, and Ray Kurzweil. 2019.\n", + " arXiv preprint arXiv:1907.04307" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pOTzp8O36CyQ" + }, + "source": [ + "## Setup\n", + "\n", + "This section sets up the environment for access to the Multilingual Universal Sentence Encoder Module and also prepares a set of English sentences and their translations. In the following sections, the multilingual module will be used to compute similarity *across languages*." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "lVjNK8shFKOC" + }, + "outputs": [], + "source": [ + "%%capture\n", + "#@title Setup Environment\n", + "# Install the latest Tensorflow version.\n", + "!pip install \"tensorflow-text==2.11.*\"\n", + "!pip install bokeh\n", + "!pip install simpleneighbors[annoy]\n", + "!pip install tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "MSeY-MUQo2Ha" + }, + "outputs": [], + "source": [ + "#@title Setup common imports and functions\n", + "import bokeh\n", + "import bokeh.models\n", + "import bokeh.plotting\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_text import SentencepieceTokenizer\n", + "import sklearn.metrics.pairwise\n", + "\n", + "from simpleneighbors import SimpleNeighbors\n", + "from tqdm import tqdm\n", + "from tqdm import trange\n", + "\n", + "def visualize_similarity(embeddings_1, embeddings_2, labels_1, labels_2,\n", + " plot_title,\n", + " plot_width=1200, plot_height=600,\n", + " xaxis_font_size='12pt', yaxis_font_size='12pt'):\n", + "\n", + " assert len(embeddings_1) == len(labels_1)\n", + " assert len(embeddings_2) == len(labels_2)\n", + "\n", + " # arccos based text similarity (Yang et al. 2019; Cer et al. 
2019)\n", + " sim = 1 - np.arccos(\n", + " sklearn.metrics.pairwise.cosine_similarity(embeddings_1,\n", + " embeddings_2))/np.pi\n", + "\n", + " embeddings_1_col, embeddings_2_col, sim_col = [], [], []\n", + " for i in range(len(embeddings_1)):\n", + " for j in range(len(embeddings_2)):\n", + " embeddings_1_col.append(labels_1[i])\n", + " embeddings_2_col.append(labels_2[j])\n", + " sim_col.append(sim[i][j])\n", + " df = pd.DataFrame(zip(embeddings_1_col, embeddings_2_col, sim_col),\n", + " columns=['embeddings_1', 'embeddings_2', 'sim'])\n", + "\n", + " mapper = bokeh.models.LinearColorMapper(\n", + " palette=[*reversed(bokeh.palettes.YlOrRd[9])], low=df.sim.min(),\n", + " high=df.sim.max())\n", + "\n", + " p = bokeh.plotting.figure(title=plot_title, x_range=labels_1,\n", + " x_axis_location=\"above\",\n", + " y_range=[*reversed(labels_2)],\n", + " plot_width=plot_width, plot_height=plot_height,\n", + " tools=\"save\",toolbar_location='below', tooltips=[\n", + " ('pair', '@embeddings_1 ||| @embeddings_2'),\n", + " ('sim', '@sim')])\n", + " p.rect(x=\"embeddings_1\", y=\"embeddings_2\", width=1, height=1, source=df,\n", + " fill_color={'field': 'sim', 'transform': mapper}, line_color=None)\n", + "\n", + " p.title.text_font_size = '12pt'\n", + " p.axis.axis_line_color = None\n", + " p.axis.major_tick_line_color = None\n", + " p.axis.major_label_standoff = 16\n", + " p.xaxis.major_label_text_font_size = xaxis_font_size\n", + " p.xaxis.major_label_orientation = 0.25 * np.pi\n", + " p.yaxis.major_label_text_font_size = yaxis_font_size\n", + " p.min_border_right = 300\n", + "\n", + " bokeh.io.output_notebook()\n", + " bokeh.io.show(p)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gk2IRjZFGDsK" + }, + "source": [ + "This is additional boilerplate code where we import the pre-trained ML model we will use to encode text throughout this notebook." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mkmF3w8WGLcM" + }, + "outputs": [], + "source": [ + "# The 16-language multilingual module is the default but feel free\n", + "# to pick others from the list and compare the results.\n", + "module_url = 'https://tfhub.dev/google/universal-sentence-encoder-multilingual/3' #@param ['https://tfhub.dev/google/universal-sentence-encoder-multilingual/3', 'https://tfhub.dev/google/universal-sentence-encoder-multilingual-large/3']\n", + "\n", + "model = hub.load(module_url)\n", + "\n", + "def embed_text(input):\n", + " return model(input)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jhLPq6AROyFk" + }, + "source": [ + "# Visualize Text Similarity Between Languages\n", + "With the sentence embeddings now in hand, we can visualize semantic similarity across different languages." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8xdAogbxJDTD" + }, + "source": [ + "## Computing Text Embeddings\n", + "\n", + "We first define a set of sentences translated to various languages in parallel. Then, we precompute the embeddings for all of our sentences." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q8F4LNGFqOiq" + }, + "outputs": [], + "source": [ + "# Some texts of different lengths in different languages.\n", + "arabic_sentences = ['كلب', 'الجراء لطيفة.', 'أستمتع بالمشي لمسافات طويلة على طول الشاطئ مع كلبي.']\n", + "chinese_sentences = ['狗', '小狗很好。', '我喜欢和我的狗一起沿着海滩散步。']\n", + "english_sentences = ['dog', 'Puppies are nice.', 'I enjoy taking long walks along the beach with my dog.']\n", + "french_sentences = ['chien', 'Les chiots sont gentils.', 'J\\'aime faire de longues promenades sur la plage avec mon chien.']\n", + "german_sentences = ['Hund', 'Welpen sind nett.', 'Ich genieße lange Spaziergänge am Strand entlang mit meinem Hund.']\n", + "italian_sentences = ['cane', 'I cuccioli sono carini.', 'Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.']\n", + "japanese_sentences = ['犬', '子犬はいいです', '私は犬と一緒にビーチを散歩するのが好きです']\n", + "korean_sentences = ['개', '강아지가 좋다.', '나는 나의 개와 해변을 따라 길게 산책하는 것을 즐긴다.']\n", + "russian_sentences = ['собака', 'Милые щенки.', 'Мне нравится подолгу гулять по пляжу со своей собакой.']\n", + "spanish_sentences = ['perro', 'Los cachorros son agradables.', 'Disfruto de dar largos paseos por la playa con mi perro.']\n", + "\n", + "# Multilingual example\n", + "multilingual_example = [\"Willkommen zu einfachen, aber\", \"verrassend krachtige\", \"multilingüe\", \"compréhension du language naturel\", \"модели.\", \"大家是什么意思\" , \"보다 중요한\", \".اللغة التي يتحدثونها\"]\n", + "multilingual_example_in_en = [\"Welcome to simple yet\", \"surprisingly powerful\", \"multilingual\", \"natural language understanding\", \"models.\", \"What people mean\", \"matters more than\", \"the language they speak.\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "weXZqLtTJY9b" + }, + "outputs": [], + "source": [ + "# Compute embeddings.\n", + "ar_result = embed_text(arabic_sentences)\n", + "en_result = embed_text(english_sentences)\n", + "es_result = embed_text(spanish_sentences)\n", + "de_result = embed_text(german_sentences)\n", + "fr_result = embed_text(french_sentences)\n", + "it_result = embed_text(italian_sentences)\n", + "ja_result = embed_text(japanese_sentences)\n", + "ko_result = embed_text(korean_sentences)\n", + "ru_result = embed_text(russian_sentences)\n", + "zh_result = embed_text(chinese_sentences)\n", + "\n", + "multilingual_result = embed_text(multilingual_example)\n", + "multilingual_in_en_result = embed_text(multilingual_example_in_en)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_3zGWuF-GhUm" + }, + "source": [ + "## Visualizing Similarity\n", + "\n", + "With text embeddings in hand, we can take their dot-product to visualize how similar sentences are between languages. A darker color indicates the embeddings are semantically similar." 
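Before the heatmaps in the next subsection, here is a minimal numeric check (hypothetical, not part of the original notebook) that reuses the same arccos-based similarity as `visualize_similarity` to match each English sentence with its closest Spanish sentence. It assumes `en_result`, `es_result`, `english_sentences`, `spanish_sentences`, `np`, and `sklearn` from the cells above.

```python
# Hypothetical numeric check: angular similarity between English and Spanish sentences.
sim = 1 - np.arccos(
    sklearn.metrics.pairwise.cosine_similarity(en_result, es_result)) / np.pi
for i, en_sentence in enumerate(english_sentences):
  j = int(np.argmax(sim[i]))
  print('%r is closest to %r (sim=%.2f)' % (en_sentence, spanish_sentences[j], sim[i][j]))
```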
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WOEIJA0mh70g" + }, + "source": [ + "### Multilingual Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "R2hbCMhmiDWR" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " 
output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + 
" \n", + " for (var i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
     [Output: Bokeh heatmap titled "Multilingual Universal Sentence Encoder for Semantic Retrieval (Yang et al., 2019)". Axes: the eight English snippets ("Welcome to simple yet", "surprisingly powerful", "multilingual", "natural language understanding", "models.", "What people mean", "matters more than", "the language they speak.") versus their German, Dutch, Spanish, French, Russian, Chinese, Korean, and Arabic translations; cell color encodes the pairwise similarity score (roughly 0.48 to 0.90). The raw saved output is BokehJS 1.4.0 CDN loader boilerplate plus inline plot JSON.]
   ],
   "source": [
    "visualize_similarity(multilingual_in_en_result, multilingual_result,\n",
    "                     multilingual_example_in_en, multilingual_example, \"Multilingual Universal Sentence Encoder for Semantic Retrieval (Yang et al., 2019)\")\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "h3TEhllsq3ax"
   },
   "source": [
    "### English-Arabic Similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "Q9UDpStmq7Ii"
   },
   "outputs": [
     [Output: Bokeh heatmap titled "English-Arabic Similarity". Axes: the English sentences ("dog", "Puppies are nice.", "I enjoy taking long walks along the beach with my dog.") versus their Arabic translations; cell color encodes the pairwise similarity score (roughly 0.51 to 0.84). The raw saved output repeats the BokehJS 1.4.0 CDN loader boilerplate plus inline plot JSON.]
   ],
   "source": [
    "visualize_similarity(en_result, ar_result, english_sentences, arabic_sentences, 'English-Arabic Similarity')"
   ]
  },
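For context while reading these results: `en_result`, `ar_result`, and `ru_result` are presumably computed earlier in the notebook, outside this section, by embedding each sentence list with the multilingual Universal Sentence Encoder from TF-Hub. The sketch below is a hedged illustration of that step; the module handle is one published multilingual USE variant and the `embed` helper name is made up here, so the notebook's own code may differ:

```python
import numpy as np
import tensorflow_hub as hub
import tensorflow_text  # noqa: F401  (registers the SentencePiece ops the model needs)

# Illustrative handle for a multilingual Universal Sentence Encoder SavedModel.
encoder = hub.load("https://tfhub.dev/google/universal-sentence-encoder-multilingual/3")


def embed(sentences):
  # Maps a batch of strings (in any supported language) to 512-dimensional,
  # approximately unit-normalized embeddings.
  return np.array(encoder(sentences))


# Sentence list taken from the heatmap axis labels above.
english_sentences = ["dog", "Puppies are nice.",
                     "I enjoy taking long walks along the beach with my dog."]
en_result = embed(english_sentences)
```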
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "QF9z48HMp4WL"
   },
   "source": [
    "### English-Russian Similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "QE68UejYp86z"
   },
   "outputs": [
     [Output: Bokeh heatmap titled "English-Russian Similarity". Axes: the same English sentences versus their Russian translations; cell color encodes the pairwise similarity score (roughly 0.58 to 0.88). The raw saved output repeats the BokehJS 1.4.0 CDN loader boilerplate plus inline plot JSON.]
   ],
   "source": [
    "visualize_similarity(en_result, ru_result, english_sentences, russian_sentences, 'English-Russian Similarity')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "BJkL6Az0QXNN"
   },
   "source": [
    "### English-Spanish Similarity"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "CH_BXVGhQ0GL"
   },
   "outputs": [
     [Output: BokehJS 1.4.0 CDN loader boilerplate for the "English-Spanish Similarity" cell (truncated here).]
*/\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + 
" \n", + " for (var i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n 
run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"bb848e12-e360-4876-aa19-21896caab34d\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1274\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1277\",\"type\":\"Grid\"},{\"id\":\"1281\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1278\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1291\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1264\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1284\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1266\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1270\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1268\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1272\",\"type\":\"CategoricalScale\"}},\"id\":\"1263\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1272\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1270\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Disfruto de dar largos paseos por la playa con mi perro.\",\"Los cachorros son agradables.\",\"perro\"]},\"id\":\"1268\",\"type\":\"FactorRange\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1266\",\"type\":\"FactorRange\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1262\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1289\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\",\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\",\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAA4AIT7T8AAAAAcHfkPwAAAKAngeI/AAAAQNoA5D8AAADAvfvoPwAAAGCGFeI/AAAAgMNr4j8AAAAArbPjPwAAAGCJ1eo/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1319\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1320\",\"type\":\"UnionRenderers\"}},\"id\":\"1287\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"high\":0.9085707068443298,\"low\":0.5651275515556335,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1262\",\"type\":\"LinearColorMapper\"},{\"attributes\":{\"source\":{\"id\":\"1287\",\"type\":\"ColumnDataSource\"}},\"id\":\"1292\",\"type\":\"CDSView\"},{\"attributes\":{\"text\":\"English-Spanish Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1264\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| 
@embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1283\",\"type\":\"HoverTool\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1290\",\"type\":\"Rect\"},{\"attributes\":{\"ticker\":{\"id\":\"1275\",\"type\":\"CategoricalTicker\"}},\"id\":\"1277\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1282\",\"type\":\"SaveTool\"},{\"attributes\":{\"data_source\":{\"id\":\"1287\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1289\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1290\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1292\",\"type\":\"CDSView\"}},\"id\":\"1291\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1279\",\"type\":\"CategoricalTicker\"}},\"id\":\"1281\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1315\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1282\",\"type\":\"SaveTool\"},{\"id\":\"1283\",\"type\":\"HoverTool\"}]},\"id\":\"1284\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1320\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1279\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1315\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1279\",\"type\":\"CategoricalTicker\"}},\"id\":\"1278\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1319\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1317\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1317\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1275\",\"type\":\"CategoricalTicker\"}},\"id\":\"1274\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1275\",\"type\":\"CategoricalTicker\"}],\"root_ids\":[\"1263\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"bb848e12-e360-4876-aa19-21896caab34d\",\"roots\":{\"1263\":\"81e993c9-fc6b-4169-8c6b-a0101097b959\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1263" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + 
"source": [ + "visualize_similarity(en_result, es_result, english_sentences, spanish_sentences, 'English-Spanish Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "imn28LCiQO7d" + }, + "source": [ + "### English-Italian Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X9uD3DirPIGd" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 
1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + 
" \n", + " for (var i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n 
run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"85d1d6b6-c6cd-4e71-b29b-2cc49ada74c5\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1375\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1378\",\"type\":\"Grid\"},{\"id\":\"1382\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1379\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1392\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1365\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1385\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1367\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1371\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1369\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1373\",\"type\":\"CategoricalScale\"}},\"id\":\"1364\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1371\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1427\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1373\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1423\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1425\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1376\",\"type\":\"CategoricalTicker\"}},\"id\":\"1375\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"I cuccioli sono carini.\",\"cane\"]},\"id\":\"1369\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1376\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1425\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1376\",\"type\":\"CategoricalTicker\"}},\"id\":\"1378\",\"type\":\"Grid\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1423\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1380\",\"type\":\"CategoricalTicker\"}},\"id\":\"1379\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1380\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio 
cane.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAoHcI7T8AAADAU2/jPwAAAIBLIeM/AAAAAO8N5D8AAAAA5GToPwAAAIDhjeI/AAAAQLlt4j8AAAAAEj3iPwAAAGCPHuw/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1427\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1428\",\"type\":\"UnionRenderers\"}},\"id\":\"1388\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1380\",\"type\":\"CategoricalTicker\"}},\"id\":\"1382\",\"type\":\"Grid\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1383\",\"type\":\"SaveTool\"},{\"id\":\"1384\",\"type\":\"HoverTool\"}]},\"id\":\"1385\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1383\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1384\",\"type\":\"HoverTool\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1391\",\"type\":\"Rect\"},{\"attributes\":{\"text\":\"English-Italian Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1365\",\"type\":\"Title\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1363\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1390\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1367\",\"type\":\"FactorRange\"},{\"attributes\":{\"data_source\":{\"id\":\"1388\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1390\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1391\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1393\",\"type\":\"CDSView\"}},\"id\":\"1392\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"1388\",\"type\":\"ColumnDataSource\"}},\"id\":\"1393\",\"type\":\"CDSView\"},{\"attributes\":{\"high\":0.90728360414505,\"low\":0.5699548721313477,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1363\",\"type\":\"LinearColorMapper\"},{\"attributes\":{},\"id\":\"1428\",\"type\":\"UnionRenderers\"}],\"root_ids\":[\"1364\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"85d1d6b6-c6cd-4e71-b29b-2cc49ada74c5\",\"roots\":{\"1364\":\"5e20475c-62a7-4a19-87ed-a605dc444c96\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", 
+ " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1364" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(en_result, it_result, english_sentences, italian_sentences, 'English-Italian Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m6ySvEGbQaTM" + }, + "source": [ + "### Italian-Spanish Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "irfwIeitQ7V6" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " // store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = 
document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + 
" \n", + " for (var i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n 
run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"9eb33289-2019-49f5-b3a2-c34c93bf2800\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1483\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1486\",\"type\":\"Grid\"},{\"id\":\"1490\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1487\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1500\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1473\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1493\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1475\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1479\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1477\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1481\",\"type\":\"CategoricalScale\"}},\"id\":\"1472\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1491\",\"type\":\"SaveTool\"},{\"id\":\"1492\",\"type\":\"HoverTool\"}]},\"id\":\"1493\",\"type\":\"Toolbar\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1492\",\"type\":\"HoverTool\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1499\",\"type\":\"Rect\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1471\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1498\",\"type\":\"Rect\"},{\"attributes\":{\"data_source\":{\"id\":\"1496\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1498\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1499\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1501\",\"type\":\"CDSView\"}},\"id\":\"1500\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"ticker\":{\"id\":\"1484\",\"type\":\"CategoricalTicker\"}},\"id\":\"1486\",\"type\":\"Grid\"},{\"attributes\":{\"source\":{\"id\":\"1496\",\"type\":\"ColumnDataSource\"}},\"id\":\"1501\",\"type\":\"CDSView\"},{\"attributes\":{\"callback\":null,\"factors\":[\"Disfruto de dar largos paseos por la playa con mi perro.\",\"Los cachorros son 
agradables.\",\"perro\"]},\"id\":\"1477\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1491\",\"type\":\"SaveTool\"},{\"attributes\":{\"high\":0.9059451818466187,\"low\":0.564821720123291,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1471\",\"type\":\"LinearColorMapper\"},{\"attributes\":{},\"id\":\"1488\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1540\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1484\",\"type\":\"CategoricalTicker\"}},\"id\":\"1483\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1484\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1538\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1488\",\"type\":\"CategoricalTicker\"}},\"id\":\"1487\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1481\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1543\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1479\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1542\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1540\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"text\":\"Italian-Spanish Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1473\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"factors\":[\"cane\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\"]},\"id\":\"1475\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1538\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"cane\",\"cane\",\"cane\",\"I cuccioli sono carini.\",\"I cuccioli sono carini.\",\"I cuccioli sono carini.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\",\"Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.\"],\"embeddings_2\":[\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\",\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\",\"perro\",\"Los cachorros son agradables.\",\"Disfruto de dar largos paseos por la playa con mi perro.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAwID97D8AAACAiUjkPwAAAICyQuI/AAAAQJl+4z8AAABAqebnPwAAAAAFE+I/AAAAgDrw4j8AAAAg++LjPwAAAEDo1eo/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1542\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1543\",\"type\":\"UnionRenderers\"}},\"id\":\"1496\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1488\",\"type\":\"CategoricalTicker\"}},\"id\":\"1490\",\"type\":\"Grid\"}],\"root_ids\":[\"1472\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"9eb33289-2019-49f5-b3a2-c34c93bf2800\",\"roots\":{\"1472\":\"6f559e42-2dec-4a29-a3d9-62c969a8c08a\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh 
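The heatmap above is produced by the notebook's `visualize_similarity` helper defined earlier in the notebook. As a hedged sketch of the kind of computation behind the "sim" values (an assumption for illustration, not the helper's actual code): with one embedding row per sentence, the pairwise similarity matrix is simply the inner product of the two embedding arrays, which equals cosine similarity when the rows are unit-normalized.

    import numpy as np

    def similarity_matrix(embeddings_1, embeddings_2):
        # One embedding per row; the result is an (n1, n2) matrix of pairwise
        # inner products (cosine similarity if the rows are unit length).
        return np.inner(embeddings_1, embeddings_2)

    # Illustrative stand-in data only; in the notebook the inputs would be the
    # it_result / es_result arrays computed earlier. 512 is just a plausible
    # embedding width for this sketch.
    rng = np.random.default_rng(0)
    a = rng.normal(size=(3, 512))
    b = rng.normal(size=(3, 512))
    a /= np.linalg.norm(a, axis=1, keepdims=True)
    b /= np.linalg.norm(b, axis=1, keepdims=True)
    print(similarity_matrix(a, b).shape)  # (3, 3)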
+    "source": [
+     "visualize_similarity(it_result, es_result, italian_sentences, spanish_sentences, 'Italian-Spanish Similarity')"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": { "id": "ueoRO8balwwr" },
+    "source": [ "### English-Chinese Similarity" ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": { "id": "xA7anofVlxL7" },
+    "outputs": [
+     [output (display_data): BokehJS 1.4.0 loader and notebook-integration boilerplate omitted]
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"e65d5833-780d-410f-a7be-915f30e03b59\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1598\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1601\",\"type\":\"Grid\"},{\"id\":\"1605\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1602\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1615\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1588\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1608\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1590\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1594\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1592\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1596\",\"type\":\"CategoricalScale\"}},\"id\":\"1587\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1664\",\"type\":\"Selection\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1603\",\"type\":\"CategoricalTicker\"}},\"id\":\"1605\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1665\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1603\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1660\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1603\",\"type\":\"CategoricalTicker\"}},\"id\":\"1602\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1662\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1599\",\"type\":\"CategoricalTicker\"}},\"id\":\"1598\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1607\",\"type\":\"HoverTool\"},{\"attributes\":{},\"id\":\"1599\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1596\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1594\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u72d7\"]},\"id\":\"1592\",\"type\":\"FactorRange\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1590\",\"type\":\"FactorRange\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my 
dog.\"],\"embeddings_2\":[\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAAF0H7D8AAAAgK6rlPwAAACCmLeM/AAAAgLC95D8AAABAJcfnPwAAAMDQZOM/AAAAAFSj4j8AAACg4TfjPwAAAIBszug/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1664\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1665\",\"type\":\"UnionRenderers\"}},\"id\":\"1611\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1606\",\"type\":\"SaveTool\"},{\"id\":\"1607\",\"type\":\"HoverTool\"}]},\"id\":\"1608\",\"type\":\"Toolbar\"},{\"attributes\":{\"text\":\"English-Chinese Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1588\",\"type\":\"Title\"},{\"attributes\":{},\"id\":\"1662\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"high\":0.8758988380432129,\"low\":0.5824375152587891,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1586\",\"type\":\"LinearColorMapper\"},{\"attributes\":{\"source\":{\"id\":\"1611\",\"type\":\"ColumnDataSource\"}},\"id\":\"1616\",\"type\":\"CDSView\"},{\"attributes\":{},\"id\":\"1660\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"data_source\":{\"id\":\"1611\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1613\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1614\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1616\",\"type\":\"CDSView\"}},\"id\":\"1615\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1586\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1613\",\"type\":\"Rect\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1614\",\"type\":\"Rect\"},{\"attributes\":{},\"id\":\"1606\",\"type\":\"SaveTool\"},{\"attributes\":{\"ticker\":{\"id\":\"1599\",\"type\":\"CategoricalTicker\"}},\"id\":\"1601\",\"type\":\"Grid\"}],\"root_ids\":[\"1587\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"e65d5833-780d-410f-a7be-915f30e03b59\",\"roots\":{\"1587\":\"5b8e7d08-b7e7-4a05-a22d-c27aa1873e6d\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " 
+    "source": [
+     "visualize_similarity(en_result, zh_result, english_sentences, chinese_sentences, 'English-Chinese Similarity')"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": { "id": "8zV1BJc3mL3W" },
+    "source": [ "### English-Korean Similarity" ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": { "id": "iqWy1e1UmQeX" },
+    "outputs": [
+     [output (display_data): BokehJS 1.4.0 loader and notebook-integration boilerplate omitted]
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"1bbe715c-608d-49a6-8927-e818fa752480\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1720\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1723\",\"type\":\"Grid\"},{\"id\":\"1727\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1724\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1737\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1710\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1730\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1712\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1716\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1714\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1718\",\"type\":\"CategoricalScale\"}},\"id\":\"1709\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1794\",\"type\":\"UnionRenderers\"},{\"attributes\":{},\"id\":\"1725\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1789\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1725\",\"type\":\"CategoricalTicker\"}},\"id\":\"1724\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1725\",\"type\":\"CategoricalTicker\"}},\"id\":\"1727\",\"type\":\"Grid\"},{\"attributes\":{},\"id\":\"1721\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1728\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"factors\":[\"dog\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\"]},\"id\":\"1712\",\"type\":\"FactorRange\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1728\",\"type\":\"SaveTool\"},{\"id\":\"1729\",\"type\":\"HoverTool\"}]},\"id\":\"1730\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1718\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1793\",\"type\":\"Selection\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1791\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1721\",\"type\":\"CategoricalTicker\"}},\"id\":\"1720\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| @embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1729\",\"type\":\"HoverTool\"},{\"attributes\":{},\"id\":\"1716\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1708\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1735\",\"type\":\"Rect\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 
\\uc990\\uae34\\ub2e4.\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\uac1c\"]},\"id\":\"1714\",\"type\":\"FactorRange\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1736\",\"type\":\"Rect\"},{\"attributes\":{\"source\":{\"id\":\"1733\",\"type\":\"ColumnDataSource\"}},\"id\":\"1738\",\"type\":\"CDSView\"},{\"attributes\":{\"data_source\":{\"id\":\"1733\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1735\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1736\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1738\",\"type\":\"CDSView\"}},\"id\":\"1737\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"text\":\"English-Korean Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1710\",\"type\":\"Title\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"dog\",\"dog\",\"dog\",\"Puppies are nice.\",\"Puppies are nice.\",\"Puppies are nice.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\",\"I enjoy taking long walks along the beach with my dog.\"],\"embeddings_2\":[\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAgICf6z8AAAAAKEjlPwAAAKBNk+A/AAAAwFZl5D8AAAAAUpnnPwAAAID69uA/AAAAQJua4j8AAADgCQ3jPwAAAIC9gOg/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1793\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1794\",\"type\":\"UnionRenderers\"}},\"id\":\"1733\",\"type\":\"ColumnDataSource\"},{\"attributes\":{},\"id\":\"1789\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{},\"id\":\"1791\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1721\",\"type\":\"CategoricalTicker\"}},\"id\":\"1723\",\"type\":\"Grid\"},{\"attributes\":{\"high\":0.8632204532623291,\"low\":0.5179813504219055,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1708\",\"type\":\"LinearColorMapper\"}],\"root_ids\":[\"1709\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"1bbe715c-608d-49a6-8927-e818fa752480\",\"roots\":{\"1709\":\"7b449243-0dbd-46b6-8b02-a89fdf92645e\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + 
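A quick way to sanity-check a heatmap like the one above without Bokeh is to take the row-wise argmax of the similarity matrix and confirm that each sentence's best match is its translation. The values below are illustrative stand-ins, not the actual model outputs (which fall roughly in the 0.52-0.86 range for this English-Korean pair).

    import numpy as np

    def best_matches(sim, sentences_1, sentences_2):
        # For each sentence in the first list, return the most similar sentence
        # from the second list according to the similarity matrix.
        return [(s1, sentences_2[int(np.argmax(row))])
                for s1, row in zip(sentences_1, sim)]

    # Illustrative similarity values only.
    sim = np.array([[0.86, 0.67, 0.52],
                    [0.64, 0.74, 0.53],
                    [0.58, 0.59, 0.77]])
    english = ["dog", "Puppies are nice.",
               "I enjoy taking long walks along the beach with my dog."]
    korean = ["개", "강아지가 좋다.",
              "나는 나의 산책을 해변을 따라 길게 산책하는 것을 즐긴다."]
    for en, ko in best_matches(sim, english, korean):
        print(f"{en!r} -> {ko!r}")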
" embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1709" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(en_result, ko_result, english_sentences, korean_sentences, 'English-Korean Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dfTj-JaunFTv" + }, + "source": [ + "### Chinese-Korean Similarity" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MndSgKGPnJuF" + }, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "(function(root) {\n", + " function now() {\n", + " return new Date();\n", + " }\n", + "\n", + " var force = true;\n", + "\n", + " if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n", + " root._bokeh_onload_callbacks = [];\n", + " root._bokeh_is_loading = undefined;\n", + " }\n", + "\n", + " var JS_MIME_TYPE = 'application/javascript';\n", + " var HTML_MIME_TYPE = 'text/html';\n", + " var EXEC_MIME_TYPE = 'application/vnd.bokehjs_exec.v0+json';\n", + " var CLASS_NAME = 'output_bokeh rendered_html';\n", + "\n", + " /**\n", + " * Render data to the DOM node\n", + " */\n", + " function render(props, node) {\n", + " var script = document.createElement(\"script\");\n", + " node.appendChild(script);\n", + " }\n", + "\n", + " /**\n", + " * Handle when an output is cleared or removed\n", + " */\n", + " function handleClearOutput(event, handle) {\n", + " var cell = handle.cell;\n", + "\n", + " var id = cell.output_area._bokeh_element_id;\n", + " var server_id = cell.output_area._bokeh_server_id;\n", + " // Clean up Bokeh references\n", + " if (id != null && id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + "\n", + " if (server_id !== undefined) {\n", + " // Clean up Bokeh references\n", + " var cmd = \"from bokeh.io.state import curstate; print(curstate().uuid_to_server['\" + server_id + \"'].get_sessions()[0].document.roots[0]._id)\";\n", + " cell.notebook.kernel.execute(cmd, {\n", + " iopub: {\n", + " output: function(msg) {\n", + " var id = msg.content.text.trim();\n", + " if (id in Bokeh.index) {\n", + " Bokeh.index[id].model.document.clear();\n", + " delete Bokeh.index[id];\n", + " }\n", + " }\n", + " }\n", + " });\n", + " // Destroy server and session\n", + " var cmd = \"import bokeh.io.notebook as ion; ion.destroy_server('\" + server_id + \"')\";\n", + " cell.notebook.kernel.execute(cmd);\n", + " }\n", + " }\n", + "\n", + " /**\n", + " * Handle when a new output is added\n", + " */\n", + " function handleAddOutput(event, handle) {\n", + " var output_area = handle.output_area;\n", + " var output = handle.output;\n", + "\n", + " // limit handleAddOutput to display_data with EXEC_MIME_TYPE content only\n", + " if ((output.output_type != \"display_data\") || (!output.data.hasOwnProperty(EXEC_MIME_TYPE))) {\n", + " return\n", + " }\n", + "\n", + " var toinsert = output_area.element.find(\".\" + CLASS_NAME.split(' ')[0]);\n", + "\n", + " if (output.metadata[EXEC_MIME_TYPE][\"id\"] !== undefined) {\n", + " toinsert[toinsert.length - 1].firstChild.textContent = output.data[JS_MIME_TYPE];\n", + " 
// store reference to embed id on output_area\n", + " output_area._bokeh_element_id = output.metadata[EXEC_MIME_TYPE][\"id\"];\n", + " }\n", + " if (output.metadata[EXEC_MIME_TYPE][\"server_id\"] !== undefined) {\n", + " var bk_div = document.createElement(\"div\");\n", + " bk_div.innerHTML = output.data[HTML_MIME_TYPE];\n", + " var script_attrs = bk_div.children[0].attributes;\n", + " for (var i = 0; i < script_attrs.length; i++) {\n", + " toinsert[toinsert.length - 1].firstChild.setAttribute(script_attrs[i].name, script_attrs[i].value);\n", + " }\n", + " // store reference to server id on output_area\n", + " output_area._bokeh_server_id = output.metadata[EXEC_MIME_TYPE][\"server_id\"];\n", + " }\n", + " }\n", + "\n", + " function register_renderer(events, OutputArea) {\n", + "\n", + " function append_mime(data, metadata, element) {\n", + " // create a DOM node to render to\n", + " var toinsert = this.create_output_subarea(\n", + " metadata,\n", + " CLASS_NAME,\n", + " EXEC_MIME_TYPE\n", + " );\n", + " this.keyboard_manager.register_events(toinsert);\n", + " // Render to node\n", + " var props = {data: data, metadata: metadata[EXEC_MIME_TYPE]};\n", + " render(props, toinsert[toinsert.length - 1]);\n", + " element.append(toinsert);\n", + " return toinsert\n", + " }\n", + "\n", + " /* Handle when an output is cleared or removed */\n", + " events.on('clear_output.CodeCell', handleClearOutput);\n", + " events.on('delete.Cell', handleClearOutput);\n", + "\n", + " /* Handle when a new output is added */\n", + " events.on('output_added.OutputArea', handleAddOutput);\n", + "\n", + " /**\n", + " * Register the mime type and append_mime function with output_area\n", + " */\n", + " OutputArea.prototype.register_mime_type(EXEC_MIME_TYPE, append_mime, {\n", + " /* Is output safe? */\n", + " safe: true,\n", + " /* Index of renderer in `output_area.display_order` */\n", + " index: 0\n", + " });\n", + " }\n", + "\n", + " // register the mime type if in Jupyter Notebook environment and previously unregistered\n", + " if (root.Jupyter !== undefined) {\n", + " var events = require('base/js/events');\n", + " var OutputArea = require('notebook/js/outputarea').OutputArea;\n", + "\n", + " if (OutputArea.prototype.mime_types().indexOf(EXEC_MIME_TYPE) == -1) {\n", + " register_renderer(events, OutputArea);\n", + " }\n", + " }\n", + "\n", + " \n", + " if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n", + " root._bokeh_timeout = Date.now() + 5000;\n", + " root._bokeh_failed_load = false;\n", + " }\n", + "\n", + " var NB_LOAD_WARNING = {'data': {'text/html':\n", + " \"
\\n\"+\n", + " \"

\\n\"+\n", + " \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n", + " \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n", + " \"

\\n\"+\n", + " \"
    \\n\"+\n", + " \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n", + " \"
  • use INLINE resources instead, as so:
  • \\n\"+\n", + " \"
\\n\"+\n", + " \"\\n\"+\n", + " \"from bokeh.resources import INLINE\\n\"+\n", + " \"output_notebook(resources=INLINE)\\n\"+\n", + " \"\\n\"+\n", + " \"
\"}};\n", + "\n", + " function display_loaded() {\n", + " var el = document.getElementById(null);\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS is loading...\";\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " if (el != null) {\n", + " el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(display_loaded, 100)\n", + " }\n", + " }\n", + "\n", + "\n", + " function run_callbacks() {\n", + " try {\n", + " root._bokeh_onload_callbacks.forEach(function(callback) {\n", + " if (callback != null)\n", + " callback();\n", + " });\n", + " } finally {\n", + " delete root._bokeh_onload_callbacks\n", + " }\n", + " console.debug(\"Bokeh: all callbacks have finished\");\n", + " }\n", + "\n", + " function load_libs(css_urls, js_urls, callback) {\n", + " if (css_urls == null) css_urls = [];\n", + " if (js_urls == null) js_urls = [];\n", + "\n", + " root._bokeh_onload_callbacks.push(callback);\n", + " if (root._bokeh_is_loading > 0) {\n", + " console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n", + " return null;\n", + " }\n", + " if (js_urls == null || js_urls.length === 0) {\n", + " run_callbacks();\n", + " return null;\n", + " }\n", + " console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n", + " root._bokeh_is_loading = css_urls.length + js_urls.length;\n", + "\n", + " function on_load() {\n", + " root._bokeh_is_loading--;\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n", + " run_callbacks()\n", + " }\n", + " }\n", + "\n", + " function on_error() {\n", + " console.error(\"failed to load \" + url);\n", + " }\n", + "\n", + " for (var i = 0; i < css_urls.length; i++) {\n", + " var url = css_urls[i];\n", + " const element = document.createElement(\"link\");\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.rel = \"stylesheet\";\n", + " element.type = \"text/css\";\n", + " element.href = url;\n", + " console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " for (var i = 0; i < js_urls.length; i++) {\n", + " var url = js_urls[i];\n", + " var element = document.createElement('script');\n", + " element.onload = on_load;\n", + " element.onerror = on_error;\n", + " element.async = false;\n", + " element.src = url;\n", + " console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n", + " document.head.appendChild(element);\n", + " }\n", + " };\n", + "\n", + " function inject_raw_css(css) {\n", + " const element = document.createElement(\"style\");\n", + " element.appendChild(document.createTextNode(css));\n", + " document.body.appendChild(element);\n", + " }\n", + "\n", + " \n", + " var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n", + " var css_urls = [];\n", + " \n", + "\n", + " var inline_js = [\n", + " function(Bokeh) {\n", + " Bokeh.set_log_level(\"info\");\n", + " },\n", + " function(Bokeh) {\n", + " \n", + " \n", + " }\n", + " ];\n", + "\n", + " function run_inline_js() {\n", + " \n", + " if (root.Bokeh !== undefined || force === true) {\n", + 
" \n", + " for (var i = 0; i < inline_js.length; i++) {\n", + " inline_js[i].call(root, root.Bokeh);\n", + " }\n", + " } else if (Date.now() < root._bokeh_timeout) {\n", + " setTimeout(run_inline_js, 100);\n", + " } else if (!root._bokeh_failed_load) {\n", + " console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n", + " root._bokeh_failed_load = true;\n", + " } else if (force !== true) {\n", + " var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n", + " cell.output_area.append_execute_result(NB_LOAD_WARNING)\n", + " }\n", + "\n", + " }\n", + "\n", + " if (root._bokeh_is_loading === 0) {\n", + " console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n", + " run_inline_js();\n", + " } else {\n", + " load_libs(css_urls, js_urls, function() {\n", + " console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n", + " run_inline_js();\n", + " });\n", + " }\n", + "}(window));" + ], + "application/vnd.bokehjs_load.v0+json": "\n(function(root) {\n function now() {\n return new Date();\n }\n\n var force = true;\n\n if (typeof root._bokeh_onload_callbacks === \"undefined\" || force === true) {\n root._bokeh_onload_callbacks = [];\n root._bokeh_is_loading = undefined;\n }\n\n \n\n \n if (typeof (root._bokeh_timeout) === \"undefined\" || force === true) {\n root._bokeh_timeout = Date.now() + 5000;\n root._bokeh_failed_load = false;\n }\n\n var NB_LOAD_WARNING = {'data': {'text/html':\n \"
\\n\"+\n \"

\\n\"+\n \"BokehJS does not appear to have successfully loaded. If loading BokehJS from CDN, this \\n\"+\n \"may be due to a slow or bad network connection. Possible fixes:\\n\"+\n \"

\\n\"+\n \"
    \\n\"+\n \"
  • re-rerun `output_notebook()` to attempt to load from CDN again, or
  • \\n\"+\n \"
  • use INLINE resources instead, as so:
  • \\n\"+\n \"
\\n\"+\n \"\\n\"+\n \"from bokeh.resources import INLINE\\n\"+\n \"output_notebook(resources=INLINE)\\n\"+\n \"\\n\"+\n \"
\"}};\n\n function display_loaded() {\n var el = document.getElementById(null);\n if (el != null) {\n el.textContent = \"BokehJS is loading...\";\n }\n if (root.Bokeh !== undefined) {\n if (el != null) {\n el.textContent = \"BokehJS \" + root.Bokeh.version + \" successfully loaded.\";\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(display_loaded, 100)\n }\n }\n\n\n function run_callbacks() {\n try {\n root._bokeh_onload_callbacks.forEach(function(callback) {\n if (callback != null)\n callback();\n });\n } finally {\n delete root._bokeh_onload_callbacks\n }\n console.debug(\"Bokeh: all callbacks have finished\");\n }\n\n function load_libs(css_urls, js_urls, callback) {\n if (css_urls == null) css_urls = [];\n if (js_urls == null) js_urls = [];\n\n root._bokeh_onload_callbacks.push(callback);\n if (root._bokeh_is_loading > 0) {\n console.debug(\"Bokeh: BokehJS is being loaded, scheduling callback at\", now());\n return null;\n }\n if (js_urls == null || js_urls.length === 0) {\n run_callbacks();\n return null;\n }\n console.debug(\"Bokeh: BokehJS not loaded, scheduling load and callback at\", now());\n root._bokeh_is_loading = css_urls.length + js_urls.length;\n\n function on_load() {\n root._bokeh_is_loading--;\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: all BokehJS libraries/stylesheets loaded\");\n run_callbacks()\n }\n }\n\n function on_error() {\n console.error(\"failed to load \" + url);\n }\n\n for (var i = 0; i < css_urls.length; i++) {\n var url = css_urls[i];\n const element = document.createElement(\"link\");\n element.onload = on_load;\n element.onerror = on_error;\n element.rel = \"stylesheet\";\n element.type = \"text/css\";\n element.href = url;\n console.debug(\"Bokeh: injecting link tag for BokehJS stylesheet: \", url);\n document.body.appendChild(element);\n }\n\n for (var i = 0; i < js_urls.length; i++) {\n var url = js_urls[i];\n var element = document.createElement('script');\n element.onload = on_load;\n element.onerror = on_error;\n element.async = false;\n element.src = url;\n console.debug(\"Bokeh: injecting script tag for BokehJS library: \", url);\n document.head.appendChild(element);\n }\n };\n\n function inject_raw_css(css) {\n const element = document.createElement(\"style\");\n element.appendChild(document.createTextNode(css));\n document.body.appendChild(element);\n }\n\n \n var js_urls = [\"https://cdn.pydata.org/bokeh/release/bokeh-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-widgets-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-tables-1.4.0.min.js/", \"https://cdn.pydata.org/bokeh/release/bokeh-gl-1.4.0.min.js/"];\n var css_urls = [];\n \n\n var inline_js = [\n function(Bokeh) {\n Bokeh.set_log_level(\"info\");\n },\n function(Bokeh) {\n \n \n }\n ];\n\n function run_inline_js() {\n \n if (root.Bokeh !== undefined || force === true) {\n \n for (var i = 0; i < inline_js.length; i++) {\n inline_js[i].call(root, root.Bokeh);\n }\n } else if (Date.now() < root._bokeh_timeout) {\n setTimeout(run_inline_js, 100);\n } else if (!root._bokeh_failed_load) {\n console.log(\"Bokeh: BokehJS failed to load within specified timeout.\");\n root._bokeh_failed_load = true;\n } else if (force !== true) {\n var cell = $(document.getElementById(null)).parents('.cell').data().cell;\n cell.output_area.append_execute_result(NB_LOAD_WARNING)\n }\n\n }\n\n if (root._bokeh_is_loading === 0) {\n console.debug(\"Bokeh: BokehJS loaded, going straight to plotting\");\n 
run_inline_js();\n } else {\n load_libs(css_urls, js_urls, function() {\n console.debug(\"Bokeh: BokehJS plotting callback run at\", now());\n run_inline_js();\n });\n }\n}(window));" + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n" + ] + }, + "metadata": { + "tags": [] + }, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "(function(root) {\n", + " function embed_document(root) {\n", + " \n", + " var docs_json = {\"3d4a0aff-b8c3-43fb-a5af-6dfe1b9d0e1f\":{\"roots\":{\"references\":[{\"attributes\":{\"above\":[{\"id\":\"1849\",\"type\":\"CategoricalAxis\"}],\"center\":[{\"id\":\"1852\",\"type\":\"Grid\"},{\"id\":\"1856\",\"type\":\"Grid\"}],\"left\":[{\"id\":\"1853\",\"type\":\"CategoricalAxis\"}],\"min_border_right\":300,\"plot_width\":1200,\"renderers\":[{\"id\":\"1866\",\"type\":\"GlyphRenderer\"}],\"title\":{\"id\":\"1839\",\"type\":\"Title\"},\"toolbar\":{\"id\":\"1859\",\"type\":\"Toolbar\"},\"toolbar_location\":\"below\",\"x_range\":{\"id\":\"1841\",\"type\":\"FactorRange\"},\"x_scale\":{\"id\":\"1845\",\"type\":\"CategoricalScale\"},\"y_range\":{\"id\":\"1843\",\"type\":\"FactorRange\"},\"y_scale\":{\"id\":\"1847\",\"type\":\"CategoricalScale\"}},\"id\":\"1838\",\"subtype\":\"Figure\",\"type\":\"Plot\"},{\"attributes\":{},\"id\":\"1925\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1927\",\"type\":\"CategoricalTickFormatter\"},\"major_label_orientation\":0.7853981633974483,\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1850\",\"type\":\"CategoricalTicker\"}},\"id\":\"1849\",\"type\":\"CategoricalAxis\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\"]},\"id\":\"1841\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1929\",\"type\":\"Selection\"},{\"attributes\":{},\"id\":\"1850\",\"type\":\"CategoricalTicker\"},{\"attributes\":{},\"id\":\"1927\",\"type\":\"CategoricalTickFormatter\"},{\"attributes\":{\"ticker\":{\"id\":\"1850\",\"type\":\"CategoricalTicker\"}},\"id\":\"1852\",\"type\":\"Grid\"},{\"attributes\":{\"axis_line_color\":{\"value\":null},\"formatter\":{\"id\":\"1925\",\"type\":\"CategoricalTickFormatter\"},\"major_label_standoff\":16,\"major_label_text_font_size\":{\"value\":\"12pt\"},\"major_tick_line_color\":{\"value\":null},\"ticker\":{\"id\":\"1854\",\"type\":\"CategoricalTicker\"}},\"id\":\"1853\",\"type\":\"CategoricalAxis\"},{\"attributes\":{},\"id\":\"1854\",\"type\":\"CategoricalTicker\"},{\"attributes\":{\"dimension\":1,\"ticker\":{\"id\":\"1854\",\"type\":\"CategoricalTicker\"}},\"id\":\"1856\",\"type\":\"Grid\"},{\"attributes\":{\"text\":\"Chinese-Korean Similarity\",\"text_font_size\":{\"value\":\"12pt\"}},\"id\":\"1839\",\"type\":\"Title\"},{\"attributes\":{\"active_drag\":\"auto\",\"active_inspect\":\"auto\",\"active_multi\":null,\"active_scroll\":\"auto\",\"active_tap\":\"auto\",\"tools\":[{\"id\":\"1857\",\"type\":\"SaveTool\"},{\"id\":\"1858\",\"type\":\"HoverTool\"}]},\"id\":\"1859\",\"type\":\"Toolbar\"},{\"attributes\":{},\"id\":\"1857\",\"type\":\"SaveTool\"},{\"attributes\":{\"callback\":null,\"tooltips\":[[\"pair\",\"@embeddings_1 ||| 
@embeddings_2\"],[\"sim\",\"@sim\"]]},\"id\":\"1858\",\"type\":\"HoverTool\"},{\"attributes\":{\"callback\":null,\"data\":{\"embeddings_1\":[\"\\u72d7\",\"\\u72d7\",\"\\u72d7\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u5c0f\\u72d7\\u5f88\\u597d\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\",\"\\u6211\\u559c\\u6b22\\u548c\\u6211\\u7684\\u72d7\\u4e00\\u8d77\\u6cbf\\u7740\\u6d77\\u6ee9\\u6563\\u6b65\\u3002\"],\"embeddings_2\":[\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac1c\",\"\\uac15\\uc544\\uc9c0\\uac00 \\uc88b\\ub2e4.\",\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\"],\"index\":[0,1,2,3,4,5,6,7,8],\"sim\":{\"__ndarray__\":\"AAAAwIKP6z8AAACAHL7lPwAAAKDsSuA/AAAAoN0A5j8AAACgWsboPwAAAIANGeE/AAAAQMFJ4z8AAADA8D7jPwAAAABna+c/\",\"dtype\":\"float64\",\"shape\":[9]}},\"selected\":{\"id\":\"1929\",\"type\":\"Selection\"},\"selection_policy\":{\"id\":\"1930\",\"type\":\"UnionRenderers\"}},\"id\":\"1862\",\"type\":\"ColumnDataSource\"},{\"attributes\":{\"fill_alpha\":{\"value\":0.1},\"fill_color\":{\"value\":\"#1f77b4\"},\"height\":{\"units\":\"data\",\"value\":1},\"line_alpha\":{\"value\":0.1},\"line_color\":{\"value\":\"#1f77b4\"},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1865\",\"type\":\"Rect\"},{\"attributes\":{\"fill_color\":{\"field\":\"sim\",\"transform\":{\"id\":\"1837\",\"type\":\"LinearColorMapper\"}},\"height\":{\"units\":\"data\",\"value\":1},\"line_color\":{\"value\":null},\"width\":{\"units\":\"data\",\"value\":1},\"x\":{\"field\":\"embeddings_1\"},\"y\":{\"field\":\"embeddings_2\"}},\"id\":\"1864\",\"type\":\"Rect\"},{\"attributes\":{},\"id\":\"1847\",\"type\":\"CategoricalScale\"},{\"attributes\":{\"data_source\":{\"id\":\"1862\",\"type\":\"ColumnDataSource\"},\"glyph\":{\"id\":\"1864\",\"type\":\"Rect\"},\"hover_glyph\":null,\"muted_glyph\":null,\"nonselection_glyph\":{\"id\":\"1865\",\"type\":\"Rect\"},\"selection_glyph\":null,\"view\":{\"id\":\"1867\",\"type\":\"CDSView\"}},\"id\":\"1866\",\"type\":\"GlyphRenderer\"},{\"attributes\":{\"source\":{\"id\":\"1862\",\"type\":\"ColumnDataSource\"}},\"id\":\"1867\",\"type\":\"CDSView\"},{\"attributes\":{\"high\":0.8612684011459351,\"low\":0.5091460347175598,\"palette\":[\"#ffffcc\",\"#ffeda0\",\"#fed976\",\"#feb24c\",\"#fd8d3c\",\"#fc4e2a\",\"#e31a1c\",\"#bd0026\",\"#800026\"]},\"id\":\"1837\",\"type\":\"LinearColorMapper\"},{\"attributes\":{\"callback\":null,\"factors\":[\"\\ub098\\ub294 \\ub098\\uc758 \\uc0b0\\ucc45\\uc744 \\ud574\\ubcc0\\uc744 \\ub530\\ub77c \\uae38\\uac8c \\uc0b0\\ucc45\\ud558\\ub294 \\uac83\\uc744 \\uc990\\uae34\\ub2e4.\",\"\\uac15\\uc544\\uc9c0\\uac00 
\\uc88b\\ub2e4.\",\"\\uac1c\"]},\"id\":\"1843\",\"type\":\"FactorRange\"},{\"attributes\":{},\"id\":\"1845\",\"type\":\"CategoricalScale\"},{\"attributes\":{},\"id\":\"1930\",\"type\":\"UnionRenderers\"}],\"root_ids\":[\"1838\"]},\"title\":\"Bokeh Application\",\"version\":\"1.4.0\"}};\n", + " var render_items = [{\"docid\":\"3d4a0aff-b8c3-43fb-a5af-6dfe1b9d0e1f\",\"roots\":{\"1838\":\"63952aa4-d54a-4445-ad10-ef5bef98f1ef\"}}];\n", + " root.Bokeh.embed.embed_items_notebook(docs_json, render_items);\n", + "\n", + " }\n", + " if (root.Bokeh !== undefined) {\n", + " embed_document(root);\n", + " } else {\n", + " var attempts = 0;\n", + " var timer = setInterval(function(root) {\n", + " if (root.Bokeh !== undefined) {\n", + " clearInterval(timer);\n", + " embed_document(root);\n", + " } else {\n", + " attempts++;\n", + " if (attempts > 100) {\n", + " clearInterval(timer);\n", + " console.log(\"Bokeh: ERROR: Unable to run BokehJS code because BokehJS library is missing\");\n", + " }\n", + " }\n", + " }, 10, root)\n", + " }\n", + "})(window);" + ], + "application/vnd.bokehjs_exec.v0+json": "" + }, + "metadata": { + "application/vnd.bokehjs_exec.v0+json": { + "id": "1838" + }, + "tags": [] + }, + "output_type": "display_data" + } + ], + "source": [ + "visualize_similarity(zh_result, ko_result, chinese_sentences, korean_sentences, 'Chinese-Korean Similarity')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rRabHHQYQfLr" + }, + "source": [ + "### And more...\n", + "\n", + "The above examples can be extended to any language pair from **English, Arabic, Chinese, Dutch, French, German, Italian, Japanese, Korean, Polish, Portuguese, Russian, Spanish, Thai and Turkish**. Happy coding!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mxAFAJI9xsAU" + }, + "source": [ + "# Creating a Multilingual Semantic-Similarity Search Engine\n", + "\n", + "Whereas in the previous example we visualized a handful of sentences, in this section we will build a semantic-search index of about 200,000 sentences from a Wikipedia Corpus. About half will be in English and the other half in Spanish to demonstrate the multilingual capabilities of the Universal Sentence Encoder.\n", + "\n", + "## Download Data to Index\n", + "First, we will download news sentences in multiples languages from the [News Commentary Corpus](http://opus.nlpl.eu/News-Commentary-v11.php) [1]. Without loss of generality, this approach should also work for indexing the rest of the supported languages.\n", + "\n", + "To speed up the demo, we limit to 1000 sentences per language." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "587I9ye6yXEU" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/ar-en.txt.zip\n", + "24715264/24714354 [==============================] - 2s 0us/step\n", + "1,000 Arabic sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-zh.txt.zip\n", + "18104320/18101984 [==============================] - 2s 0us/step\n", + "1,000 Chinese sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-es.txt.zip\n", + "28106752/28106064 [==============================] - 2s 0us/step\n", + "1,000 English sentences\n", + "Downloading data from http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/en-ru.txt.zip\n", + "24854528/24849511 [==============================] - 2s 0us/step\n", + "1,000 Russian sentences\n", + "1,000 Spanish sentences\n" + ] + } + ], + "source": [ + "corpus_metadata = [\n", + " ('ar', 'ar-en.txt.zip', 'News-Commentary.ar-en.ar', 'Arabic'),\n", + " ('zh', 'en-zh.txt.zip', 'News-Commentary.en-zh.zh', 'Chinese'),\n", + " ('en', 'en-es.txt.zip', 'News-Commentary.en-es.en', 'English'),\n", + " ('ru', 'en-ru.txt.zip', 'News-Commentary.en-ru.ru', 'Russian'),\n", + " ('es', 'en-es.txt.zip', 'News-Commentary.en-es.es', 'Spanish'),\n", + "]\n", + "\n", + "language_to_sentences = {}\n", + "language_to_news_path = {}\n", + "for language_code, zip_file, news_file, language_name in corpus_metadata:\n", + " zip_path = tf.keras.utils.get_file(\n", + " fname=zip_file,\n", + " origin='http://opus.nlpl.eu/download.php?f=News-Commentary/v11/moses/' + zip_file,\n", + " extract=True)\n", + " news_path = os.path.join(os.path.dirname(zip_path), news_file)\n", + " language_to_sentences[language_code] = pd.read_csv(news_path, sep='\\t', header=None)[0][:1000]\n", + " language_to_news_path[language_code] = news_path\n", + "\n", + " print('{:,} {} sentences'.format(len(language_to_sentences[language_code]), language_name))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m3DIT9uT7Z34" + }, + "source": [ + "## Using a pre-trained model to transform sentences into vectors\n", + "\n", + "We compute embeddings in _batches_ so that they fit in the GPU's RAM." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yRoRT5qCEIYy" + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\r 0%| | 0/1000 [00:00\n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U5POcTVNB_dv" + }, + "source": [ + "# HRNet based model for semantic segmentation\n", + "\n", + "In this notebook, you will:\n", + "\n", + "- Choose and load one of the 17 pre-trained HRNet models on different semantic segmentation datasets\n", + "- Run inference to extract features from the model backbone and predictions from the model head" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_XgTpm9ZxoN9" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "from PIL import Image\n", + "import numpy as np" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UVtEyxDFpKE1" + }, + "source": [ + "## Loading models from TensorFlow Hub\n", + "\n", + "Here you can choose the pre-trained HRNet model to load, different models means a different training dataset used. All models have the same architecture, except for the model head, which has a different dimension based on the number of classes contained in the training dataset (dataset_output_classes). For more information about the different datasets we refer to the links above and the [factors of influence dataset collection](https://github.com/google-research/google-research/tree/master/factors_of_influence)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8_ctG55-uTX" + }, + "outputs": [], + "source": [ + "#@title Choose a pre-trained HRNet model to load.\n", + "\n", + "hrnet_model_name = 'ade20k-hrnetv2-w48/1' #@param [\"ade20k-hrnetv2-w48/1\", \"isprs-hrnetv2-w48/1\", \"vkitti2-hrnetv2-w48/1\", \"vgallery-hrnetv2-w48/1\", \"sunrgbd-hrnetv2-w48/1\", \"suim-hrnetv2-w48/1\", \"scannet-hrnetv2-w48/1\", \"pvoc-hrnetv2-w48/1\", \"msegpcontext-hrnetv2-w48/1\", \"mapillary-hrnetv2-w48/1\", \"kitti-hrnetv2-w48/1\", \"isaid-hrnetv2-w48/1\", \"idd-hrnetv2-w48/1\", \"coco-hrnetv2-w48/1\", \"city-hrnetv2-w48/1\", \"camvid-hrnetv2-w48/1\", \"bdd-hrnetv2-w48/1\"]\n", + "\n", + "tfhub_model_name = 'https://tfhub.dev/google/HRNet/' + hrnet_model_name\n", + "\n", + "print('HRNet model selected :', tfhub_model_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T-yHJ5X55kWN" + }, + "outputs": [], + "source": [ + "hrnet_model = hub.load(tfhub_model_name)\n", + "\n", + "print('HRNet model loaded :', tfhub_model_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pMP_7v9x6kol" + }, + "source": [ + "## Loading an image and running inference\n", + "\n", + "This is a demonstration on how to run inference for extracting features and predictions from an image. The image was taken from the scene150 dataset.\n", + "\n", + "To perform inference on the datasets that were used during training we refer to the [factors of influence dataset collection](https://github.com/google-research/google-research/tree/master/factors_of_influence)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GNzjieS66td_" + }, + "outputs": [], + "source": [ + "img_file = tf.keras.utils.get_file(origin=\"https://tensorflow.org/images/bedroom_hrnet_tutorial.jpg/")\n", + "img = np.array(Image.open(img_file))/255.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Lp54vD_FZuHw" + }, + "outputs": [], + "source": [ + "plt.imshow(img)\n", + "plt.show()\n", + "\n", + "# Predictions will have shape (batch_size, h, w, dataset_output_classes)\n", + "predictions = hrnet_model.predict([img])\n", + "plt.imshow(predictions[0,:,:,1])\n", + "plt.title('Predictions for class #1')\n", + "plt.show() \n", + "# Features will have shape (batch_size, h/4, w/4, 720)\n", + "features = hrnet_model.get_features([img])\n", + "plt.imshow(features[0,:,:,1])\n", + "plt.title('Feature #1 out of 720')\n", + "plt.show()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "hrnet_semantic_segmentation.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_classification.ipynb b/site/en/hub/tutorials/image_classification.ipynb new file mode 100644 index 00000000000..91aadab727e --- /dev/null +++ b/site/en/hub/tutorials/image_classification.ipynb @@ -0,0 +1,463 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7t7KGfIwHaXz" + }, + "source": [ + "# Image Classification with TensorFlow Hub\n", + "\n", + "In this colab, you'll try multiple image classification models from TensorFlow Hub and decide which one is best for your use case.\n", + "\n", + "Because TF Hub encourages a [consistent input convention](https://www.tensorflow.org/hub/common_saved_model_apis/images#image_input) for models that operate on images, it's easy to experiment with different architectures to find the one that best fits your needs." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "N8H5ufxkc2mk" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import requests\n", + "from PIL import Image\n", + "from io import BytesIO\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "oKvj6lY6kZx8" + }, + "outputs": [], + "source": [ + "#@title Helper functions for loading image (hidden)\n", + "\n", + "original_image_cache = {}\n", + "\n", + "def preprocess_image(image):\n", + " image = np.array(image)\n", + " # reshape into shape [batch_size, height, width, num_channels]\n", + " img_reshaped = tf.reshape(image, [1, image.shape[0], image.shape[1], image.shape[2]])\n", + " # Use `convert_image_dtype` to convert to floats in the [0,1] range.\n", + " image = tf.image.convert_image_dtype(img_reshaped, tf.float32)\n", + " return image\n", + "\n", + "def load_image_from_url(http://github.com/img_url):\n", + " \"\"\"Returns an image with shape [1, height, width, num_channels].\"\"\"\n", + " user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}\n", + " response = requests.get(img_url, headers=user_agent)\n", + " image = Image.open(BytesIO(response.content))\n", + " image = preprocess_image(image)\n", + " return image\n", + "\n", + "def load_image(image_url, image_size=256, dynamic_size=False, max_dynamic_size=512):\n", + " \"\"\"Loads and preprocesses images.\"\"\"\n", + " # Cache image file locally.\n", + " if image_url in original_image_cache:\n", + " img = original_image_cache[image_url]\n", + " elif image_url.startswith('https://'):\n", + " img = load_image_from_url(http://github.com/image_url)\n", + " else:\n", + " fd = tf.io.gfile.GFile(image_url, 'rb')\n", + " img = preprocess_image(Image.open(fd))\n", + " original_image_cache[image_url] = img\n", + " # Load and convert to float32 numpy array, add batch dimension, and normalize to range [0, 1].\n", + " img_raw = img\n", + " if tf.reduce_max(img) > 1.0:\n", + " img = img / 255.\n", + " if len(img.shape) == 3:\n", + " img = tf.stack([img, img, img], axis=-1)\n", + " if not dynamic_size:\n", + " img = tf.image.resize_with_pad(img, image_size, image_size)\n", + " elif img.shape[1] > max_dynamic_size or img.shape[2] > max_dynamic_size:\n", + " img = tf.image.resize_with_pad(img, max_dynamic_size, max_dynamic_size)\n", + " return img, img_raw\n", + "\n", + "def show_image(image, title=''):\n", + " image_size = image.shape[1]\n", + " w = (image_size * 6) // 320\n", + " plt.figure(figsize=(w, w))\n", + " plt.imshow(image[0], aspect='equal')\n", + " plt.axis('off')\n", + " plt.title(title)\n", + " plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ws1AMDT_CDPq" + }, + "source": [ + "Select an Image Classification Model. After that, some internal variables are set and the labels file is downloaded and prepared for use.\n", + "\n", + "There are some technical differences between the models, like different input size, model size, accuracy, and inference time. Here you can change the model you are using until you find the one most suitable for your use case.\n", + "\n", + "The handle (url) of the model is printed for your convenience. 
More documentation about each model is available there.\n", + "\n", + "Note: All these models were trained on the ImageNet dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iQ3aamrBfs-c" + }, + "outputs": [], + "source": [ + "#@title Select an Image Classification model\n", + "\n", + "image_size = 224\n", + "dynamic_size = False\n", + "\n", + "model_name = \"efficientnetv2-s\" # @param ['efficientnetv2-s', 'efficientnetv2-m', 'efficientnetv2-l', 'efficientnetv2-s-21k', 'efficientnetv2-m-21k', 'efficientnetv2-l-21k', 'efficientnetv2-xl-21k', 'efficientnetv2-b0-21k', 'efficientnetv2-b1-21k', 'efficientnetv2-b2-21k', 'efficientnetv2-b3-21k', 'efficientnetv2-s-21k-ft1k', 'efficientnetv2-m-21k-ft1k', 'efficientnetv2-l-21k-ft1k', 'efficientnetv2-xl-21k-ft1k', 'efficientnetv2-b0-21k-ft1k', 'efficientnetv2-b1-21k-ft1k', 'efficientnetv2-b2-21k-ft1k', 'efficientnetv2-b3-21k-ft1k', 'efficientnetv2-b0', 'efficientnetv2-b1', 'efficientnetv2-b2', 'efficientnetv2-b3', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'bit_s-r50x1', 'inception_v3', 'inception_resnet_v2', 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'nasnet_large', 'nasnet_mobile', 'pnasnet_large', 'mobilenet_v2_100_224', 'mobilenet_v2_130_224', 'mobilenet_v2_140_224', 'mobilenet_v3_small_100_224', 'mobilenet_v3_small_075_224', 'mobilenet_v3_large_100_224', 'mobilenet_v3_large_075_224']\n", + "\n", + "model_handle_map = {\n", + " \"efficientnetv2-s\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/classification/2/",\n", + " \"efficientnetv2-m\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/classification/2/",\n", + " \"efficientnetv2-l\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/classification/2/",\n", + " \"efficientnetv2-s-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/classification/2/",\n", + " \"efficientnetv2-m-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/classification/2/",\n", + " \"efficientnetv2-l-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/classification/2/",\n", + " \"efficientnetv2-xl-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_xl/classification/2/",\n", + " \"efficientnetv2-b0-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/classification/2/",\n", + " \"efficientnetv2-b1-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b1/classification/2/",\n", + " \"efficientnetv2-b2-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b2/classification/2/",\n", + " \"efficientnetv2-b3-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b3/classification/2/",\n", + " \"efficientnetv2-s-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/classification/2/",\n", + " \"efficientnetv2-m-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/classification/2/",\n", + " \"efficientnetv2-l-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/classification/2/",\n", + " \"efficientnetv2-xl-21k-ft1k\": 
\"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/classification/2/",\n", + " \"efficientnetv2-b0-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/classification/2/",\n", + " \"efficientnetv2-b1-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b1/classification/2/",\n", + " \"efficientnetv2-b2-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b2/classification/2/",\n", + " \"efficientnetv2-b3-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/classification/2/",\n", + " \"efficientnetv2-b0\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/classification/2/",\n", + " \"efficientnetv2-b1\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b1/classification/2/",\n", + " \"efficientnetv2-b2\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b2/classification/2/",\n", + " \"efficientnetv2-b3\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/classification/2/",\n", + " \"efficientnet_b0\": \"https://tfhub.dev/tensorflow/efficientnet/b0/classification/1/",\n", + " \"efficientnet_b1\": \"https://tfhub.dev/tensorflow/efficientnet/b1/classification/1/",\n", + " \"efficientnet_b2\": \"https://tfhub.dev/tensorflow/efficientnet/b2/classification/1/",\n", + " \"efficientnet_b3\": \"https://tfhub.dev/tensorflow/efficientnet/b3/classification/1/",\n", + " \"efficientnet_b4\": \"https://tfhub.dev/tensorflow/efficientnet/b4/classification/1/",\n", + " \"efficientnet_b5\": \"https://tfhub.dev/tensorflow/efficientnet/b5/classification/1/",\n", + " \"efficientnet_b6\": \"https://tfhub.dev/tensorflow/efficientnet/b6/classification/1/",\n", + " \"efficientnet_b7\": \"https://tfhub.dev/tensorflow/efficientnet/b7/classification/1/",\n", + " \"bit_s-r50x1\": \"https://tfhub.dev/google/bit/s-r50x1/ilsvrc2012_classification/1/",\n", + " \"inception_v3\": \"https://tfhub.dev/google/imagenet/inception_v3/classification/4/",\n", + " \"inception_resnet_v2\": \"https://tfhub.dev/google/imagenet/inception_resnet_v2/classification/4/",\n", + " \"resnet_v1_50\": \"https://tfhub.dev/google/imagenet/resnet_v1_50/classification/4/",\n", + " \"resnet_v1_101\": \"https://tfhub.dev/google/imagenet/resnet_v1_101/classification/4/",\n", + " \"resnet_v1_152\": \"https://tfhub.dev/google/imagenet/resnet_v1_152/classification/4/",\n", + " \"resnet_v2_50\": \"https://tfhub.dev/google/imagenet/resnet_v2_50/classification/4/",\n", + " \"resnet_v2_101\": \"https://tfhub.dev/google/imagenet/resnet_v2_101/classification/4/",\n", + " \"resnet_v2_152\": \"https://tfhub.dev/google/imagenet/resnet_v2_152/classification/4/",\n", + " \"nasnet_large\": \"https://tfhub.dev/google/imagenet/nasnet_large/classification/4/",\n", + " \"nasnet_mobile\": \"https://tfhub.dev/google/imagenet/nasnet_mobile/classification/4/",\n", + " \"pnasnet_large\": \"https://tfhub.dev/google/imagenet/pnasnet_large/classification/4/",\n", + " \"mobilenet_v2_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/classification/4/",\n", + " \"mobilenet_v2_130_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/classification/4/",\n", + " 
\"mobilenet_v2_140_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/classification/4/",\n", + " \"mobilenet_v3_small_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/classification/5/",\n", + " \"mobilenet_v3_small_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/classification/5/",\n", + " \"mobilenet_v3_large_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/classification/5/",\n", + " \"mobilenet_v3_large_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/classification/5/",\n", + "}\n", + "\n", + "model_image_size_map = {\n", + " \"efficientnetv2-s\": 384,\n", + " \"efficientnetv2-m\": 480,\n", + " \"efficientnetv2-l\": 480,\n", + " \"efficientnetv2-b0\": 224,\n", + " \"efficientnetv2-b1\": 240,\n", + " \"efficientnetv2-b2\": 260,\n", + " \"efficientnetv2-b3\": 300,\n", + " \"efficientnetv2-s-21k\": 384,\n", + " \"efficientnetv2-m-21k\": 480,\n", + " \"efficientnetv2-l-21k\": 480,\n", + " \"efficientnetv2-xl-21k\": 512,\n", + " \"efficientnetv2-b0-21k\": 224,\n", + " \"efficientnetv2-b1-21k\": 240,\n", + " \"efficientnetv2-b2-21k\": 260,\n", + " \"efficientnetv2-b3-21k\": 300,\n", + " \"efficientnetv2-s-21k-ft1k\": 384,\n", + " \"efficientnetv2-m-21k-ft1k\": 480,\n", + " \"efficientnetv2-l-21k-ft1k\": 480,\n", + " \"efficientnetv2-xl-21k-ft1k\": 512,\n", + " \"efficientnetv2-b0-21k-ft1k\": 224,\n", + " \"efficientnetv2-b1-21k-ft1k\": 240,\n", + " \"efficientnetv2-b2-21k-ft1k\": 260,\n", + " \"efficientnetv2-b3-21k-ft1k\": 300, \n", + " \"efficientnet_b0\": 224,\n", + " \"efficientnet_b1\": 240,\n", + " \"efficientnet_b2\": 260,\n", + " \"efficientnet_b3\": 300,\n", + " \"efficientnet_b4\": 380,\n", + " \"efficientnet_b5\": 456,\n", + " \"efficientnet_b6\": 528,\n", + " \"efficientnet_b7\": 600,\n", + " \"inception_v3\": 299,\n", + " \"inception_resnet_v2\": 299,\n", + " \"mobilenet_v2_100_224\": 224,\n", + " \"mobilenet_v2_130_224\": 224,\n", + " \"mobilenet_v2_140_224\": 224,\n", + " \"nasnet_large\": 331,\n", + " \"nasnet_mobile\": 224,\n", + " \"pnasnet_large\": 331,\n", + " \"resnet_v1_50\": 224,\n", + " \"resnet_v1_101\": 224,\n", + " \"resnet_v1_152\": 224,\n", + " \"resnet_v2_50\": 224,\n", + " \"resnet_v2_101\": 224,\n", + " \"resnet_v2_152\": 224,\n", + " \"mobilenet_v3_small_100_224\": 224,\n", + " \"mobilenet_v3_small_075_224\": 224,\n", + " \"mobilenet_v3_large_100_224\": 224,\n", + " \"mobilenet_v3_large_075_224\": 224,\n", + "}\n", + "\n", + "model_handle = model_handle_map[model_name]\n", + "\n", + "print(f\"Selected model: {model_name} : {model_handle}\")\n", + "\n", + "\n", + "max_dynamic_size = 512\n", + "if model_name in model_image_size_map:\n", + " image_size = model_image_size_map[model_name]\n", + " dynamic_size = False\n", + " print(f\"Images will be converted to {image_size}x{image_size}\")\n", + "else:\n", + " dynamic_size = True\n", + " print(f\"Images will be capped to a max size of {max_dynamic_size}x{max_dynamic_size}\")\n", + "\n", + "labels_file = \"https://storage.googleapis.com/download.tensorflow.org/data/ImageNetLabels.txt/"\n", + "\n", + "#download labels and creates a maps\n", + "downloaded_file = tf.keras.utils.get_file(\"labels.txt\", origin=labels_file)\n", + "\n", + "classes = []\n", + "\n", + "with open(downloaded_file) as f:\n", + " labels = f.readlines()\n", + " classes = [l.strip() for l in labels]\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": 
"vxcASidjBAE8" + }, + "source": [ + "You can select one of the images below, or use your own image. Just remember that the input size for the models vary and some of them use a dynamic input size (enabling inference on the unscaled image). Given that, the method `load_image` will already rescale the image to the expected format." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "o2rMsr4CgET2" + }, + "outputs": [], + "source": [ + "#@title Select an Input Image\n", + "\n", + "image_name = \"turtle\" # @param ['tiger', 'bus', 'car', 'cat', 'dog', 'apple', 'banana', 'turtle', 'flamingo', 'piano', 'honeycomb', 'teapot']\n", + "\n", + "images_for_test_map = {\n", + " \"tiger\": \"https://upload.wikimedia.org/wikipedia/commons/b/b0/Bengal_tiger_%28Panthera_tigris_tigris%29_female_3_crop.jpg/",\n", + " #by Charles James Sharp, CC BY-SA 4.0 , via Wikimedia Commons\n", + " \"bus\": \"https://upload.wikimedia.org/wikipedia/commons/6/63/LT_471_%28LTZ_1471%29_Arriva_London_New_Routemaster_%2819522859218%29.jpg/",\n", + " #by Martin49 from London, England, CC BY 2.0 , via Wikimedia Commons\n", + " \"car\": \"https://upload.wikimedia.org/wikipedia/commons/4/49/2013-2016_Toyota_Corolla_%28ZRE172R%29_SX_sedan_%282018-09-17%29_01.jpg/",\n", + " #by EurovisionNim, CC BY-SA 4.0 , via Wikimedia Commons\n", + " \"cat\": \"https://upload.wikimedia.org/wikipedia/commons/4/4d/Cat_November_2010-1a.jpg/",\n", + " #by Alvesgaspar, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"dog\": \"https://upload.wikimedia.org/wikipedia/commons/archive/a/a9/20090914031557%21Saluki_dog_breed.jpg/",\n", + " #by Craig Pemberton, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"apple\": \"https://upload.wikimedia.org/wikipedia/commons/1/15/Red_Apple.jpg/",\n", + " #by Abhijit Tembhekar from Mumbai, India, CC BY 2.0 , via Wikimedia Commons\n", + " \"banana\": \"https://upload.wikimedia.org/wikipedia/commons/1/1c/Bananas_white_background.jpg/",\n", + " #by fir0002 flagstaffotos [at] gmail.com\t\tCanon 20D + Tamron 28-75mm f/2.8, GFDL 1.2 , via Wikimedia Commons\n", + " \"turtle\": \"https://upload.wikimedia.org/wikipedia/commons/8/80/Turtle_golfina_escobilla_oaxaca_mexico_claudio_giovenzana_2010.jpg/",\n", + " #by Claudio Giovenzana, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"flamingo\": \"https://upload.wikimedia.org/wikipedia/commons/b/b8/James_Flamingos_MC.jpg/",\n", + " #by Christian Mehlführer, User:Chmehl, CC BY 3.0 , via Wikimedia Commons\n", + " \"piano\": \"https://upload.wikimedia.org/wikipedia/commons/d/da/Steinway_%26_Sons_upright_piano%2C_model_K-132%2C_manufactured_at_Steinway%27s_factory_in_Hamburg%2C_Germany.png/",\n", + " #by \"Photo: © Copyright Steinway & Sons\", CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"honeycomb\": \"https://upload.wikimedia.org/wikipedia/commons/f/f7/Honey_comb.jpg/",\n", + " #by Merdal, CC BY-SA 3.0 , via Wikimedia Commons\n", + " \"teapot\": \"https://upload.wikimedia.org/wikipedia/commons/4/44/Black_tea_pot_cropped.jpg/",\n", + " #by Mendhak, CC BY-SA 2.0 , via Wikimedia Commons\n", + "}\n", + "\n", + "img_url = images_for_test_map[image_name]\n", + "image, original_image = load_image(img_url, image_size, dynamic_size, max_dynamic_size)\n", + "show_image(image, 'Scaled image')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CMwWx8_8Aw3X" + }, + "source": [ + "Now that the model was chosen, loading it 
with TensorFlow Hub is simple.\n", + "\n", + "This also calls the model with a random input as a \"warmup\" run. Subsequent calls are often much faster, and you can compare this with the latency below.\n", + "\n", + "*Note:* models that use a dynamic size might need a fresh \"warmup\" run for each image size." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LRAccT3UhRga" + }, + "outputs": [], + "source": [ + "classifier = hub.load(model_handle)\n", + "\n", + "input_shape = image.shape\n", + "warmup_input = tf.random.uniform(input_shape, 0, 1.0)\n", + "%time warmup_logits = classifier(warmup_input).numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e7vkdUqpBkfE" + }, + "source": [ + "Everything is ready for inference. Here you can see the top 5 results from the model for the selected image." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I0QNHg3bk-G1" + }, + "outputs": [], + "source": [ + "# Run model on image\n", + "%time probabilities = tf.nn.softmax(classifier(image)).numpy()\n", + "\n", + "top_5 = tf.argsort(probabilities, axis=-1, direction=\"DESCENDING\")[0][:5].numpy()\n", + "np_classes = np.array(classes)\n", + "\n", + "# Some models include an additional 'background' class in the predictions, so\n", + "# we must account for this when reading the class labels.\n", + "includes_background_class = probabilities.shape[1] == 1001\n", + "\n", + "for i, item in enumerate(top_5):\n", + " class_index = item if includes_background_class else item + 1\n", + " line = f'({i+1}) {class_index:4} - {classes[class_index]}: {probabilities[0][top_5][i]}'\n", + " print(line)\n", + "\n", + "show_image(image, '')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4IJrq5eZDWl" + }, + "source": [ + "## Learn More\n", + "\n", + "If you want to learn more and try how to do Transfer Learning with these models you can try this tutorial: [Transfer Learning for Image classification](https://www.tensorflow.org/hub/tutorials/tf2_image_retraining) \n", + "\n", + "If you want to check on more image models you can check them out on [tfhub.dev](https://tfhub.dev/s?module-type=image-augmentation,image-classification,image-classification-logits,image-classifier,image-feature-vector,image-generator,image-object-detection,image-others,image-pose-detection,image-segmentation,image-style-transfer,image-super-resolution,image-rnn-agent)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "image_classification.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_enhancing.ipynb b/site/en/hub/tutorials/image_enhancing.ipynb new file mode 100644 index 00000000000..3710ebd6d66 --- /dev/null +++ b/site/en/hub/tutorials/image_enhancing.ipynb @@ -0,0 +1,455 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "GeerbrLA0uju" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "\n", + "Created by @[Adrish Dey](https://github.com/captain-pool) for [Google Summer of Code](https://summerofcode.withgoogle.com/) 2019" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yntM0JbY0uj5" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS, \n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UJeo2a5C0uj2" + }, + "source": [ + "# Image Super Resolution using ESRGAN" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ps4toA1d_tkc" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LkW9jAmt_zjB" + }, + "source": [ + "This colab demonstrates use of TensorFlow Hub Module for Enhanced Super Resolution Generative Adversarial Network (*by Xintao Wang et.al.*) [[Paper](https://arxiv.org/pdf/1809.00219.pdf)] [[Code](https://github.com/captain-pool/GSOC/)]\n", + "\n", + "for image enhancing. *(Preferrably bicubically downsampled images).*\n", + "\n", + "Model trained on DIV2K Dataset (on bicubically downsampled images) on image patches of size 128 x 128." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LBGty4O_0ukJ" + }, + "source": [ + "**Preparing Environment**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lnyLTyUt0ukN" + }, + "outputs": [], + "source": [ + "import os\n", + "import time\n", + "from PIL import Image\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "os.environ[\"TFHUB_DOWNLOAD_PROGRESS\"] = \"True\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dremsFdh0ukX" + }, + "outputs": [], + "source": [ + "!wget \"https://user-images.githubusercontent.com/12981474/40157448-eff91f06-5953-11e8-9a37-f6b5693fa03f.png/" -O original.png" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DXot2kru0ukh" + }, + "outputs": [], + "source": [ + "# Declaring Constants\n", + "IMAGE_PATH = \"original.png\"\n", + "SAVED_MODEL_PATH = \"https://tfhub.dev/captain-pool/esrgan-tf2/1/"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KF_tHde-p3rn" + }, + "source": [ + "**Defining Helper Functions**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IslbQmTj0ukz" + }, + "outputs": [], + "source": [ + "def preprocess_image(image_path):\n", + " \"\"\" Loads image from path and preprocesses to make it model ready\n", + " Args:\n", + " image_path: Path to the image file\n", + " \"\"\"\n", + " hr_image = tf.image.decode_image(tf.io.read_file(image_path))\n", + " # If PNG, remove the alpha channel. The model only supports\n", + " # images with 3 color channels.\n", + " if hr_image.shape[-1] == 4:\n", + " hr_image = hr_image[...,:-1]\n", + " hr_size = (tf.convert_to_tensor(hr_image.shape[:-1]) // 4) * 4\n", + " hr_image = tf.image.crop_to_bounding_box(hr_image, 0, 0, hr_size[0], hr_size[1])\n", + " hr_image = tf.cast(hr_image, tf.float32)\n", + " return tf.expand_dims(hr_image, 0)\n", + "\n", + "def save_image(image, filename):\n", + " \"\"\"\n", + " Saves unscaled Tensor Images.\n", + " Args:\n", + " image: 3D image tensor. [height, width, channels]\n", + " filename: Name of the file to save.\n", + " \"\"\"\n", + " if not isinstance(image, Image.Image):\n", + " image = tf.clip_by_value(image, 0, 255)\n", + " image = Image.fromarray(tf.cast(image, tf.uint8).numpy())\n", + " image.save(\"%s.jpg\" % filename)\n", + " print(\"Saved as %s.jpg\" % filename)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uh1E2rBpnWxV" + }, + "outputs": [], + "source": [ + "%matplotlib inline\n", + "def plot_image(image, title=\"\"):\n", + " \"\"\"\n", + " Plots images from image tensors.\n", + " Args:\n", + " image: 3D image tensor. 
[height, width, channels].\n", + " title: Title to display in the plot.\n", + " \"\"\"\n", + " image = np.asarray(image)\n", + " image = tf.clip_by_value(image, 0, 255)\n", + " image = Image.fromarray(tf.cast(image, tf.uint8).numpy())\n", + " plt.imshow(image)\n", + " plt.axis(\"off\")\n", + " plt.title(title)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ycrCTvmlqBMD" + }, + "source": [ + "#### Performing Super Resolution of images loaded from path" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L7XpMk8Y0uk7" + }, + "outputs": [], + "source": [ + "hr_image = preprocess_image(IMAGE_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hWgCbUa_0ulG" + }, + "outputs": [], + "source": [ + "# Plotting Original Resolution image\n", + "plot_image(tf.squeeze(hr_image), title=\"Original Image\")\n", + "save_image(tf.squeeze(hr_image), filename=\"Original Image\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ouwEyKLA0ulO" + }, + "outputs": [], + "source": [ + "model = hub.load(SAVED_MODEL_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dz79ncnT0ulX" + }, + "outputs": [], + "source": [ + "start = time.time()\n", + "fake_image = model(hr_image)\n", + "fake_image = tf.squeeze(fake_image)\n", + "print(\"Time Taken: %f\" % (time.time() - start))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ABjkkJHC2jNL" + }, + "outputs": [], + "source": [ + "# Plotting Super Resolution Image\n", + "plot_image(tf.squeeze(fake_image), title=\"Super Resolution\")\n", + "save_image(tf.squeeze(fake_image), filename=\"Super Resolution\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tuKu18UYptkx" + }, + "source": [ + "### Evaluating Performance of the Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Qdz55sxMgiwO" + }, + "outputs": [], + "source": [ + "!wget \"https://lh4.googleusercontent.com/-Anmw5df4gj0/AAAAAAAAAAI/AAAAAAAAAAc/6HxU8XFLnQE/photo.jpg64/" -O test.jpg\n", + "IMAGE_PATH = \"test.jpg\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F6tMNtqy0ukq" + }, + "outputs": [], + "source": [ + "# Defining helper functions\n", + "def downscale_image(image):\n", + " \"\"\"\n", + " Scales down images using bicubic downsampling.\n", + " Args:\n", + " image: 3D or 4D tensor of preprocessed image\n", + " \"\"\"\n", + " image_size = []\n", + " if len(image.shape) == 3:\n", + " image_size = [image.shape[1], image.shape[0]]\n", + " else:\n", + " raise ValueError(\"Dimension mismatch. 
Can work only on single image.\")\n", + "\n", + " image = tf.squeeze(\n", + " tf.cast(\n", + " tf.clip_by_value(image, 0, 255), tf.uint8))\n", + "\n", + " lr_image = np.asarray(\n", + " Image.fromarray(image.numpy())\n", + " .resize([image_size[0] // 4, image_size[1] // 4],\n", + " Image.BICUBIC))\n", + "\n", + " lr_image = tf.expand_dims(lr_image, 0)\n", + " lr_image = tf.cast(lr_image, tf.float32)\n", + " return lr_image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r2ANR1XDy77I" + }, + "outputs": [], + "source": [ + "hr_image = preprocess_image(IMAGE_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r_defaultO6qbTV" + }, + "outputs": [], + "source": [ + "lr_image = downscale_image(tf.squeeze(hr_image))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jRw1x6xY0ulj" + }, + "outputs": [], + "source": [ + "# Plotting Low Resolution Image\n", + "plot_image(tf.squeeze(lr_image), title=\"Low Resolution\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "g--yyHg7qXCw" + }, + "outputs": [], + "source": [ + "model = hub.load(SAVED_MODEL_PATH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZX-deZlhqaYz" + }, + "outputs": [], + "source": [ + "start = time.time()\n", + "fake_image = model(lr_image)\n", + "fake_image = tf.squeeze(fake_image)\n", + "print(\"Time Taken: %f\" % (time.time() - start))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AmSga6MSq1PB" + }, + "outputs": [], + "source": [ + "plot_image(tf.squeeze(fake_image), title=\"Super Resolution\")\n", + "# Calculating PSNR wrt Original Image\n", + "psnr = tf.image.psnr(\n", + " tf.clip_by_value(fake_image, 0, 255),\n", + " tf.clip_by_value(hr_image, 0, 255), max_val=255)\n", + "print(\"PSNR Achieved: %f\" % psnr)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5YTBKCXPq9UZ" + }, + "source": [ + "**Comparing Outputs size by side.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ubdupldDypCy" + }, + "outputs": [], + "source": [ + "plt.rcParams['figure.figsize'] = [15, 10]\n", + "fig, axes = plt.subplots(1, 3)\n", + "fig.tight_layout()\n", + "plt.subplot(131)\n", + "plot_image(tf.squeeze(hr_image), title=\"Original\")\n", + "plt.subplot(132)\n", + "fig.tight_layout()\n", + "plot_image(tf.squeeze(lr_image), \"x4 Bicubic\")\n", + "plt.subplot(133)\n", + "fig.tight_layout()\n", + "plot_image(tf.squeeze(fake_image), \"Super Resolution\")\n", + "plt.savefig(\"ESRGAN_DIV2K.jpg\", bbox_inches=\"tight\")\n", + "print(\"PSNR: %f\" % psnr)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "image_enhancing.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/image_feature_vector.ipynb b/site/en/hub/tutorials/image_feature_vector.ipynb new file mode 100644 index 00000000000..b5283c45b3d --- /dev/null +++ b/site/en/hub/tutorials/image_feature_vector.ipynb @@ -0,0 +1,533 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"id": "bNnChGfZK2_w" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9Z_ZvMk5JPFV" + }, + "source": [ + "# Classify Flowers with Transfer Learning\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gh-LWtlqLtgH" + }, + "source": [ + "Have you ever seen a beautiful flower and wondered what kind of flower it is? Well, you're not the first, so let's build a way to identify the type of flower from a photo!\n", + "\n", + "For classifying images, a particular type of *deep neural network*, called a *convolutional neural network* has proved to be particularly powerful. However, modern convolutional neural networks have millions of parameters. Training them from scratch requires a lot of labeled training data and a lot of computing power (hundreds of GPU-hours or more). We only have about three thousand labeled photos and want to spend much less time, so we need to be more clever.\n", + "\n", + "We will use a technique called *transfer learning* where we take a pre-trained network (trained on about a million general images), use it to extract features, and train a new layer on top for our own task of classifying images of flowers.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NTrs9zBKJK1c" + }, + "outputs": [], + "source": [ + "import collections\n", + "import io\n", + "import math\n", + "import os\n", + "import random\n", + "from six.moves import urllib\n", + "\n", + "from IPython.display import clear_output, Image, display, HTML\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import sklearn.metrics as sk_metrics\n", + "import time" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Do-T63G7NCSB" + }, + "source": [ + "## The flowers dataset\n", + "\n", + "The flowers dataset consists of images of flowers with 5 possible class labels.\n", + "\n", + "When training a machine learning model, we split our data into training and test datasets. We will train the model on our training data and then evaluate how well the model performs on data it has never seen - the test set.\n", + "\n", + "Let's download our training and test examples (it may take a while) and split them into train and test sets.\n", + "\n", + "Run the following two cells:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "HYQr1SILIxSK" + }, + "outputs": [], + "source": [ + "FLOWERS_DIR = './flower_photos'\n", + "TRAIN_FRACTION = 0.8\n", + "RANDOM_SEED = 2018\n", + "\n", + "\n", + "def download_images():\n", + " \"\"\"If the images aren't already downloaded, save them to FLOWERS_DIR.\"\"\"\n", + " if not os.path.exists(FLOWERS_DIR):\n", + " DOWNLOAD_URL = 'http://download.tensorflow.org/example_images/flower_photos.tgz'\n", + " print('Downloading flower images from %s...' 
% DOWNLOAD_URL)\n", + " urllib.request.urlretrieve(DOWNLOAD_URL, 'flower_photos.tgz')\n", + " !tar xfz flower_photos.tgz\n", + " print('Flower photos are located in %s' % FLOWERS_DIR)\n", + "\n", + "\n", + "def make_train_and_test_sets():\n", + " \"\"\"Split the data into train and test sets and get the label classes.\"\"\"\n", + " train_examples, test_examples = [], []\n", + " shuffler = random.Random(RANDOM_SEED)\n", + " is_root = True\n", + " for (dirname, subdirs, filenames) in tf.gfile.Walk(FLOWERS_DIR):\n", + " # The root directory gives us the classes\n", + " if is_root:\n", + " subdirs = sorted(subdirs)\n", + " classes = collections.OrderedDict(enumerate(subdirs))\n", + " label_to_class = dict([(x, i) for i, x in enumerate(subdirs)])\n", + " is_root = False\n", + " # The sub directories give us the image files for training.\n", + " else:\n", + " filenames.sort()\n", + " shuffler.shuffle(filenames)\n", + " full_filenames = [os.path.join(dirname, f) for f in filenames]\n", + " label = dirname.split('/')[-1]\n", + " label_class = label_to_class[label]\n", + " # An example is the image file and it's label class.\n", + " examples = list(zip(full_filenames, [label_class] * len(filenames)))\n", + " num_train = int(len(filenames) * TRAIN_FRACTION)\n", + " train_examples.extend(examples[:num_train])\n", + " test_examples.extend(examples[num_train:])\n", + "\n", + " shuffler.shuffle(train_examples)\n", + " shuffler.shuffle(test_examples)\n", + " return train_examples, test_examples, classes\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_9NklpcANhtB" + }, + "outputs": [], + "source": [ + "# Download the images and split the images into train and test sets.\n", + "download_images()\n", + "TRAIN_EXAMPLES, TEST_EXAMPLES, CLASSES = make_train_and_test_sets()\n", + "NUM_CLASSES = len(CLASSES)\n", + "\n", + "print('\\nThe dataset has %d label classes: %s' % (NUM_CLASSES, CLASSES.values()))\n", + "print('There are %d training images' % len(TRAIN_EXAMPLES))\n", + "print('there are %d test images' % len(TEST_EXAMPLES))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tHF7bHTfnD6S" + }, + "source": [ + "## Explore the data\n", + "\n", + "The flowers dataset consists of examples which are labeled images of flowers. Each example contains a JPEG flower image and the class label: what type of flower it is. Let's display a few images together with their labels." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "1friUvN6kPYM" + }, + "outputs": [], + "source": [ + "#@title Show some labeled images\n", + "def get_label(example):\n", + " \"\"\"Get the label (number) for given example.\"\"\"\n", + " return example[1]\n", + "\n", + "def get_class(example):\n", + " \"\"\"Get the class (string) of given example.\"\"\"\n", + " return CLASSES[get_label(example)]\n", + "\n", + "def get_encoded_image(example):\n", + " \"\"\"Get the image data (encoded jpg) of given example.\"\"\"\n", + " image_path = example[0]\n", + " return tf.gfile.GFile(image_path, 'rb').read()\n", + "\n", + "def get_image(example):\n", + " \"\"\"Get image as np.array of pixels for given example.\"\"\"\n", + " return plt.imread(io.BytesIO(get_encoded_image(example)), format='jpg')\n", + "\n", + "def display_images(images_and_classes, cols=5):\n", + " \"\"\"Display given images and their labels in a grid.\"\"\"\n", + " rows = int(math.ceil(len(images_and_classes) / cols))\n", + " fig = plt.figure()\n", + " fig.set_size_inches(cols * 3, rows * 3)\n", + " for i, (image, flower_class) in enumerate(images_and_classes):\n", + " plt.subplot(rows, cols, i + 1)\n", + " plt.axis('off')\n", + " plt.imshow(image)\n", + " plt.title(flower_class)\n", + "\n", + "NUM_IMAGES = 15 #@param {type: 'integer'}\n", + "display_images([(get_image(example), get_class(example))\n", + " for example in TRAIN_EXAMPLES[:NUM_IMAGES]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Hyjr6PuboTAg" + }, + "source": [ + "## Build the model\n", + "\n", + "We will load a [TF-Hub](https://tensorflow.org/hub) image feature vector module, stack a linear classifier on it, and add training and evaluation ops. The following cell builds a TF graph describing the model and its training, but it doesn't run the training (that will be the next step)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbkSRaK_oW5Y" + }, + "outputs": [], + "source": [ + "LEARNING_RATE = 0.01\n", + "\n", + "tf.reset_default_graph()\n", + "\n", + "# Load a pre-trained TF-Hub module for extracting features from images. We've\n", + "# chosen this particular module for speed, but many other choices are available.\n", + "image_module = hub.Module('https://tfhub.dev/google/imagenet/mobilenet_v2_035_128/feature_vector/2')\n", + "\n", + "# Preprocessing images into tensors with size expected by the image module.\n", + "encoded_images = tf.placeholder(tf.string, shape=[None])\n", + "image_size = hub.get_expected_image_size(image_module)\n", + "\n", + "\n", + "def decode_and_resize_image(encoded):\n", + " decoded = tf.image.decode_jpeg(encoded, channels=3)\n", + " decoded = tf.image.convert_image_dtype(decoded, tf.float32)\n", + " return tf.image.resize_images(decoded, image_size)\n", + "\n", + "\n", + "batch_images = tf.map_fn(decode_and_resize_image, encoded_images, dtype=tf.float32)\n", + "\n", + "# The image module can be applied as a function to extract feature vectors for a\n", + "# batch of images.\n", + "features = image_module(batch_images)\n", + "\n", + "\n", + "def create_model(features):\n", + " \"\"\"Build a model for classification from extracted features.\"\"\"\n", + " # Currently, the model is just a single linear layer. You can try to add\n", + " # another layer, but be careful... two linear layers (when activation=None)\n", + " # are equivalent to a single linear layer. 
You can create a nonlinear layer\n", + " # like this:\n", + " # layer = tf.layers.dense(inputs=..., units=..., activation=tf.nn.relu)\n", + " layer = tf.layers.dense(inputs=features, units=NUM_CLASSES, activation=None)\n", + " return layer\n", + "\n", + "\n", + "# For each class (kind of flower), the model outputs some real number as a score\n", + "# how much the input resembles this class. This vector of numbers is often\n", + "# called the \"logits\".\n", + "logits = create_model(features)\n", + "labels = tf.placeholder(tf.float32, [None, NUM_CLASSES])\n", + "\n", + "# Mathematically, a good way to measure how much the predicted probabilities\n", + "# diverge from the truth is the \"cross-entropy\" between the two probability\n", + "# distributions. For numerical stability, this is best done directly from the\n", + "# logits, not the probabilities extracted from them.\n", + "cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels)\n", + "cross_entropy_mean = tf.reduce_mean(cross_entropy)\n", + "\n", + "# Let's add an optimizer so we can train the network.\n", + "optimizer = tf.train.GradientDescentOptimizer(learning_rate=LEARNING_RATE)\n", + "train_op = optimizer.minimize(loss=cross_entropy_mean)\n", + "\n", + "# The \"softmax\" function transforms the logits vector into a vector of\n", + "# probabilities: non-negative numbers that sum up to one, and the i-th number\n", + "# says how likely the input comes from class i.\n", + "probabilities = tf.nn.softmax(logits)\n", + "\n", + "# We choose the highest one as the predicted class.\n", + "prediction = tf.argmax(probabilities, 1)\n", + "correct_prediction = tf.equal(prediction, tf.argmax(labels, 1))\n", + "\n", + "# The accuracy will allow us to eval on our test set. \n", + "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0vvhYQ7-3AG_" + }, + "source": [ + "## Train the network\n", + "\n", + "Now that our model is built, let's train it and see how it performs on our test set." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1YnBg7-OS3Dz" + }, + "outputs": [], + "source": [ + "# How long will we train the network (number of batches).\n", + "NUM_TRAIN_STEPS = 100 #@param {type: 'integer'}\n", + "# How many training examples we use in each step.\n", + "TRAIN_BATCH_SIZE = 10 #@param {type: 'integer'}\n", + "# How often to evaluate the model performance.\n", + "EVAL_EVERY = 10 #@param {type: 'integer'}\n", + "\n", + "def get_batch(batch_size=None, test=False):\n", + " \"\"\"Get a random batch of examples.\"\"\"\n", + " examples = TEST_EXAMPLES if test else TRAIN_EXAMPLES\n", + " batch_examples = random.sample(examples, batch_size) if batch_size else examples\n", + " return batch_examples\n", + "\n", + "def get_images_and_labels(batch_examples):\n", + " images = [get_encoded_image(e) for e in batch_examples]\n", + " one_hot_labels = [get_label_one_hot(e) for e in batch_examples]\n", + " return images, one_hot_labels\n", + "\n", + "def get_label_one_hot(example):\n", + " \"\"\"Get the one hot encoding vector for the example.\"\"\"\n", + " one_hot_vector = np.zeros(NUM_CLASSES)\n", + " np.put(one_hot_vector, get_label(example), 1)\n", + " return one_hot_vector\n", + "\n", + "with tf.Session() as sess:\n", + " sess.run(tf.global_variables_initializer())\n", + " for i in range(NUM_TRAIN_STEPS):\n", + " # Get a random batch of training examples.\n", + " train_batch = get_batch(batch_size=TRAIN_BATCH_SIZE)\n", + " batch_images, batch_labels = get_images_and_labels(train_batch)\n", + " # Run the train_op to train the model.\n", + " train_loss, _, train_accuracy = sess.run(\n", + " [cross_entropy_mean, train_op, accuracy],\n", + " feed_dict={encoded_images: batch_images, labels: batch_labels})\n", + " is_final_step = (i == (NUM_TRAIN_STEPS - 1))\n", + " if i % EVAL_EVERY == 0 or is_final_step:\n", + " # Get a batch of test examples.\n", + " test_batch = get_batch(batch_size=None, test=True)\n", + " batch_images, batch_labels = get_images_and_labels(test_batch)\n", + " # Evaluate how well our model performs on the test set.\n", + " test_loss, test_accuracy, test_prediction, correct_predicate = sess.run(\n", + " [cross_entropy_mean, accuracy, prediction, correct_prediction],\n", + " feed_dict={encoded_images: batch_images, labels: batch_labels})\n", + " print('Test accuracy at step %s: %.2f%%' % (i, (test_accuracy * 100)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZFUNJxuH2t0V" + }, + "outputs": [], + "source": [ + "def show_confusion_matrix(test_labels, predictions):\n", + " \"\"\"Compute confusion matrix and normalize.\"\"\"\n", + " confusion = sk_metrics.confusion_matrix(\n", + " np.argmax(test_labels, axis=1), predictions)\n", + " confusion_normalized = confusion.astype(\"float\") / confusion.sum(axis=1)\n", + " axis_labels = list(CLASSES.values())\n", + " ax = sns.heatmap(\n", + " confusion_normalized, xticklabels=axis_labels, yticklabels=axis_labels,\n", + " cmap='Blues', annot=True, fmt='.2f', square=True)\n", + " plt.title(\"Confusion matrix\")\n", + " plt.ylabel(\"True label\")\n", + " plt.xlabel(\"Predicted label\")\n", + "\n", + "show_confusion_matrix(batch_labels, test_prediction)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uu3vo8DK8BdL" + }, + "source": [ + "## Incorrect predictions\n", + "\n", + "Let's a take a closer look at the test examples that our model got wrong.\n", + "\n", + "- Are there any mislabeled examples in our test set?\n", + "- Is there any 
bad data in the test set - images that aren't actually pictures of flowers?\n", + "- Are there images where you can understand why the model made a mistake?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hqa0V3WN8C9M" + }, + "outputs": [], + "source": [ + "incorrect = [\n", + "  (example, CLASSES[prediction])\n", + "  for example, prediction, is_correct in zip(test_batch, test_prediction, correct_predicate)\n", + "  if not is_correct\n", + "]\n", + "display_images(\n", + "  [(get_image(example), \"prediction: {0}\\nlabel:{1}\".format(incorrect_prediction, get_class(example)))\n", + "   for (example, incorrect_prediction) in incorrect[:20]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YN_s04Il8TvK" + }, + "source": [ + "## Exercises: Improve the model!\n", + "\n", + "We've trained a baseline model; now let's try to improve it to achieve better accuracy. (Remember that you'll need to re-run the cells when you make a change.)\n", + "\n", + "### Exercise 1: Try a different image model.\n", + "With TF-Hub, trying a few different image models is simple. Just replace the `\"https://tfhub.dev/google/imagenet/mobilenet_v2_050_128/feature_vector/2/"` handle in the `hub.Module()` call with the handle of a different module and rerun all the code. You can see all available image modules at [tfhub.dev](https://tfhub.dev/s?module-type=image-feature-vector).\n", + "\n", + "A good choice might be one of the other [MobileNet V2 modules](https://tfhub.dev/s?module-type=image-feature-vector&network-architecture=mobilenet-v2). Many of the modules -- including the MobileNet modules -- were trained on the [ImageNet dataset](https://www.tensorflow.org/datasets/catalog/imagenet2012), which contains over 1 million images and 1000 classes. Choosing a network architecture provides a tradeoff between speed and classification accuracy: models like MobileNet or NASNet Mobile are fast and small, while more traditional architectures like Inception and ResNet were designed for accuracy.\n", + "\n", + "For the larger Inception V3 architecture, you can also explore the benefits of pre-training on a domain closer to your own task: it is also available as a [module trained on the iNaturalist dataset](https://tfhub.dev/google/inaturalist/inception_v3/feature_vector/1) of plants and animals.\n", + "\n", + "### Exercise 2: Add a hidden layer.\n", + "Stack a hidden layer between the extracted image features and the linear classifier (in the function `create_model()` above). To create a non-linear hidden layer with e.g. 100 nodes, use [tf.layers.dense](https://www.tensorflow.org/api_docs/python/tf/compat/v1/layers/dense) with units set to 100 and activation set to `tf.nn.relu`. Does changing the size of the hidden layer affect the test accuracy? Does adding a second hidden layer improve the accuracy? A minimal sketch for this exercise (and Exercise 4) is shown below.\n", + "\n", + "### Exercise 3: Change hyperparameters.\n", + "Does increasing the *number of training steps* improve the final accuracy? Can you *change the learning rate* to make your model converge more quickly? Does the training *batch size* affect your model's performance?\n", + "\n", + "### Exercise 4: Try a different optimizer.\n", + "\n", + "Replace the basic GradientDescentOptimizer with a more sophisticated optimizer, e.g. [AdagradOptimizer](https://www.tensorflow.org/api_docs/python/tf/compat/v1/train/AdagradOptimizer). Does it make a difference to your model training?
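A minimal sketch for Exercises 2 and 4 (an illustration, not the tutorial's official solution): it reuses the notebook's `tensorflow.compat.v1` import and the `features`, `NUM_CLASSES`, `LEARNING_RATE`, and `cross_entropy_mean` names defined in the model-building cell above; the hidden-layer width of 100 is just an example value.

```python
# Exercise 2 (sketch): add one non-linear hidden layer before the classifier.
# Assumes the notebook's `import tensorflow.compat.v1 as tf` and the variables
# `features`, `NUM_CLASSES`, `LEARNING_RATE`, and `cross_entropy_mean` from the
# model-building cell above.

def create_model(features):
  """Linear classifier stacked on top of one ReLU hidden layer."""
  hidden = tf.layers.dense(inputs=features, units=100, activation=tf.nn.relu)
  return tf.layers.dense(inputs=hidden, units=NUM_CLASSES, activation=None)

# Exercise 4 (sketch): swap in an adaptive optimizer. Adagrad adjusts the
# effective learning rate per parameter, which can reduce manual tuning.
optimizer = tf.train.AdagradOptimizer(learning_rate=LEARNING_RATE)
train_op = optimizer.minimize(loss=cross_entropy_mean)
```

After editing `create_model()` and the optimizer in the model-building cell, re-run that cell and the training cell so the graph is rebuilt with the new layer and optimizer.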
If you want to learn more about the benefits of different optimization algorithms, check out [this post](http://ruder.io/optimizing-gradient-descent/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kdwVXO1eJS5-" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "If you are interested in a more advanced version of this tutorial, check out the [TensorFlow image retraining tutorial](https://www.tensorflow.org/hub/tutorials/image_retraining) which walks you through visualizing the training using TensorBoard, advanced techniques like dataset augmentation by distorting images, and replacing the flowers dataset to learn an image classifier on your own dataset.\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](http://tensorflow.org) and see the TF-Hub API documentation is available at [tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub modules at [tfhub.dev](http://tfhub.dev) including more image feature vector modules and text embedding modules.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ScitaPqhKtuW" + ], + "name": "image_feature_vector.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/movenet.ipynb b/site/en/hub/tutorials/movenet.ipynb new file mode 100644 index 00000000000..f7955a5253b --- /dev/null +++ b/site/en/hub/tutorials/movenet.ipynb @@ -0,0 +1,816 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "toCy3v03Dwx7" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QKe-ubNcDvgv" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KqtQzBCpIJ7Y" + }, + "source": [ + "# MoveNet: Ultra fast and accurate pose detection model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MCmFOosnSkCd" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6x99e0aEY_d6" + }, + "source": [ + "**[MoveNet](https://t.co/QpfnVL0YYI?amp=1)** is an ultra fast and accurate model that detects 17 keypoints of a body. The model is offered on [TF Hub](https://tfhub.dev/s?q=movenet) with two variants, known as Lightning and Thunder. Lightning is intended for latency-critical applications, while Thunder is intended for applications that require high accuracy. Both models run faster than real time (30+ FPS) on most modern desktops, laptops, and phones, which proves crucial for live fitness, health, and wellness applications.\n", + "\n", + "\n", + "\"drawing\"/\n", + "\n", + "*Images downloaded from Pexels (https://www.pexels.com/)\n", + "\n", + "This Colab walks you through the details of how to load MoveNet, and run inference on the input image and video below.\n", + "\n", + "Note: check out the [live demo](https://storage.googleapis.com/tfjs-models/demos/pose-detection/index.html?model=movenet) for how the model works!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "10_zkgbZBkIE" + }, + "source": [ + "# Human Pose Estimation with MoveNet" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9u_VGR6_BmbZ" + }, + "source": [ + "## Visualization libraries & Imports" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TtcwSIcgbIVN" + }, + "outputs": [], + "source": [ + "!pip install -q imageio\n", + "!pip install -q opencv-python\n", + "!pip install -q git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9BLeJv-pCCld" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "import numpy as np\n", + "import cv2\n", + "\n", + "# Import matplotlib libraries\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.collections import LineCollection\n", + "import matplotlib.patches as patches\n", + "\n", + "# Some modules to display an animation using imageio.\n", + "import imageio\n", + "from IPython.display import HTML, display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "bEJBMeRb3YUy" + }, + "outputs": [], + "source": [ + "#@title Helper functions for visualization\n", + "\n", + "# Dictionary that maps from joint names to keypoint indices.\n", + "KEYPOINT_DICT = {\n", + " 'nose': 0,\n", + " 'left_eye': 1,\n", + " 'right_eye': 2,\n", + " 'left_ear': 3,\n", + " 'right_ear': 4,\n", + " 'left_shoulder': 5,\n", + " 'right_shoulder': 6,\n", + " 'left_elbow': 7,\n", + " 'right_elbow': 8,\n", + " 'left_wrist': 9,\n", + " 'right_wrist': 10,\n", + " 'left_hip': 11,\n", + " 'right_hip': 12,\n", + " 'left_knee': 13,\n", + " 'right_knee': 14,\n", + " 'left_ankle': 15,\n", + " 'right_ankle': 16\n", + "}\n", + "\n", + "# Maps bones to a matplotlib color name.\n", + "KEYPOINT_EDGE_INDS_TO_COLOR = {\n", + " (0, 1): 'm',\n", + " (0, 2): 'c',\n", + " (1, 3): 'm',\n", + " (2, 4): 'c',\n", + " (0, 5): 'm',\n", + " (0, 6): 'c',\n", + " (5, 7): 'm',\n", + " (7, 9): 'm',\n", + " (6, 8): 'c',\n", + " (8, 10): 'c',\n", + " (5, 6): 'y',\n", + " (5, 11): 'm',\n", + " (6, 12): 'c',\n", + " (11, 12): 'y',\n", + " (11, 13): 'm',\n", + " (13, 15): 'm',\n", + " (12, 14): 'c',\n", + " (14, 16): 'c'\n", + "}\n", + "\n", + "def _keypoints_and_edges_for_display(keypoints_with_scores,\n", + " height,\n", + " width,\n", + " 
keypoint_threshold=0.11):\n", + " \"\"\"Returns high confidence keypoints and edges for visualization.\n", + "\n", + " Args:\n", + " keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing\n", + " the keypoint coordinates and scores returned from the MoveNet model.\n", + " height: height of the image in pixels.\n", + " width: width of the image in pixels.\n", + " keypoint_threshold: minimum confidence score for a keypoint to be\n", + " visualized.\n", + "\n", + " Returns:\n", + " A (keypoints_xy, edges_xy, edge_colors) containing:\n", + " * the coordinates of all keypoints of all detected entities;\n", + " * the coordinates of all skeleton edges of all detected entities;\n", + " * the colors in which the edges should be plotted.\n", + " \"\"\"\n", + " keypoints_all = []\n", + " keypoint_edges_all = []\n", + " edge_colors = []\n", + " num_instances, _, _, _ = keypoints_with_scores.shape\n", + " for idx in range(num_instances):\n", + " kpts_x = keypoints_with_scores[0, idx, :, 1]\n", + " kpts_y = keypoints_with_scores[0, idx, :, 0]\n", + " kpts_scores = keypoints_with_scores[0, idx, :, 2]\n", + " kpts_absolute_xy = np.stack(\n", + " [width * np.array(kpts_x), height * np.array(kpts_y)], axis=-1)\n", + " kpts_above_thresh_absolute = kpts_absolute_xy[\n", + " kpts_scores > keypoint_threshold, :]\n", + " keypoints_all.append(kpts_above_thresh_absolute)\n", + "\n", + " for edge_pair, color in KEYPOINT_EDGE_INDS_TO_COLOR.items():\n", + " if (kpts_scores[edge_pair[0]] > keypoint_threshold and\n", + " kpts_scores[edge_pair[1]] > keypoint_threshold):\n", + " x_start = kpts_absolute_xy[edge_pair[0], 0]\n", + " y_start = kpts_absolute_xy[edge_pair[0], 1]\n", + " x_end = kpts_absolute_xy[edge_pair[1], 0]\n", + " y_end = kpts_absolute_xy[edge_pair[1], 1]\n", + " line_seg = np.array([[x_start, y_start], [x_end, y_end]])\n", + " keypoint_edges_all.append(line_seg)\n", + " edge_colors.append(color)\n", + " if keypoints_all:\n", + " keypoints_xy = np.concatenate(keypoints_all, axis=0)\n", + " else:\n", + " keypoints_xy = np.zeros((0, 17, 2))\n", + "\n", + " if keypoint_edges_all:\n", + " edges_xy = np.stack(keypoint_edges_all, axis=0)\n", + " else:\n", + " edges_xy = np.zeros((0, 2, 2))\n", + " return keypoints_xy, edges_xy, edge_colors\n", + "\n", + "\n", + "def draw_prediction_on_image(\n", + " image, keypoints_with_scores, crop_region=None, close_figure=False,\n", + " output_image_height=None):\n", + " \"\"\"Draws the keypoint predictions on image.\n", + "\n", + " Args:\n", + " image: A numpy array with shape [height, width, channel] representing the\n", + " pixel values of the input image.\n", + " keypoints_with_scores: A numpy array with shape [1, 1, 17, 3] representing\n", + " the keypoint coordinates and scores returned from the MoveNet model.\n", + " crop_region: A dictionary that defines the coordinates of the bounding box\n", + " of the crop region in normalized coordinates (see the init_crop_region\n", + " function below for more detail). 
If provided, this function will also\n", + " draw the bounding box on the image.\n", + " output_image_height: An integer indicating the height of the output image.\n", + " Note that the image aspect ratio will be the same as the input image.\n", + "\n", + " Returns:\n", + " A numpy array with shape [out_height, out_width, channel] representing the\n", + " image overlaid with keypoint predictions.\n", + " \"\"\"\n", + " height, width, channel = image.shape\n", + " aspect_ratio = float(width) / height\n", + " fig, ax = plt.subplots(figsize=(12 * aspect_ratio, 12))\n", + " # To remove the huge white borders\n", + " fig.tight_layout(pad=0)\n", + " ax.margins(0)\n", + " ax.set_yticklabels([])\n", + " ax.set_xticklabels([])\n", + " plt.axis('off')\n", + "\n", + " im = ax.imshow(image)\n", + " line_segments = LineCollection([], linewidths=(4), linestyle='solid')\n", + " ax.add_collection(line_segments)\n", + " # Turn off tick labels\n", + " scat = ax.scatter([], [], s=60, color='#FF1493', zorder=3)\n", + "\n", + " (keypoint_locs, keypoint_edges,\n", + " edge_colors) = _keypoints_and_edges_for_display(\n", + " keypoints_with_scores, height, width)\n", + "\n", + " line_segments.set_segments(keypoint_edges)\n", + " line_segments.set_color(edge_colors)\n", + " if keypoint_edges.shape[0]:\n", + " line_segments.set_segments(keypoint_edges)\n", + " line_segments.set_color(edge_colors)\n", + " if keypoint_locs.shape[0]:\n", + " scat.set_offsets(keypoint_locs)\n", + "\n", + " if crop_region is not None:\n", + " xmin = max(crop_region['x_min'] * width, 0.0)\n", + " ymin = max(crop_region['y_min'] * height, 0.0)\n", + " rec_width = min(crop_region['x_max'], 0.99) * width - xmin\n", + " rec_height = min(crop_region['y_max'], 0.99) * height - ymin\n", + " rect = patches.Rectangle(\n", + " (xmin,ymin),rec_width,rec_height,\n", + " linewidth=1,edgecolor='b',facecolor='none')\n", + " ax.add_patch(rect)\n", + "\n", + " fig.canvas.draw()\n", + " image_from_plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)\n", + " image_from_plot = image_from_plot.reshape(\n", + " fig.canvas.get_width_height()[::-1] + (3,))\n", + " plt.close(fig)\n", + " if output_image_height is not None:\n", + " output_image_width = int(output_image_height / height * width)\n", + " image_from_plot = cv2.resize(\n", + " image_from_plot, dsize=(output_image_width, output_image_height),\n", + " interpolation=cv2.INTER_CUBIC)\n", + " return image_from_plot\n", + "\n", + "def to_gif(images, duration):\n", + " \"\"\"Converts image sequence (4D numpy array) to gif.\"\"\"\n", + " imageio.mimsave('./animation.gif', images, duration=duration)\n", + " return embed.embed_file('./animation.gif')\n", + "\n", + "def progress(value, max=100):\n", + " return HTML(\"\"\"\n", + " \n", + " {value}\n", + " \n", + " \"\"\".format(value=value, max=max))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UvrN0iQiOxhR" + }, + "source": [ + "## Load Model from TF hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zeGHgANcT7a1" + }, + "outputs": [], + "source": [ + "model_name = \"movenet_lightning\" #@param [\"movenet_lightning\", \"movenet_thunder\", \"movenet_lightning_f16.tflite\", \"movenet_thunder_f16.tflite\", \"movenet_lightning_int8.tflite\", \"movenet_thunder_int8.tflite\"]\n", + "\n", + "if \"tflite\" in model_name:\n", + " if \"movenet_lightning_f16\" in model_name:\n", + " !wget -q -O model.tflite 
https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/float16/4?lite-format=tflite\n", + " input_size = 192\n", + " elif \"movenet_thunder_f16\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/float16/4?lite-format=tflite\n", + " input_size = 256\n", + " elif \"movenet_lightning_int8\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/lightning/tflite/int8/4?lite-format=tflite\n", + " input_size = 192\n", + " elif \"movenet_thunder_int8\" in model_name:\n", + " !wget -q -O model.tflite https://tfhub.dev/google/lite-model/movenet/singlepose/thunder/tflite/int8/4?lite-format=tflite\n", + " input_size = 256\n", + " else:\n", + " raise ValueError(\"Unsupported model name: %s\" % model_name)\n", + "\n", + " # Initialize the TFLite interpreter\n", + " interpreter = tf.lite.Interpreter(model_path=\"model.tflite\")\n", + " interpreter.allocate_tensors()\n", + "\n", + " def movenet(input_image):\n", + " \"\"\"Runs detection on an input image.\n", + "\n", + " Args:\n", + " input_image: A [1, height, width, 3] tensor represents the input image\n", + " pixels. Note that the height/width should already be resized and match the\n", + " expected input resolution of the model before passing into this function.\n", + "\n", + " Returns:\n", + " A [1, 1, 17, 3] float numpy array representing the predicted keypoint\n", + " coordinates and scores.\n", + " \"\"\"\n", + " # TF Lite format expects tensor type of uint8.\n", + " input_image = tf.cast(input_image, dtype=tf.uint8)\n", + " input_details = interpreter.get_input_details()\n", + " output_details = interpreter.get_output_details()\n", + " interpreter.set_tensor(input_details[0]['index'], input_image.numpy())\n", + " # Invoke inference.\n", + " interpreter.invoke()\n", + " # Get the model prediction.\n", + " keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])\n", + " return keypoints_with_scores\n", + "\n", + "else:\n", + " if \"movenet_lightning\" in model_name:\n", + " module = hub.load(\"https://tfhub.dev/google/movenet/singlepose/lightning/4/")\n", + " input_size = 192\n", + " elif \"movenet_thunder\" in model_name:\n", + " module = hub.load(\"https://tfhub.dev/google/movenet/singlepose/thunder/4/")\n", + " input_size = 256\n", + " else:\n", + " raise ValueError(\"Unsupported model name: %s\" % model_name)\n", + "\n", + " def movenet(input_image):\n", + " \"\"\"Runs detection on an input image.\n", + "\n", + " Args:\n", + " input_image: A [1, height, width, 3] tensor represents the input image\n", + " pixels. 
Note that the height/width should already be resized and match the\n", + " expected input resolution of the model before passing into this function.\n", + "\n", + " Returns:\n", + " A [1, 1, 17, 3] float numpy array representing the predicted keypoint\n", + " coordinates and scores.\n", + " \"\"\"\n", + " model = module.signatures['serving_default']\n", + "\n", + " # SavedModel format expects tensor type of int32.\n", + " input_image = tf.cast(input_image, dtype=tf.int32)\n", + " # Run model inference.\n", + " outputs = model(input_image)\n", + " # Output is a [1, 1, 17, 3] tensor.\n", + " keypoints_with_scores = outputs['output_0'].numpy()\n", + " return keypoints_with_scores" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-h1qHYaqD9ap" + }, + "source": [ + "## Single Image Example" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ymTVR2I9x22I" + }, + "source": [ + "This session demonstrates the minimum working example of running the model on a **single image** to predict the 17 human keypoints." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5I3xBq80E3N_" + }, + "source": [ + "### Load Input Image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GMO4B-wx5psP" + }, + "outputs": [], + "source": [ + "!curl -o input_image.jpeg https://images.pexels.com/photos/4384679/pexels-photo-4384679.jpeg --silent" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lJZYQ8KYFQ6x" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "image_path = 'input_image.jpeg'\n", + "image = tf.io.read_file(image_path)\n", + "image = tf.image.decode_jpeg(image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "S_UWRdQxE6WN" + }, + "source": [ + "### Run Inference" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VHmTwACwFW-v" + }, + "outputs": [], + "source": [ + "# Resize and pad the image to keep the aspect ratio and fit the expected size.\n", + "input_image = tf.expand_dims(image, axis=0)\n", + "input_image = tf.image.resize_with_pad(input_image, input_size, input_size)\n", + "\n", + "# Run model inference.\n", + "keypoints_with_scores = movenet(input_image)\n", + "\n", + "# Visualize the predictions with image.\n", + "display_image = tf.expand_dims(image, axis=0)\n", + "display_image = tf.cast(tf.image.resize_with_pad(\n", + " display_image, 1280, 1280), dtype=tf.int32)\n", + "output_overlay = draw_prediction_on_image(\n", + " np.squeeze(display_image.numpy(), axis=0), keypoints_with_scores)\n", + "\n", + "plt.figure(figsize=(5, 5))\n", + "plt.imshow(output_overlay)\n", + "_ = plt.axis('off')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rKm-B0eMYeg8" + }, + "source": [ + "## Video (Image Sequence) Example" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gdPFXabLyiKv" + }, + "source": [ + "This section demonstrates how to apply intelligent cropping based on detections from the previous frame when the input is a sequence of frames. 
This allows the model to devote its attention and resources to the main subject, resulting in much better prediction quality without sacrificing the speed.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "SYFdK-JHYhrv" + }, + "outputs": [], + "source": [ + "#@title Cropping Algorithm\n", + "\n", + "# Confidence score to determine whether a keypoint prediction is reliable.\n", + "MIN_CROP_KEYPOINT_SCORE = 0.2\n", + "\n", + "def init_crop_region(image_height, image_width):\n", + " \"\"\"Defines the default crop region.\n", + "\n", + " The function provides the initial crop region (pads the full image from both\n", + " sides to make it a square image) when the algorithm cannot reliably determine\n", + " the crop region from the previous frame.\n", + " \"\"\"\n", + " if image_width > image_height:\n", + " box_height = image_width / image_height\n", + " box_width = 1.0\n", + " y_min = (image_height / 2 - image_width / 2) / image_height\n", + " x_min = 0.0\n", + " else:\n", + " box_height = 1.0\n", + " box_width = image_height / image_width\n", + " y_min = 0.0\n", + " x_min = (image_width / 2 - image_height / 2) / image_width\n", + "\n", + " return {\n", + " 'y_min': y_min,\n", + " 'x_min': x_min,\n", + " 'y_max': y_min + box_height,\n", + " 'x_max': x_min + box_width,\n", + " 'height': box_height,\n", + " 'width': box_width\n", + " }\n", + "\n", + "def torso_visible(keypoints):\n", + " \"\"\"Checks whether there are enough torso keypoints.\n", + "\n", + " This function checks whether the model is confident at predicting one of the\n", + " shoulders/hips which is required to determine a good crop region.\n", + " \"\"\"\n", + " return ((keypoints[0, 0, KEYPOINT_DICT['left_hip'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE or\n", + " keypoints[0, 0, KEYPOINT_DICT['right_hip'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE) and\n", + " (keypoints[0, 0, KEYPOINT_DICT['left_shoulder'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE or\n", + " keypoints[0, 0, KEYPOINT_DICT['right_shoulder'], 2] >\n", + " MIN_CROP_KEYPOINT_SCORE))\n", + "\n", + "def determine_torso_and_body_range(\n", + " keypoints, target_keypoints, center_y, center_x):\n", + " \"\"\"Calculates the maximum distance from each keypoints to the center location.\n", + "\n", + " The function returns the maximum distances from the two sets of keypoints:\n", + " full 17 keypoints and 4 torso keypoints. The returned information will be\n", + " used to determine the crop size. 
See determineCropRegion for more detail.\n", + " \"\"\"\n", + " torso_joints = ['left_shoulder', 'right_shoulder', 'left_hip', 'right_hip']\n", + " max_torso_yrange = 0.0\n", + " max_torso_xrange = 0.0\n", + " for joint in torso_joints:\n", + " dist_y = abs(center_y - target_keypoints[joint][0])\n", + " dist_x = abs(center_x - target_keypoints[joint][1])\n", + " if dist_y > max_torso_yrange:\n", + " max_torso_yrange = dist_y\n", + " if dist_x > max_torso_xrange:\n", + " max_torso_xrange = dist_x\n", + "\n", + " max_body_yrange = 0.0\n", + " max_body_xrange = 0.0\n", + " for joint in KEYPOINT_DICT.keys():\n", + " if keypoints[0, 0, KEYPOINT_DICT[joint], 2] < MIN_CROP_KEYPOINT_SCORE:\n", + " continue\n", + " dist_y = abs(center_y - target_keypoints[joint][0]);\n", + " dist_x = abs(center_x - target_keypoints[joint][1]);\n", + " if dist_y > max_body_yrange:\n", + " max_body_yrange = dist_y\n", + "\n", + " if dist_x > max_body_xrange:\n", + " max_body_xrange = dist_x\n", + "\n", + " return [max_torso_yrange, max_torso_xrange, max_body_yrange, max_body_xrange]\n", + "\n", + "def determine_crop_region(\n", + " keypoints, image_height,\n", + " image_width):\n", + " \"\"\"Determines the region to crop the image for the model to run inference on.\n", + "\n", + " The algorithm uses the detected joints from the previous frame to estimate\n", + " the square region that encloses the full body of the target person and\n", + " centers at the midpoint of two hip joints. The crop size is determined by\n", + " the distances between each joints and the center point.\n", + " When the model is not confident with the four torso joint predictions, the\n", + " function returns a default crop which is the full image padded to square.\n", + " \"\"\"\n", + " target_keypoints = {}\n", + " for joint in KEYPOINT_DICT.keys():\n", + " target_keypoints[joint] = [\n", + " keypoints[0, 0, KEYPOINT_DICT[joint], 0] * image_height,\n", + " keypoints[0, 0, KEYPOINT_DICT[joint], 1] * image_width\n", + " ]\n", + "\n", + " if torso_visible(keypoints):\n", + " center_y = (target_keypoints['left_hip'][0] +\n", + " target_keypoints['right_hip'][0]) / 2;\n", + " center_x = (target_keypoints['left_hip'][1] +\n", + " target_keypoints['right_hip'][1]) / 2;\n", + "\n", + " (max_torso_yrange, max_torso_xrange,\n", + " max_body_yrange, max_body_xrange) = determine_torso_and_body_range(\n", + " keypoints, target_keypoints, center_y, center_x)\n", + "\n", + " crop_length_half = np.amax(\n", + " [max_torso_xrange * 1.9, max_torso_yrange * 1.9,\n", + " max_body_yrange * 1.2, max_body_xrange * 1.2])\n", + "\n", + " tmp = np.array(\n", + " [center_x, image_width - center_x, center_y, image_height - center_y])\n", + " crop_length_half = np.amin(\n", + " [crop_length_half, np.amax(tmp)]);\n", + "\n", + " crop_corner = [center_y - crop_length_half, center_x - crop_length_half];\n", + "\n", + " if crop_length_half > max(image_width, image_height) / 2:\n", + " return init_crop_region(image_height, image_width)\n", + " else:\n", + " crop_length = crop_length_half * 2;\n", + " return {\n", + " 'y_min': crop_corner[0] / image_height,\n", + " 'x_min': crop_corner[1] / image_width,\n", + " 'y_max': (crop_corner[0] + crop_length) / image_height,\n", + " 'x_max': (crop_corner[1] + crop_length) / image_width,\n", + " 'height': (crop_corner[0] + crop_length) / image_height -\n", + " crop_corner[0] / image_height,\n", + " 'width': (crop_corner[1] + crop_length) / image_width -\n", + " crop_corner[1] / image_width\n", + " }\n", + " else:\n", + " return 
init_crop_region(image_height, image_width)\n", + "\n", + "def crop_and_resize(image, crop_region, crop_size):\n", + " \"\"\"Crops and resize the image to prepare for the model input.\"\"\"\n", + " boxes=[[crop_region['y_min'], crop_region['x_min'],\n", + " crop_region['y_max'], crop_region['x_max']]]\n", + " output_image = tf.image.crop_and_resize(\n", + " image, box_indices=[0], boxes=boxes, crop_size=crop_size)\n", + " return output_image\n", + "\n", + "def run_inference(movenet, image, crop_region, crop_size):\n", + " \"\"\"Runs model inference on the cropped region.\n", + "\n", + " The function runs the model inference on the cropped region and updates the\n", + " model output to the original image coordinate system.\n", + " \"\"\"\n", + " image_height, image_width, _ = image.shape\n", + " input_image = crop_and_resize(\n", + " tf.expand_dims(image, axis=0), crop_region, crop_size=crop_size)\n", + " # Run model inference.\n", + " keypoints_with_scores = movenet(input_image)\n", + " # Update the coordinates.\n", + " for idx in range(17):\n", + " keypoints_with_scores[0, 0, idx, 0] = (\n", + " crop_region['y_min'] * image_height +\n", + " crop_region['height'] * image_height *\n", + " keypoints_with_scores[0, 0, idx, 0]) / image_height\n", + " keypoints_with_scores[0, 0, idx, 1] = (\n", + " crop_region['x_min'] * image_width +\n", + " crop_region['width'] * image_width *\n", + " keypoints_with_scores[0, 0, idx, 1]) / image_width\n", + " return keypoints_with_scores" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L2JmA1xAEntQ" + }, + "source": [ + "### Load Input Image Sequence" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CzJxbxDckWl2" + }, + "outputs": [], + "source": [ + "!wget -q -O dance.gif https://github.com/tensorflow/tfjs-models/raw/master/pose-detection/assets/dance_input.gif" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IxbMFZJUkd6W" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "image_path = 'dance.gif'\n", + "image = tf.io.read_file(image_path)\n", + "image = tf.image.decode_gif(image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CJKeQ4siEtU9" + }, + "source": [ + "### Run Inference with Cropping Algorithm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9B57XS0NZPIy" + }, + "outputs": [], + "source": [ + "# Load the input image.\n", + "num_frames, image_height, image_width, _ = image.shape\n", + "crop_region = init_crop_region(image_height, image_width)\n", + "\n", + "output_images = []\n", + "bar = display(progress(0, num_frames-1), display_id=True)\n", + "for frame_idx in range(num_frames):\n", + " keypoints_with_scores = run_inference(\n", + " movenet, image[frame_idx, :, :, :], crop_region,\n", + " crop_size=[input_size, input_size])\n", + " output_images.append(draw_prediction_on_image(\n", + " image[frame_idx, :, :, :].numpy().astype(np.int32),\n", + " keypoints_with_scores, crop_region=None,\n", + " close_figure=True, output_image_height=300))\n", + " crop_region = determine_crop_region(\n", + " keypoints_with_scores, image_height, image_width)\n", + " bar.update(progress(frame_idx, num_frames-1))\n", + "\n", + "# Prepare gif visualization.\n", + "output = np.stack(output_images, axis=0)\n", + "to_gif(output, duration=100)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "9u_VGR6_BmbZ", + "5I3xBq80E3N_", + "L2JmA1xAEntQ" + ], + "name": 
"movenet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/movinet.ipynb b/site/en/hub/tutorials/movinet.ipynb new file mode 100644 index 00000000000..24600256cf9 --- /dev/null +++ b/site/en/hub/tutorials/movinet.ipynb @@ -0,0 +1,1047 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "toCy3v03Dwx7" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QKe-ubNcDvgv" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qFdPvlXBOdUN" + }, + "source": [ + "# MoViNet for streaming action recognition " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-vxk2Kbc_KSP" + }, + "source": [ + "This tutorial demonstrates how to use a pretrained video classification model to classify an activity (such as dancing, swimming, biking etc) in the given video. \n", + "\n", + "The model architecture used in this tutorial is called [MoViNet](https://arxiv.org/pdf/2103.11511.pdf) (Mobile Video Networks). MoVieNets are a family of efficient video classification models trained on huge dataset ([Kinetics 600](https://deepmind.com/research/open-source/kinetics)).\n", + "\n", + "In contrast to the [i3d models](https://tfhub.dev/s?q=i3d-kinetics) available on TF Hub, MoViNets also support frame-by-frame inference on streaming video. \n", + "\n", + "The pretrained models are available from [TF Hub](https://tfhub.dev/google/collections/movinet/1). The TF Hub collection also includes quantized models optimized for [TFLite](https://tensorflow.org/lite).\n", + "\n", + "The source for these models is available in the [TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/projects/movinet). This includes a [longer version of this tutorial](https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/projects/movinet/movinet_tutorial.ipynb) that also covers building and fine-tuning a MoViNet model. \n", + "\n", + "This MoViNet tutorial is part of a series of TensorFlow video tutorials. Here are the other three tutorials:\n", + "\n", + "- [Load video data](https://www.tensorflow.org/tutorials/load_data/video): This tutorial explains how to load and preprocess video data into a TensorFlow dataset pipeline from scratch.\n", + "- [Build a 3D CNN model for video classification](https://www.tensorflow.org/tutorials/video/video_classification). Note that this tutorial uses a (2+1)D CNN that decomposes the spatial and temporal aspects of 3D data; if you are using volumetric data such as an MRI scan, consider using a 3D CNN instead of a (2+1)D CNN.\n", + "- [Transfer learning for video classification with MoViNet](https://www.tensorflow.org/tutorials/video/transfer_learning_with_movinet): This tutorial explains how to use a pre-trained video classification model trained on a different dataset with the UCF-101 dataset.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3E96e1UKQ8uR" + }, + "source": [ + "![jumping jacks plot](https://storage.googleapis.com/tf_model_garden/vision/movinet/artifacts/jumpingjacks_plot.gif)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8_oLnvJy7kz5" + }, + "source": [ + "## Setup\n", + "\n", + "For inference on smaller models (A0-A2), CPU is sufficient for this Colab." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GUgUMGmY1yq-" + }, + "outputs": [], + "source": [ + "!sudo apt install -y ffmpeg\n", + "!pip install -q mediapy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s3khsunT7kWa" + }, + "outputs": [], + "source": [ + "!pip uninstall -q -y opencv-python-headless\n", + "!pip install -q \"opencv-python-headless<4.3\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dI_1csl6Q-gH" + }, + "outputs": [], + "source": [ + "# Import libraries\n", + "import pathlib\n", + "\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import mediapy as media\n", + "import numpy as np\n", + "import PIL\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tqdm\n", + "\n", + "mpl.rcParams.update({\n", + " 'font.size': 10,\n", + "})" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pn8K9oWbmREi" + }, + "source": [ + "Get the kinetics 600 label list, and print the first few labels:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2VJUAcjhkfb3" + }, + "outputs": [], + "source": [ + "labels_path = tf.keras.utils.get_file(\n", + " fname='labels.txt',\n", + " origin='https://raw.githubusercontent.com/tensorflow/models/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/kinetics_600_labels.txt'\n", + ")\n", + "labels_path = pathlib.Path(labels_path)\n", + "\n", + "lines = labels_path.read_text().splitlines()\n", + "KINETICS_600_LABELS = np.array([line.strip() for line in lines])\n", + "KINETICS_600_LABELS[:20]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G9BU5XsOmaq3" + }, + "source": [ + "To provide a simple example video for classification, we can load a short gif of jumping jacks being performed.\n", + "\n", + "![jumping jacks](https://github.com/tensorflow/models/raw/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/jumpingjack.gif)\n", + "\n", + "Attribution: Footage shared by [Coach Bobby Bluford](https://www.youtube.com/watch?v=-AxHpj-EuPg) on YouTube under the CC-BY license." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8aFKMbr4mfSg" + }, + "source": [ + "Download the gif." 
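For readers following along, a quick sanity check of the label list loaded above can help, since later cells index into it with the model's logit indices. This is an illustrative sketch, not part of the notebook diff; it assumes the `KINETICS_600_LABELS` array defined in the cell above.

```python
# Illustrative sketch (assumes KINETICS_600_LABELS from the cell above).
# The array index corresponds to the model's class/logit index, which is how
# get_top_k() later maps predictions back to label strings.
import numpy as np

print(len(KINETICS_600_LABELS))   # expected: 600 class names
print(KINETICS_600_LABELS[:3])    # first few labels, in logit order
jj_index = np.flatnonzero(KINETICS_600_LABELS == 'jumping jacks')
print(jj_index)                   # index of the class predicted later in this tutorial
```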
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w62jqXhaSb15" + }, + "outputs": [], + "source": [ + "jumpingjack_url = 'https://github.com/tensorflow/models/raw/f8af2291cced43fc9f1d9b41ddbf772ae7b0d7d2/official/projects/movinet/files/jumpingjack.gif'\n", + "jumpingjack_path = tf.keras.utils.get_file(\n", + " fname='jumpingjack.gif',\n", + " origin=jumpingjack_url,\n", + " cache_dir='.', cache_subdir='.',\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hdRS_22PebfB" + }, + "source": [ + "Define a function to read a gif file into a `tf.Tensor`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mPhmCu6oSi5f" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Read and process a video\n", + "def load_gif(file_path, image_size=(224, 224)):\n", + " \"\"\"Loads a gif file into a TF tensor.\n", + "\n", + " Use images resized to match what's expected by your model.\n", + " The model pages say the \"A2\" models expect 224 x 224 images at 5 fps\n", + "\n", + " Args:\n", + " file_path: path to the location of a gif file.\n", + " image_size: a tuple of target size.\n", + "\n", + " Returns:\n", + " a video of the gif file\n", + " \"\"\"\n", + " # Load a gif file, convert it to a TF tensor\n", + " raw = tf.io.read_file(file_path)\n", + " video = tf.io.decode_gif(raw)\n", + " # Resize the video\n", + " video = tf.image.resize(video, image_size)\n", + " # change dtype to a float32\n", + " # Hub models always want images normalized to [0,1]\n", + " # ref: https://www.tensorflow.org/hub/common_signatures/images#input\n", + " video = tf.cast(video, tf.float32) / 255.\n", + " return video" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Xx7cZm8vpDJm" + }, + "source": [ + "The video's shape is `(frames, height, width, colors)`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E7k_PmbFSkHv" + }, + "outputs": [], + "source": [ + "jumpingjack=load_gif(jumpingjack_path)\n", + "jumpingjack.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LcKFy3oedBvF" + }, + "source": [ + "## How to use the model\n", + "\n", + "This section contains a walkthrough showing how to use the [models from TensorFlow Hub](https://tfhub.dev/google/collections/movinet/1). If you just want to see the models in action, skip to the next section.\n", + "\n", + "There are two versions of each model: `base` and `streaming`.\n", + "\n", + "* The `base` version takes a video as input, and returns the probabilities averaged over the frames.\n", + "* The `streaming` version takes a video frame and an RNN state as input, and returns the predictions for that frame, and the new RNN state. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WQO6Zb8Hm-9q" + }, + "source": [ + "### The base model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RfnYU20JnPqp" + }, + "source": [ + "Download the [pretrained model from TensorFlow Hub](https://tfhub.dev/tensorflow/movinet/a2/base/kinetics-600/classification/3). 
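Before walking through each variant in the cells below, a compact side-by-side sketch of the two calling conventions may help. This is a sketch only: `base_model` and `stream_model` are hypothetical names for handles loaded with `hub.load()` as in the following cells, and `video` is assumed to be a float32 tensor of shape `(frames, height, width, 3)` scaled to `[0, 1]`.

```python
# Sketch contrasting the two MoViNet variants described above; the real cells
# below load and use them one at a time.

# base: one call over the whole clip; logits reflect an average over frames.
sig = base_model.signatures['serving_default']
clip_logits = sig(image=video[tf.newaxis, ...])['classifier_head'][0]

# stream: one frame per call, threading the RNN state through each step.
state = stream_model.init_states(video[tf.newaxis, ...].shape)
for n in range(video.shape[0]):
  frame_logits, state = stream_model({**state, 'image': video[tf.newaxis, n:n+1, ...]})
```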
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FnpPo6HSR7qv" + }, + "outputs": [], + "source": [ + "%%time\n", + "id = 'a2'\n", + "mode = 'base'\n", + "version = '3'\n", + "hub_url = f'https://tfhub.dev/tensorflow/movinet/%7Bid%7D/%7Bmode%7D/kinetics-600/classification/%7Bversion%7D'\n", + "model = hub.load(hub_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jvaFwKhxndmb" + }, + "source": [ + "This version of the model has one `signature`. It takes an `image` argument which is a `tf.float32` with shape `(batch, frames, height, width, colors)`. It returns a dictionary containing one output: A `tf.float32` tensor of logits with shape `(batch, classes)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7GzZ4Y03T_gH" + }, + "outputs": [], + "source": [ + "sig = model.signatures['serving_default']\n", + "print(sig.pretty_printed_signature())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M4Xny1ANomi4" + }, + "source": [ + "To run this signature on the video you need to add the outer `batch` dimension to the video first." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LBOFEDG1XvZE" + }, + "outputs": [], + "source": [ + "#warmup\n", + "sig(image = jumpingjack[tf.newaxis, :1]);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCeW3KycVbGn" + }, + "outputs": [], + "source": [ + "%%time\n", + "logits = sig(image = jumpingjack[tf.newaxis, ...])\n", + "logits = logits['classifier_head'][0]\n", + "\n", + "print(logits.shape)\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AE8doqkPpxED" + }, + "source": [ + "Define a `get_top_k` function that packages the above output processing for later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OozPNO6LvZ00" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Get top_k labels and probabilities\n", + "def get_top_k(probs, k=5, label_map=KINETICS_600_LABELS):\n", + " \"\"\"Outputs the top k model labels and probabilities on the given video.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " k: the number of top predictions to select.\n", + " label_map: a list of labels to map logit indices to label strings.\n", + "\n", + " Returns:\n", + " a tuple of the top-k labels and probabilities.\n", + " \"\"\"\n", + " # Sort predictions to find top_k\n", + " top_predictions = tf.argsort(probs, axis=-1, direction='DESCENDING')[:k]\n", + " # collect the labels of top_k predictions\n", + " top_labels = tf.gather(label_map, top_predictions, axis=-1)\n", + " # decode lablels\n", + " top_labels = [label.decode('utf8') for label in top_labels.numpy()]\n", + " # top_k probabilities of the predictions\n", + " top_probs = tf.gather(probs, top_predictions, axis=-1).numpy()\n", + " return tuple(zip(top_labels, top_probs))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kTfKMT29pP_Z" + }, + "source": [ + "Convert the `logits` to probabilities, and look up the top 5 classes for the video. The model confirms that the video is probably of `jumping jacks`." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z-SrNGsGV5Mt" + }, + "outputs": [], + "source": [ + "probs = tf.nn.softmax(logits, axis=-1)\n", + "for label, p in get_top_k(probs):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ltdijoQpqjxZ" + }, + "source": [ + "### The streaming model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9dqdUPQXq45b" + }, + "source": [ + "The previous section used a model that runs over a whole video. Often when processing a video you don't want a single prediction at the end, you want to update predictions frame by frame. The `stream` versions of the model allow you to do this.\n", + "\n", + "Load the `stream` version of the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mxt0hRXFZkAM" + }, + "outputs": [], + "source": [ + "%%time\n", + "id = 'a2'\n", + "mode = 'stream'\n", + "version = '3'\n", + "hub_url = f'https://tfhub.dev/tensorflow/movinet/%7Bid%7D/%7Bmode%7D/kinetics-600/classification/%7Bversion%7D'\n", + "model = hub.load(hub_url)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pDswtsGgsYGS" + }, + "source": [ + "Using this model is slightly more complex than the `base` model. You have to keep track of the internal state of the model's RNNs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0fM_Vb1VsbDm" + }, + "outputs": [], + "source": [ + "list(model.signatures.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ojr1_iYCtPvp" + }, + "source": [ + "The `init_states` signature takes the video's **shape** `(batch, frames, height, width, colors)` as input, and returns a large dictionary of tensors containing the initial RNN states: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "67loYFGpo_RP" + }, + "outputs": [], + "source": [ + "lines = model.signatures['init_states'].pretty_printed_signature().splitlines()\n", + "lines = lines[:10]\n", + "lines.append(' ...')\n", + "print('.\\n'.join(lines))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v5lG3vejn5df" + }, + "outputs": [], + "source": [ + "initial_state = model.init_states(jumpingjack[tf.newaxis, ...].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "J3DwmyHnuhH_" + }, + "outputs": [], + "source": [ + "type(initial_state)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K8SyiEU6tB-e" + }, + "outputs": [], + "source": [ + "list(sorted(initial_state.keys()))[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xeMCzJMBvwRF" + }, + "source": [ + "Once you have the initial state for the RNNs, you can pass the state and a video frame as input (keeping the `(batch, frames, height, width, colors)` shape for the video frame). The model returns a `(logits, state)` pair. \n", + "\n", + "After just seeing the first frame, the model is not convinced that the video is of \"jumping jacks\":" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "McSLdIgtsI3d" + }, + "outputs": [], + "source": [ + "inputs = initial_state.copy()\n", + "\n", + "# Add the batch axis, take the first frme, but keep the frame-axis.\n", + "inputs['image'] = jumpingjack[tf.newaxis, 0:1, ...] 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WlH7PqLPX664" + }, + "outputs": [], + "source": [ + "# warmup\n", + "model(inputs);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7uzNXtu7X5sr" + }, + "outputs": [], + "source": [ + "logits, new_state = model(inputs)\n", + "logits = logits[0]\n", + "probs = tf.nn.softmax(logits, axis=-1)\n", + "\n", + "for label, p in get_top_k(probs):\n", + " print(f'{label:20s}: {p:.3f}')\n", + "\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oLU644FQwXSb" + }, + "source": [ + "If you run the model in a loop, passing the updated state with each frame, the model quickly converges to the correct result:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Fzm7T4ImmIEg" + }, + "outputs": [], + "source": [ + "%%time\n", + "state = initial_state.copy()\n", + "all_logits = []\n", + "\n", + "for n in range(len(jumpingjack)):\n", + " inputs = state\n", + " inputs['image'] = jumpingjack[tf.newaxis, n:n+1, ...]\n", + " result, state = model(inputs)\n", + " all_logits.append(logits)\n", + "\n", + "probabilities = tf.nn.softmax(all_logits, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "B7UtHoSWcOT2" + }, + "outputs": [], + "source": [ + "for label, p in get_top_k(probabilities[-1]):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6ffV3NhZcsrv" + }, + "outputs": [], + "source": [ + "id = tf.argmax(probabilities[-1])\n", + "plt.plot(probabilities[:, id])\n", + "plt.xlabel('Frame #')\n", + "plt.ylabel(f\"p('{KINETICS_600_LABELS[id]}')\");" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d7MZ_AfRW845" + }, + "source": [ + "You may notice that the final probability is much more certain than in the previous section where you ran the `base` model. The `base` model returns an average of the predictions over the frames." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0Wij4tsyW8dR" + }, + "outputs": [], + "source": [ + "for label, p in get_top_k(tf.reduce_mean(probabilities, axis=0)):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qLUoC9ejggGo" + }, + "source": [ + "## Animate the predictions over time\n", + "\n", + "The previous section went into some details about how to use these models. This section builds on top of that to produce some nice inference animations. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OnFqOXazoWgy" + }, + "source": [ + "The hidden cell below to defines helper functions used in this section." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "dx55NK3ZoZeh" + }, + "outputs": [], + "source": [ + "#@title\n", + "# Get top_k labels and probabilities predicted using MoViNets streaming model\n", + "def get_top_k_streaming_labels(probs, k=5, label_map=KINETICS_600_LABELS):\n", + " \"\"\"Returns the top-k labels over an entire video sequence.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " k: the number of top predictions to select.\n", + " label_map: a list of labels to map logit indices to label strings.\n", + "\n", + " Returns:\n", + " a tuple of the top-k probabilities, labels, and logit indices\n", + " \"\"\"\n", + " top_categories_last = tf.argsort(probs, -1, 'DESCENDING')[-1, :1]\n", + " # Sort predictions to find top_k\n", + " categories = tf.argsort(probs, -1, 'DESCENDING')[:, :k]\n", + " categories = tf.reshape(categories, [-1])\n", + "\n", + " counts = sorted([\n", + " (i.numpy(), tf.reduce_sum(tf.cast(categories == i, tf.int32)).numpy())\n", + " for i in tf.unique(categories)[0]\n", + " ], key=lambda x: x[1], reverse=True)\n", + "\n", + " top_probs_idx = tf.constant([i for i, _ in counts[:k]])\n", + " top_probs_idx = tf.concat([top_categories_last, top_probs_idx], 0)\n", + " # find unique indices of categories\n", + " top_probs_idx = tf.unique(top_probs_idx)[0][:k+1]\n", + " # top_k probabilities of the predictions\n", + " top_probs = tf.gather(probs, top_probs_idx, axis=-1)\n", + " top_probs = tf.transpose(top_probs, perm=(1, 0))\n", + " # collect the labels of top_k predictions\n", + " top_labels = tf.gather(label_map, top_probs_idx, axis=0)\n", + " # decode the top_k labels\n", + " top_labels = [label.decode('utf8') for label in top_labels.numpy()]\n", + "\n", + " return top_probs, top_labels, top_probs_idx\n", + "\n", + "# Plot top_k predictions at a given time step\n", + "def plot_streaming_top_preds_at_step(\n", + " top_probs,\n", + " top_labels,\n", + " step=None,\n", + " image=None,\n", + " legend_loc='lower left',\n", + " duration_seconds=10,\n", + " figure_height=500,\n", + " playhead_scale=0.8,\n", + " grid_alpha=0.3):\n", + " \"\"\"Generates a plot of the top video model predictions at a given time step.\n", + "\n", + " Args:\n", + " top_probs: a tensor of shape (k, num_frames) representing the top-k\n", + " probabilities over all frames.\n", + " top_labels: a list of length k that represents the top-k label strings.\n", + " step: the current time step in the range [0, num_frames].\n", + " image: the image frame to display at the current time step.\n", + " legend_loc: the placement location of the legend.\n", + " duration_seconds: the total duration of the video.\n", + " figure_height: the output figure height.\n", + " playhead_scale: scale value for the playhead.\n", + " grid_alpha: alpha value for the gridlines.\n", + "\n", + " Returns:\n", + " A tuple of the output numpy image, figure, and axes.\n", + " \"\"\"\n", + " # find number of top_k labels and frames in the video\n", + " num_labels, num_frames = top_probs.shape\n", + " if step is None:\n", + " step = num_frames\n", + " # Visualize frames and top_k probabilities of streaming video\n", + " fig = plt.figure(figsize=(6.5, 7), dpi=300)\n", + " gs = mpl.gridspec.GridSpec(8, 1)\n", + " ax2 = plt.subplot(gs[:-3, :])\n", + " ax = plt.subplot(gs[-3:, :])\n", + " # display the frame\n", + " if image is not None:\n", + " 
ax2.imshow(image, interpolation='nearest')\n", + " ax2.axis('off')\n", + " # x-axis (frame number)\n", + " preview_line_x = tf.linspace(0., duration_seconds, num_frames)\n", + " # y-axis (top_k probabilities)\n", + " preview_line_y = top_probs\n", + "\n", + " line_x = preview_line_x[:step+1]\n", + " line_y = preview_line_y[:, :step+1]\n", + "\n", + " for i in range(num_labels):\n", + " ax.plot(preview_line_x, preview_line_y[i], label=None, linewidth='1.5',\n", + " linestyle=':', color='gray')\n", + " ax.plot(line_x, line_y[i], label=top_labels[i], linewidth='2.0')\n", + "\n", + "\n", + " ax.grid(which='major', linestyle=':', linewidth='1.0', alpha=grid_alpha)\n", + " ax.grid(which='minor', linestyle=':', linewidth='0.5', alpha=grid_alpha)\n", + "\n", + " min_height = tf.reduce_min(top_probs) * playhead_scale\n", + " max_height = tf.reduce_max(top_probs)\n", + " ax.vlines(preview_line_x[step], min_height, max_height, colors='red')\n", + " ax.scatter(preview_line_x[step], max_height, color='red')\n", + "\n", + " ax.legend(loc=legend_loc)\n", + "\n", + " plt.xlim(0, duration_seconds)\n", + " plt.ylabel('Probability')\n", + " plt.xlabel('Time (s)')\n", + " plt.yscale('log')\n", + "\n", + " fig.tight_layout()\n", + " fig.canvas.draw()\n", + "\n", + " data = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)\n", + " data = data.reshape(fig.canvas.get_width_height()[::-1] + (3,))\n", + " plt.close()\n", + "\n", + " figure_width = int(figure_height * data.shape[1] / data.shape[0])\n", + " image = PIL.Image.fromarray(data).resize([figure_width, figure_height])\n", + " image = np.array(image)\n", + "\n", + " return image\n", + "\n", + "# Plotting top_k predictions from MoViNets streaming model\n", + "def plot_streaming_top_preds(\n", + " probs,\n", + " video,\n", + " top_k=5,\n", + " video_fps=25.,\n", + " figure_height=500,\n", + " use_progbar=True):\n", + " \"\"\"Generates a video plot of the top video model predictions.\n", + "\n", + " Args:\n", + " probs: probability tensor of shape (num_frames, num_classes) that represents\n", + " the probability of each class on each frame.\n", + " video: the video to display in the plot.\n", + " top_k: the number of top predictions to select.\n", + " video_fps: the input video fps.\n", + " figure_fps: the output video fps.\n", + " figure_height: the height of the output video.\n", + " use_progbar: display a progress bar.\n", + "\n", + " Returns:\n", + " A numpy array representing the output video.\n", + " \"\"\"\n", + " # select number of frames per second\n", + " video_fps = 8.\n", + " # select height of the image\n", + " figure_height = 500\n", + " # number of time steps of the given video\n", + " steps = video.shape[0]\n", + " # estimate duration of the video (in seconds)\n", + " duration = steps / video_fps\n", + " # estimate top_k probabilities and corresponding labels\n", + " top_probs, top_labels, _ = get_top_k_streaming_labels(probs, k=top_k)\n", + "\n", + " images = []\n", + " step_generator = tqdm.trange(steps) if use_progbar else range(steps)\n", + " for i in step_generator:\n", + " image = plot_streaming_top_preds_at_step(\n", + " top_probs=top_probs,\n", + " top_labels=top_labels,\n", + " step=i,\n", + " image=video[i],\n", + " duration_seconds=duration,\n", + " figure_height=figure_height,\n", + " )\n", + " images.append(image)\n", + "\n", + " return np.array(images)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eLgFBslcZOQO" + }, + "source": [ + "Start by running the streaming model across the frames of the video, and 
collecting the logits:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tXWR13wthnK5" + }, + "outputs": [], + "source": [ + "init_states = model.init_states(jumpingjack[tf.newaxis].shape)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YqSkt7l8ltwt" + }, + "outputs": [], + "source": [ + "# Insert your video clip here\n", + "video = jumpingjack\n", + "images = tf.split(video[tf.newaxis], video.shape[0], axis=1)\n", + "\n", + "all_logits = []\n", + "\n", + "# To run on a video, pass in one frame at a time\n", + "states = init_states\n", + "for image in tqdm.tqdm(images):\n", + " # predictions for each frame\n", + " logits, states = model({**states, 'image': image})\n", + " all_logits.append(logits)\n", + "\n", + "# concatenating all the logits\n", + "logits = tf.concat(all_logits, 0)\n", + "# estimating probabilities\n", + "probs = tf.nn.softmax(logits, axis=-1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OOGcCMMJyuPl" + }, + "outputs": [], + "source": [ + "final_probs = probs[-1]\n", + "print('Top_k predictions and their probablities\\n')\n", + "for label, p in get_top_k(final_probs):\n", + " print(f'{label:20s}: {p:.3f}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GaybT0rbZct-" + }, + "source": [ + "Convert the sequence of probabilities into a video:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xdox556CtMRb" + }, + "outputs": [], + "source": [ + "# Generate a plot and output to a video tensor\n", + "plot_video = plot_streaming_top_preds(probs, video, video_fps=8.)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NSStKE9klCs3" + }, + "outputs": [], + "source": [ + "# For gif format, set codec='gif'\n", + "media.show_video(plot_video, fps=3)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LCImgZ3OdJw7" + }, + "source": [ + "## Resources\n", + "\n", + "The pretrained models are available from [TF Hub](https://tfhub.dev/google/collections/movinet/1). The TF Hub collection also includes quantized models optimized for [TFLite](https://tensorflow.org/lite).\n", + "\n", + "The source for these models is available in the [TensorFlow Model Garden](https://github.com/tensorflow/models/tree/master/official/projects/movinet). This includes a [longer version of this tutorial](https://colab.sandbox.google.com/github/tensorflow/models/blob/master/official/projects/movinet/movinet_tutorial.ipynb) that also covers building and fine-tuning a MoViNet model. 
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gh5lLAo-HpVF" + }, + "source": [ + "## Next Steps\n", + "\n", + "To learn more about working with video data in TensorFlow, check out the following tutorials:\n", + "\n", + "* [Load video data](https://www.tensorflow.org/tutorials/load_data/video)\n", + "* [Build a 3D CNN model for video classification](https://www.tensorflow.org/tutorials/video/video_classification)\n", + "* [Transfer learning for video classification with MoViNet](https://www.tensorflow.org/tutorials/video/transfer_learning_with_movinet)" + ] + } + ], + "metadata": { + "colab": { + "name": "movinet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/object_detection.ipynb b/site/en/hub/tutorials/object_detection.ipynb new file mode 100644 index 00000000000..e1262f3084c --- /dev/null +++ b/site/en/hub/tutorials/object_detection.ipynb @@ -0,0 +1,442 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CxmDMK4yupqg" + }, + "source": [ + "# Object Detection\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sy553YSVmYiK" + }, + "source": [ + "This Colab demonstrates use of a TF-Hub module trained to perform object detection." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v4XGxDrCkeip" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "6cPY9Ou4sWs_" + }, + "outputs": [], + "source": [ + "#@title Imports and function definitions\n", + "\n", + "# For running inference on the TF-Hub module.\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "# For downloading the image.\n", + "import matplotlib.pyplot as plt\n", + "import tempfile\n", + "from six.moves.urllib.request import urlopen\n", + "from six import BytesIO\n", + "\n", + "# For drawing onto the image.\n", + "import numpy as np\n", + "from PIL import Image\n", + "from PIL import ImageColor\n", + "from PIL import ImageDraw\n", + "from PIL import ImageFont\n", + "from PIL import ImageOps\n", + "\n", + "# For measuring the inference time.\n", + "import time\n", + "\n", + "# Print Tensorflow version\n", + "print(tf.__version__)\n", + "\n", + "# Check available GPU devices.\n", + "print(\"The following GPU devices are available: %s\" % tf.test.gpu_device_name())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZGkrXGy62409" + }, + "source": [ + "## Example use" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vlA3CftFpRiW" + }, + "source": [ + "### Helper functions for downloading images and for visualization.\n", + "\n", + "Visualization code adapted from [TF object detection API](https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py) for the simplest required functionality." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D9IwDpOtpIHW" + }, + "outputs": [], + "source": [ + "def display_image(image):\n", + " fig = plt.figure(figsize=(20, 15))\n", + " plt.grid(False)\n", + " plt.imshow(image)\n", + "\n", + "\n", + "def download_and_resize_image(url, new_width=256, new_height=256,\n", + " display=False):\n", + " _, filename = tempfile.mkstemp(suffix=\".jpg\")\n", + " response = urlopen(url)\n", + " image_data = response.read()\n", + " image_data = BytesIO(image_data)\n", + " pil_image = Image.open(image_data)\n", + " pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)\n", + " pil_image_rgb = pil_image.convert(\"RGB\")\n", + " pil_image_rgb.save(filename, format=\"JPEG\", quality=90)\n", + " print(\"Image downloaded to %s.\" % filename)\n", + " if display:\n", + " display_image(pil_image)\n", + " return filename\n", + "\n", + "\n", + "def draw_bounding_box_on_image(image,\n", + " ymin,\n", + " xmin,\n", + " ymax,\n", + " xmax,\n", + " color,\n", + " font,\n", + " thickness=4,\n", + " display_str_list=()):\n", + " \"\"\"Adds a bounding box to an image.\"\"\"\n", + " draw = ImageDraw.Draw(image)\n", + " im_width, im_height = image.size\n", + " (left, right, top, bottom) = (xmin * im_width, xmax * im_width,\n", + " ymin * im_height, ymax * im_height)\n", + " draw.line([(left, top), (left, bottom), (right, bottom), (right, top),\n", + " (left, top)],\n", + " width=thickness,\n", + " fill=color)\n", + "\n", + " # If the total height of the display strings added to the top of the bounding\n", + " # box exceeds the top of the image, stack the strings below the bounding box\n", + " # instead of above.\n", + " display_str_heights = [font.getbbox(ds)[3] for ds in display_str_list]\n", + " # Each display_str has a top and bottom margin of 0.05x.\n", + " total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)\n", + "\n", + " if top > total_display_str_height:\n", + " text_bottom = top\n", + " else:\n", + " text_bottom = top + total_display_str_height\n", + " # Reverse list and print from bottom to top.\n", + " for display_str in display_str_list[::-1]:\n", + " bbox = font.getbbox(display_str)\n", + " text_width, text_height = bbox[2], bbox[3]\n", + " margin = np.ceil(0.05 * text_height)\n", + " draw.rectangle([(left, text_bottom - text_height - 2 * margin),\n", + " (left + text_width, text_bottom)],\n", + " fill=color)\n", + " draw.text((left + margin, text_bottom - text_height - margin),\n", + " display_str,\n", + " fill=\"black\",\n", + " font=font)\n", + " text_bottom -= text_height - 2 * margin\n", + "\n", + "\n", + "def draw_boxes(image, boxes, class_names, scores, max_boxes=10, min_score=0.1):\n", + " \"\"\"Overlay labeled boxes on an image with formatted scores and label names.\"\"\"\n", + " colors = list(ImageColor.colormap.values())\n", + "\n", + " try:\n", + " font = ImageFont.truetype(\"/usr/share/fonts/truetype/liberation/LiberationSansNarrow-Regular.ttf\",\n", + " 25)\n", + " except IOError:\n", + " print(\"Font not found, using default font.\")\n", + " font = ImageFont.load_default()\n", + "\n", + " for i in range(min(boxes.shape[0], max_boxes)):\n", + " if scores[i] >= min_score:\n", + " ymin, xmin, ymax, xmax = tuple(boxes[i])\n", + " display_str = \"{}: {}%\".format(class_names[i].decode(\"ascii\"),\n", + " int(100 * scores[i]))\n", + " color = colors[hash(class_names[i]) % len(colors)]\n", + " image_pil = Image.fromarray(np.uint8(image)).convert(\"RGB\")\n", + " 
draw_bounding_box_on_image(\n", + " image_pil,\n", + " ymin,\n", + " xmin,\n", + " ymax,\n", + " xmax,\n", + " color,\n", + " font,\n", + " display_str_list=[display_str])\n", + " np.copyto(image, np.array(image_pil))\n", + " return image" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "D19UCu9Q2-_8" + }, + "source": [ + "## Apply module\n", + "\n", + "Load a public image from Open Images v4, save locally, and display." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "YLWNhjUY1mhg" + }, + "outputs": [], + "source": [ + "# By Heiko Gorski, Source: https://commons.wikimedia.org/wiki/File:Naxos_Taverna.jpg\n", + "image_url = \"https://upload.wikimedia.org/wikipedia/commons/6/60/Naxos_Taverna.jpg/" #@param\n", + "downloaded_image_path = download_and_resize_image(image_url, 1280, 856, True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t-VdfLbC1w51" + }, + "source": [ + "Pick an object detection module and apply on the downloaded image. Modules:\n", + "* **FasterRCNN+InceptionResNet V2**: high accuracy,\n", + "* **ssd+mobilenet V2**: small and fast." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "uazJ5ASc2_QE" + }, + "outputs": [], + "source": [ + "module_handle = \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1/" #@param [\"https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1/", \"https://tfhub.dev/google/faster_rcnn/openimages_v4/inception_resnet_v2/1/"]\n", + "\n", + "detector = hub.load(module_handle).signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "znW8Fq1EC0x7" + }, + "outputs": [], + "source": [ + "def load_img(path):\n", + " img = tf.io.read_file(path)\n", + " img = tf.image.decode_jpeg(img, channels=3)\n", + " return img" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kwGJV96WWBLH" + }, + "outputs": [], + "source": [ + "def run_detector(detector, path):\n", + " img = load_img(path)\n", + "\n", + " converted_img = tf.image.convert_image_dtype(img, tf.float32)[tf.newaxis, ...]\n", + " start_time = time.time()\n", + " result = detector(converted_img)\n", + " end_time = time.time()\n", + "\n", + " result = {key:value.numpy() for key,value in result.items()}\n", + "\n", + " print(\"Found %d objects.\" % len(result[\"detection_scores\"]))\n", + " print(\"Inference time: \", end_time-start_time)\n", + "\n", + " image_with_boxes = draw_boxes(\n", + " img.numpy(), result[\"detection_boxes\"],\n", + " result[\"detection_class_entities\"], result[\"detection_scores\"])\n", + "\n", + " display_image(image_with_boxes)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vchaUW1XDodD" + }, + "outputs": [], + "source": [ + "run_detector(detector, downloaded_image_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WUUY3nfRX7VF" + }, + "source": [ + "### More images\n", + "Perform inference on some additional images with time tracking.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rubdr2JXfsa1" + }, + "outputs": [], + "source": [ + "image_urls = [\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_Coleoptera_of_the_British_islands_(Plate_125)_(8592917784).jpg\n", + " 
\"https://upload.wikimedia.org/wikipedia/commons/1/1b/The_Coleoptera_of_the_British_islands_%28Plate_125%29_%288592917784%29.jpg/",\n", + " # By Américo Toledano, Source: https://commons.wikimedia.org/wiki/File:Biblioteca_Maim%C3%B3nides,_Campus_Universitario_de_Rabanales_007.jpg\n", + " \"https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg/1024px-Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg/",\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_smaller_British_birds_(8053836633).jpg\n", + " \"https://upload.wikimedia.org/wikipedia/commons/0/09/The_smaller_British_birds_%288053836633%29.jpg/",\n", + " ]\n", + "\n", + "def detect_img(image_url):\n", + " start_time = time.time()\n", + " image_path = download_and_resize_image(image_url, 640, 480)\n", + " run_detector(detector, image_path)\n", + " end_time = time.time()\n", + " print(\"Inference time:\",end_time-start_time)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "otPnrxMKIrj5" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "H5F7DkD5NtOx" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[1])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DZ18R7dWNyoU" + }, + "outputs": [], + "source": [ + "detect_img(image_urls[2])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "object_detection.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb b/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb new file mode 100644 index 00000000000..0166a7408d5 --- /dev/null +++ b/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb @@ -0,0 +1,361 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "VFMCdVJIIraw" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "ZxMYj8OpIrCp" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0fO2R2BBKx3l" + }, + "source": [ + "# Multilingual Universal Sentence Encoder Q&A Retrieval\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zsDm_WgMNlJQ" + }, + "source": [ + "This is a demo for using [Universal Encoder Multilingual Q&A model](https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3) for question-answer retrieval of text, illustrating the use of **question_encoder** and **response_encoder** of the model. We use sentences from [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) paragraphs as the demo dataset, each sentence and its context (the text surrounding the sentence) is encoded into high dimension embeddings with the **response_encoder**. These embeddings are stored in an index built using the [simpleneighbors](https://pypi.org/project/simpleneighbors/) library for question-answer retrieval.\n", + "\n", + "On retrieval a random question is selected from the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset and encoded into high dimension embedding with the **question_encoder** and query the simpleneighbors index returning a list of approximate nearest neighbors in semantic space." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U0eOW2LTWiLg" + }, + "source": [ + "### More models\n", + "You can find all currently hosted text embedding models [here](https://tfhub.dev/s?module-type=text-embedding) and all models that have been trained on SQuAD as well [here](https://tfhub.dev/s?dataset=squad)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ORy-KvWXGXBo" + }, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "x00t_uJCEbeb" + }, + "outputs": [], + "source": [ + "%%capture\n", + "#@title Setup Environment\n", + "# Install the latest Tensorflow version.\n", + "!pip install -q \"tensorflow-text==2.11.*\"\n", + "!pip install -q simpleneighbors[annoy]\n", + "!pip install -q nltk\n", + "!pip install -q tqdm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "DmeFAuVsyWxg" + }, + "outputs": [], + "source": [ + "#@title Setup common imports and functions\n", + "import json\n", + "import nltk\n", + "import os\n", + "import pprint\n", + "import random\n", + "import simpleneighbors\n", + "import urllib\n", + "from IPython.display import HTML, display\n", + "from tqdm.notebook import tqdm\n", + "\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "from tensorflow_text import SentencepieceTokenizer\n", + "\n", + "nltk.download('punkt')\n", + "\n", + "\n", + "def download_squad(url):\n", + " return json.load(urllib.request.urlopen(url))\n", + "\n", + "def extract_sentences_from_squad_json(squad):\n", + " all_sentences = []\n", + " for data in squad['data']:\n", + " for paragraph in data['paragraphs']:\n", + " sentences = nltk.tokenize.sent_tokenize(paragraph['context'])\n", + " all_sentences.extend(zip(sentences, [paragraph['context']] * len(sentences)))\n", + " return list(set(all_sentences)) # remove duplicates\n", + "\n", + "def extract_questions_from_squad_json(squad):\n", + " questions = []\n", + " for data in squad['data']:\n", + " for paragraph in data['paragraphs']:\n", + " for qas in paragraph['qas']:\n", + " if qas['answers']:\n", + " questions.append((qas['question'], qas['answers'][0]['text']))\n", + " return list(set(questions))\n", + "\n", + "def output_with_highlight(text, highlight):\n", + " output = \"
<li> \"\n", + " i = text.find(highlight)\n", + " while True:\n", + " if i == -1:\n", + " output += text\n", + " break\n", + " output += text[0:i]\n", + " output += '<b>'+text[i:i+len(highlight)]+'</b>'\n", + " text = text[i+len(highlight):]\n", + " i = text.find(highlight)\n", + " return output + \"</li>\\n\"\n", + "\n",
+ "def display_nearest_neighbors(query_text, answer_text=None):\n", + " query_embedding = model.signatures['question_encoder'](tf.constant([query_text]))['outputs'][0]\n", + " search_results = index.nearest(query_embedding, n=num_results)\n", + "\n", + " if answer_text:\n", + " result_md = '''\n", + " <p>Random Question from SQuAD:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " <p>Answer:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " ''' % (query_text , answer_text)\n", + " else:\n", + " result_md = '''\n", + " <p>Question:</p>\n", + " <p>&nbsp;&nbsp;<b>%s</b></p>\n", + " ''' % query_text\n", + "\n", + " result_md += '''\n", + " <p>Retrieved sentences :\n", + " <ol>\n", + " '''\n", + "\n", + " if answer_text:\n", + " for s in search_results:\n", + " result_md += output_with_highlight(s, answer_text)\n", + " else:\n", + " for s in search_results:\n", + " result_md += '<li>' + s + '</li>\\n'\n", + "\n", + " result_md += \"</ol>\"\n", + " display(HTML(result_md))"
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1kbkT8i3FL_C" + }, + "source": [ + "Run the following code block to download and extract the SQuAD dataset into:\n", + "\n", + "* **sentences** is a list of (text, context) tuples - each paragraph from the SQuAD dataset is split into sentences using the nltk library, and the sentence and paragraph text form the (text, context) tuple.\n", + "* **questions** is a list of (question, answer) tuples.\n", + "\n", + "Note: You can use this demo to index the SQuAD train dataset or the smaller dev dataset (1.1 or 2.0) by selecting the **squad_url** below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "iYqV2GAty_Eh" + }, + "outputs": [], + "source": [ + "#@title Download and extract SQuAD data\n", + "squad_url = 'https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json' #@param [\"https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json/", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json/", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json/", \"https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json/"]\n", + "\n", + "squad_json = download_squad(squad_url)\n", + "sentences = extract_sentences_from_squad_json(squad_json)\n", + "questions = extract_questions_from_squad_json(squad_json)\n", + "print(\"%s sentences, %s questions extracted from SQuAD %s\" % (len(sentences), len(questions), squad_url))\n", + "\n", + "print(\"\\nExample sentence and context:\\n\")\n", + "sentence = random.choice(sentences)\n", + "print(\"sentence:\\n\")\n", + "pprint.pprint(sentence[0])\n", + "print(\"\\ncontext:\\n\")\n", + "pprint.pprint(sentence[1])\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9x3u-2uSGbDf" + }, + "source": [ + "The following code block loads the [Universal Encoder Multilingual Q&A model](https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3) and sets up its **question_encoder** and **response_encoder** signatures."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "44I0uCRQRiFO" + }, + "outputs": [], + "source": [ + "#@title Load model from tensorflow hub\n", + "module_url = \"https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3/" #@param [\"https://tfhub.dev/google/universal-sentence-encoder-multilingual-qa/3/", \"https://tfhub.dev/google/universal-sentence-encoder-qa/3/"]\n", + "model = hub.load(module_url)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SCQpDmTZG0O6" + }, + "source": [ + "The following code block computes the embeddings for all the (text, context) tuples and stores them in a [simpleneighbors](https://pypi.org/project/simpleneighbors/) index using the **response_encoder**.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FwDUryIfSLp2" + }, + "outputs": [], + "source": [ + "#@title Compute embeddings and build simpleneighbors index\n", + "batch_size = 100\n", + "\n", + "encodings = model.signatures['response_encoder'](\n", + " input=tf.constant([sentences[0][0]]),\n", + " context=tf.constant([sentences[0][1]]))\n", + "index = simpleneighbors.SimpleNeighbors(\n", + " len(encodings['outputs'][0]), metric='angular')\n", + "\n", + "print('Computing embeddings for %s sentences' % len(sentences))\n", + "slices = zip(*(iter(sentences),) * batch_size)\n", + "num_batches = int(len(sentences) / batch_size)\n", + "for s in tqdm(slices, total=num_batches):\n", + " response_batch = list([r for r, c in s])\n", + " context_batch = list([c for r, c in s])\n", + " encodings = model.signatures['response_encoder'](\n", + " input=tf.constant(response_batch),\n", + " context=tf.constant(context_batch)\n", + " )\n", + " for batch_index, batch in enumerate(response_batch):\n", + " index.add_one(batch, encodings['outputs'][batch_index])\n", + "\n", + "index.build()\n", + "print('simpleneighbors index for %s sentences built.' % len(sentences))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZkNcjoPzHJpP" + }, + "source": [ + "On retrieval, the question is encoded using the **question_encoder** and the question embedding is used to query the simpleneighbors index."
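As a minimal sketch of that retrieval step (the `display_nearest_neighbors` helper defined earlier wraps the same two calls before rendering HTML; the question string here is just an arbitrary example):

```python
# Sketch of the retrieval path described above: encode a question with the
# question_encoder signature, then query the simpleneighbors index built in
# the previous cell. Mirrors what display_nearest_neighbors() does internally.
question = "Who wrote the plays?"  # arbitrary example question
query_embedding = model.signatures['question_encoder'](
    tf.constant([question]))['outputs'][0]
for sentence in index.nearest(query_embedding, n=10):
    print(sentence)
```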
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "J0xTw2w3UViK" + }, + "outputs": [], + "source": [ + "#@title Retrieve nearest neighbors for a random question from SQuAD\n", + "num_results = 25 #@param {type:\"slider\", min:5, max:40, step:1}\n", + "\n", + "query = random.choice(questions)\n", + "display_nearest_neighbors(query[0], query[1])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "VFMCdVJIIraw" + ], + "name": "retrieval_with_tf_hub_universal_encoder_qa.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb b/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb new file mode 100644 index 00000000000..bd73cffebdf --- /dev/null +++ b/site/en/hub/tutorials/s3gan_generation_with_tf_hub.ipynb @@ -0,0 +1,429 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "BhN1AplL0Hpv" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LMgeG2swVVi6" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AqBuuwrIxlGs" + }, + "source": [ + "# Generating Images with Little Data Using S3GAN\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p5AWAusyySDA" + }, + "source": [ + "This notebook is a demo of Generative Adversarial Networks trained on ImageNet with as little as 2.5% labeled data using self- and semi-supervised learning techniques. Both generator and discriminator models are available on [TF Hub](https://tfhub.dev/s?publisher=google&q=compare_gan).\n", + "\n", + "For more information about the models and the training procedure see our [blogpost](https://ai.googleblog.com/2019/03/reducing-need-for-labeled-data-in.html) and the [paper](https://arxiv.org/abs/1903.02271) [1].\n", + "The code for training these models is available on [GitHub](https://github.com/google/compare_gan).\n", + "\n", + "To get started, connect to a runtime and follow these steps:\n", + "\n", + "1. (Optional) Select a model in the second code cell below.\n", + "2. Click **Runtime > Run all** to run each cell in order.\n", + " * Afterwards, the interactive visualizations should update automatically when you modify the settings using the sliders and dropdown menus.\n", + "\n", + "Note: if you run into any issues, you can try restarting the runtime and rerunning all cells from scratch by clicking **Runtime > Restart and run all...**.\n", + "\n", + "[1] Mario Lucic\\*, Michael Tschannen\\*, Marvin Ritter\\*, Xiaohua Zhai, Olivier\n", + " Bachem, Sylvain Gelly, [High-Fidelity Image Generation With Fewer Labels](https://arxiv.org/abs/1903.02271), ICML 2019." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_m5jsOM9kXWP" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NhlMa_tHs0_W" + }, + "outputs": [], + "source": [ + "# @title Imports and utility functions\n", + "import os\n", + "\n", + "import IPython\n", + "from IPython.display import display\n", + "import numpy as np\n", + "import PIL.Image\n", + "import pandas as pd\n", + "import six\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "\n", + "def imgrid(imarray, cols=8, pad=1):\n", + " pad = int(pad)\n", + " assert pad >= 0\n", + " cols = int(cols)\n", + " assert cols >= 1\n", + " N, H, W, C = imarray.shape\n", + " rows = int(np.ceil(N / float(cols)))\n", + " batch_pad = rows * cols - N\n", + " assert batch_pad >= 0\n", + " post_pad = [batch_pad, pad, pad, 0]\n", + " pad_arg = [[0, p] for p in post_pad]\n", + " imarray = np.pad(imarray, pad_arg, 'constant')\n", + " H += pad\n", + " W += pad\n", + " grid = (imarray\n", + " .reshape(rows, cols, H, W, C)\n", + " .transpose(0, 2, 1, 3, 4)\n", + " .reshape(rows*H, cols*W, C))\n", + " return grid[:-pad, :-pad]\n", + "\n", + "\n", + "def imshow(a, format='png', jpeg_fallback=True):\n", + " a = np.asarray(a, dtype=np.uint8)\n", + " if six.PY3:\n", + " str_file = six.BytesIO()\n", + " else:\n", + " str_file = six.StringIO()\n", + " PIL.Image.fromarray(a).save(str_file, format)\n", + " png_data = str_file.getvalue()\n", + " try:\n", + " disp = display(IPython.display.Image(png_data))\n", + " except IOError:\n", + " if jpeg_fallback and format != 'jpeg':\n", + " print ('Warning: image was too large to display in format \"{}\"; '\n", + " 'trying jpeg instead.').format(format)\n", + " return imshow(a, format='jpeg')\n", + " else:\n", + " raise\n", + " return disp\n", + "\n", + "\n", + "class Generator(object):\n", + "\n", + " def __init__(self, module_spec):\n", + " self._module_spec = module_spec\n", + " self._sess = 
None\n", + " self._graph = tf.Graph()\n", + " self._load_model()\n", + "\n", + " @property\n", + " def z_dim(self):\n", + " return self._z.shape[-1].value\n", + "\n", + " @property\n", + " def conditional(self):\n", + " return self._labels is not None\n", + "\n", + " def _load_model(self):\n", + " with self._graph.as_default():\n", + " self._generator = hub.Module(self._module_spec, name=\"gen_module\",\n", + " tags={\"gen\", \"bsNone\"})\n", + " input_info = self._generator.get_input_info_dict()\n", + " inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in self._generator.get_input_info_dict().items()}\n", + " self._samples = self._generator(inputs=inputs, as_dict=True)[\"generated\"]\n", + " print(\"Inputs:\", inputs)\n", + " print(\"Outputs:\", self._samples)\n", + " self._z = inputs[\"z\"]\n", + " self._labels = inputs.get(\"labels\", None)\n", + "\n", + " def _init_session(self):\n", + " if self._sess is None:\n", + " self._sess = tf.Session(graph=self._graph)\n", + " self._sess.run(tf.global_variables_initializer())\n", + "\n", + " def get_noise(self, num_samples, seed=None):\n", + " if np.isscalar(seed):\n", + " np.random.seed(seed)\n", + " return np.random.normal(size=[num_samples, self.z_dim])\n", + " z = np.empty(shape=(len(seed), self.z_dim), dtype=np.float32)\n", + " for i, s in enumerate(seed):\n", + " np.random.seed(s)\n", + " z[i] = np.random.normal(size=[self.z_dim])\n", + " return z\n", + "\n", + " def get_samples(self, z, labels=None):\n", + " with self._graph.as_default():\n", + " self._init_session()\n", + " feed_dict = {self._z: z}\n", + " if self.conditional:\n", + " assert labels is not None\n", + " assert labels.shape[0] == z.shape[0]\n", + " feed_dict[self._labels] = labels\n", + " samples = self._sess.run(self._samples, feed_dict=feed_dict)\n", + " return np.uint8(np.clip(256 * samples, 0, 255))\n", + "\n", + "\n", + "class Discriminator(object):\n", + "\n", + " def __init__(self, module_spec):\n", + " self._module_spec = module_spec\n", + " self._sess = None\n", + " self._graph = tf.Graph()\n", + " self._load_model()\n", + "\n", + " @property\n", + " def conditional(self):\n", + " return \"labels\" in self._inputs\n", + "\n", + " @property\n", + " def image_shape(self):\n", + " return self._inputs[\"images\"].shape.as_list()[1:]\n", + "\n", + " def _load_model(self):\n", + " with self._graph.as_default():\n", + " self._discriminator = hub.Module(self._module_spec, name=\"disc_module\",\n", + " tags={\"disc\", \"bsNone\"})\n", + " input_info = self._discriminator.get_input_info_dict()\n", + " self._inputs = {k: tf.placeholder(v.dtype, v.get_shape().as_list(), k)\n", + " for k, v in input_info.items()}\n", + " self._outputs = self._discriminator(inputs=self._inputs, as_dict=True)\n", + " print(\"Inputs:\", self._inputs)\n", + " print(\"Outputs:\", self._outputs)\n", + "\n", + " def _init_session(self):\n", + " if self._sess is None:\n", + " self._sess = tf.Session(graph=self._graph)\n", + " self._sess.run(tf.global_variables_initializer())\n", + "\n", + " def predict(self, images, labels=None):\n", + " with self._graph.as_default():\n", + " self._init_session()\n", + " feed_dict = {self._inputs[\"images\"]: images}\n", + " if \"labels\" in self._inputs:\n", + " assert labels is not None\n", + " assert labels.shape[0] == images.shape[0]\n", + " feed_dict[self._inputs[\"labels\"]] = labels\n", + " return self._sess.run(self._outputs, feed_dict=feed_dict)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "msTFS1UPkugr" + 
}, + "source": [ + "## Select a model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-hBEi9IFdoI-" + }, + "outputs": [], + "source": [ + "# @title Select a model { run: \"auto\" }\n", + "\n", + "model_name = \"S3GAN 128x128 20% labels (FID 6.9, IS 98.1)\" # @param [\"S3GAN 256x256 10% labels (FID 8.8, IS 130.7)\", \"S3GAN 128x128 2.5% labels (FID 12.6, IS 48.7)\", \"S3GAN 128x128 5% labels (FID 8.4, IS 74.0)\", \"S3GAN 128x128 10% labels (FID 7.6, IS 90.3)\", \"S3GAN 128x128 20% labels (FID 6.9, IS 98.1)\"]\n", + "models = {\n", + " \"S3GAN 256x256 10% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_10_256x256/1/",\n", + " \"S3GAN 128x128 2.5% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_2_5_128x128/1/",\n", + " \"S3GAN 128x128 5% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_5_128x128/1/",\n", + " \"S3GAN 128x128 10% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_10_128x128/1/",\n", + " \"S3GAN 128x128 20% labels\": \"https://tfhub.dev/google/compare_gan/s3gan_20_128x128/1/",\n", + "}\n", + "\n", + "module_spec = models[model_name.split(\" (\")[0]]\n", + "print(\"Module spec:\", module_spec)\n", + "\n", + "tf.reset_default_graph()\n", + "print(\"Loading model...\")\n", + "sampler = Generator(module_spec)\n", + "print(\"Model loaded.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ePQuAme_kxLj" + }, + "source": [ + "## Sample" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "kGgTXtFYq_FV" + }, + "outputs": [], + "source": [ + "# @title Sampling { run: \"auto\" }\n", + "\n", + "num_rows = 2 # @param {type: \"slider\", min:1, max:16}\n", + "num_cols = 3 # @param {type: \"slider\", min:1, max:16}\n", + "noise_seed = 23 # @param {type:\"slider\", min:0, max:100, step:1}\n", + "label_str = \"980) volcano\" # @param [\"-1) Random\", \"0) tench, Tinca tinca\", \"1) goldfish, Carassius auratus\", \"2) great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias\", \"3) tiger shark, Galeocerdo cuvieri\", \"4) hammerhead, hammerhead shark\", \"5) electric ray, crampfish, numbfish, torpedo\", \"6) stingray\", \"7) cock\", \"8) hen\", \"9) ostrich, Struthio camelus\", \"10) brambling, Fringilla montifringilla\", \"11) goldfinch, Carduelis carduelis\", \"12) house finch, linnet, Carpodacus mexicanus\", \"13) junco, snowbird\", \"14) indigo bunting, indigo finch, indigo bird, Passerina cyanea\", \"15) robin, American robin, Turdus migratorius\", \"16) bulbul\", \"17) jay\", \"18) magpie\", \"19) chickadee\", \"20) water ouzel, dipper\", \"21) kite\", \"22) bald eagle, American eagle, Haliaeetus leucocephalus\", \"23) vulture\", \"24) great grey owl, great gray owl, Strix nebulosa\", \"25) European fire salamander, Salamandra salamandra\", \"980) volcano\"]\n", + "\n", + "num_samples = num_rows * num_cols\n", + "z = sampler.get_noise(num_samples, seed=noise_seed)\n", + "\n", + "label = int(label_str.split(')')[0])\n", + "if label == -1:\n", + " labels = np.random.randint(0, num_classes, size=(num_samples))\n", + "else:\n", + " labels = np.asarray([label] * num_samples)\n", + "\n", + "samples = sampler.get_samples(z, labels)\n", + "imshow(imgrid(samples, cols=num_cols))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "vCffdVZvTtxL" + }, + "outputs": [], + "source": [ + "# @title Interpolation { run: \"auto\" }\n", + "\n", 
+ "num_samples = 1 # @param {type: \"slider\", min: 1, max: 6, step: 1}\n", + "num_interps = 6 # @param {type: \"slider\", min: 2, max: 10, step: 1}\n", + "noise_seed_A = 11 # @param {type: \"slider\", min: 0, max: 100, step: 1}\n", + "noise_seed_B = 0 # @param {type: \"slider\", min: 0, max: 100, step: 1}\n", + "label_str = \"1) goldfish, Carassius auratus\" # @param [\"0) tench, Tinca tinca\", \"1) goldfish, Carassius auratus\", \"2) great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias\", \"3) tiger shark, Galeocerdo cuvieri\", \"4) hammerhead, hammerhead shark\", \"5) electric ray, crampfish, numbfish, torpedo\", \"6) stingray\", \"7) cock\", \"8) hen\", \"9) ostrich, Struthio camelus\", \"10) brambling, Fringilla montifringilla\", \"11) goldfinch, Carduelis carduelis\", \"12) house finch, linnet, Carpodacus mexicanus\", \"13) junco, snowbird\", \"14) indigo bunting, indigo finch, indigo bird, Passerina cyanea\", \"15) robin, American robin, Turdus migratorius\", \"16) bulbul\", \"17) jay\", \"18) magpie\", \"19) chickadee\", \"20) water ouzel, dipper\", \"21) kite\", \"22) bald eagle, American eagle, Haliaeetus leucocephalus\", \"23) vulture\", \"24) great grey owl, great gray owl, Strix nebulosa\", \"25) European fire salamander, Salamandra salamandra\"]\n", + "\n", + "\n", + "def interpolate(A, B, num_interps):\n", + " alphas = np.linspace(0, 1, num_interps)\n", + " if A.shape != B.shape:\n", + " raise ValueError('A and B must have the same shape to interpolate.')\n", + " return np.array([((1-a)*A + a*B)/np.sqrt(a**2 + (1-a)**2) for a in alphas])\n", + "\n", + "\n", + "def interpolate_and_shape(A, B, num_interps):\n", + " interps = interpolate(A, B, num_interps)\n", + " return (interps.transpose(1, 0, *range(2, len(interps.shape)))\n", + " .reshape(num_samples * num_interps, -1))\n", + "\n", + "label = int(label_str.split(')')[0])\n", + "labels = np.asarray([label] * num_samples * num_interps)\n", + "\n", + "\n", + "z_A = sampler.get_noise(num_samples, seed=noise_seed_A)\n", + "z_B = sampler.get_noise(num_samples, seed=noise_seed_B)\n", + "z = interpolate_and_shape(z_A, z_B, num_interps)\n", + "\n", + "samples = sampler.get_samples(z, labels)\n", + "imshow(imgrid(samples, cols=num_interps))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "esW0Up95Ob6U" + }, + "source": [ + "## Discriminator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ButxPSq0OzgL" + }, + "outputs": [], + "source": [ + "disc = Discriminator(module_spec)\n", + "\n", + "batch_size = 4\n", + "num_classes = 1000\n", + "images = np.random.random(size=[batch_size] + disc.image_shape)\n", + "labels = np.random.randint(0, num_classes, size=(batch_size))\n", + "\n", + "disc.predict(images, labels=labels)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "BhN1AplL0Hpv" + ], + "name": "s3gan_generation_with_tf_hub.ipynb", + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb b/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb new file mode 100644 index 00000000000..55bcebcc447 --- /dev/null +++ b/site/en/hub/tutorials/semantic_approximate_nearest_neighbors.ipynb @@ -0,0 +1,882 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ACbjNjyO4f_8" + }, + "source": [ + 
"##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MCM50vaM4jiK" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9qOVy-_vmuUP" + }, + "source": [ + "# Semantic Search with Approximate Nearest Neighbors and Text Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Hks9F5qq6m2" + }, + "source": [ + "This tutorial illustrates how to generate embeddings from a [TensorFlow Hub](https://tfhub.dev) (TF-Hub) module given input data, and build an approximate nearest neighbours (ANN) index using the extracted embeddings. The index can then be used for real-time similarity matching and retrieval. \n", + "\n", + "When dealing with a large corpus of data, it's not efficient to perform exact matching by scanning the whole repository to find the most similar items to a given query in real-time. Thus, we use an approximate similarity matching algorithm which allows us to trade off a little bit of accuracy in finding exact nearest neighbor matches for a significant boost in speed. \n", + "\n", + "In this tutorial, we show an example of real-time text search over a corpus of news headlines to find the headlines that are most similar to a query. Unlike keyword search, this captures the semantic similarity encoded in the text embedding.\n", + "\n", + "The steps of this tutorial are:\n", + "1. Download sample data.\n", + "2. Generate embeddings for the data using a TF-Hub module\n", + "3. Build an ANN index for the embeddings\n", + "4. Use the index for similarity matching\n", + "\n", + "We use [Apache Beam](https://beam.apache.org/documentation/programming-guide/) with [TensorFlow Transform](https://www.tensorflow.org/tfx/tutorials/transform/simple) (TF-Transform) to generate the embeddings from the TF-Hub module. We also use Spotify's [ANNOY](https://github.com/spotify/annoy) library to build the approximate nearest neighbours index. You can find benchmarking of ANN framework in this [Github repository](https://github.com/erikbern/ann-benchmarks).\n", + "\n", + "This tutorial uses TensorFlow 1.0 and works only with TF1 [Hub modules](https://www.tensorflow.org/hub/tf1_hub_module) from TF-Hub. See the updated [TF2 version of this tutorial](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q0jr0QK9qO5P" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "whMRj9qeqed4" + }, + "source": [ + "Install the required libraries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qmXkLPoaqS--" + }, + "outputs": [], + "source": [ + "!pip install -q apache_beam\n", + "!pip install -q 'scikit_learn~=0.23.0' # For gaussian_random_matrix.\n", + "!pip install -q annoy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A-vBZiCCqld0" + }, + "source": [ + "Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6NTYbdWcseuK" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import pathlib\n", + "import pickle\n", + "from collections import namedtuple\n", + "from datetime import datetime\n", + "\n", + "import numpy as np\n", + "import apache_beam as beam\n", + "import annoy\n", + "from sklearn.random_projection import gaussian_random_matrix\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "import tensorflow_hub as hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_GF0GnLqGdPQ" + }, + "outputs": [], + "source": [ + "# TFT needs to be installed afterwards\n", + "!pip install -q tensorflow_transform==0.24\n", + "import tensorflow_transform as tft\n", + "import tensorflow_transform.beam as tft_beam" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tx0SZa6-7b-f" + }, + "outputs": [], + "source": [ + "print('TF version: {}'.format(tf.__version__))\n", + "print('TF-Hub version: {}'.format(hub.__version__))\n", + "print('TF-Transform version: {}'.format(tft.__version__))\n", + "print('Apache Beam version: {}'.format(beam.__version__))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P6Imq876rLWx" + }, + "source": [ + "## 1. Download Sample Data\n", + "\n", + "[A Million News Headlines](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/SYBGZL#) dataset contains news headlines published over a period of 15 years sourced from the reputable Australian Broadcasting Corp. (ABC). This news dataset has a summarised historical record of noteworthy events in the globe from early-2003 to end-2017 with a more granular focus on Australia. \n", + "\n", + "**Format**: Tab-separated two-column data: 1) publication date and 2) headline text. 
We are only interested in the headline text.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OpF57n8e5C9D" + }, + "outputs": [], + "source": [ + "!wget 'https://dataverse.harvard.edu/api/access/datafile/3450625?format=tab&gbrecs=true' -O raw.tsv\n", + "!wc -l raw.tsv\n", + "!head raw.tsv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Reeoc9z0zTxJ" + }, + "source": [ + "For simplicity, we only keep the headline text and remove the publication date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INPWa4upv_yJ" + }, + "outputs": [], + "source": [ + "!rm -r corpus\n", + "!mkdir corpus\n", + "\n", + "with open('corpus/text.txt', 'w') as out_file:\n", + " with open('raw.tsv', 'r') as in_file:\n", + " for line in in_file:\n", + " headline = line.split('\\t')[1].strip().strip('\"')\n", + " out_file.write(headline+\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5-oedX40z6o2" + }, + "outputs": [], + "source": [ + "!tail corpus/text.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ls0Zh7kYz3PM" + }, + "source": [ + "## Helper function to load a TF-Hub module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vSt_jmyKz3Xp" + }, + "outputs": [], + "source": [ + "def load_module(module_url):\n", + " embed_module = hub.Module(module_url)\n", + " placeholder = tf.placeholder(dtype=tf.string)\n", + " embed = embed_module(placeholder)\n", + " session = tf.Session()\n", + " session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n", + " print('TF-Hub module is loaded.')\n", + "\n", + " def _embeddings_fn(sentences):\n", + " computed_embeddings = session.run(\n", + " embed, feed_dict={placeholder: sentences})\n", + " return computed_embeddings\n", + "\n", + " return _embeddings_fn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2AngMtH50jNb" + }, + "source": [ + "## 2. Generate Embeddings for the Data.\n", + "\n", + "In this tutorial, we use the [Universal Sentence Encoder](https://tfhub.dev/google/universal-sentence-encoder/2) to generate embeddings for the headline data. The sentence embeddings can then be easily used to compute sentence level meaning similarity. We run the embedding generation process using Apache Beam and TF-Transform." 
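+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "Before wiring the encoder into a Beam pipeline, the quick sketch below illustrates what the pipeline computes for each headline: it embeds two made-up headlines with the `load_module` helper defined above and compares them with an inner product. The module URL used here is only an assumption for illustration; the URL actually used by the pipeline is chosen later in the **Set parameters** section."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# Illustrative sketch only -- not part of the pipeline below.\n",
+     "# The module URL is an assumption; see the \"Set parameters\" section.\n",
+     "sketch_graph = tf.Graph()\n",
+     "with sketch_graph.as_default():\n",
+     "  sketch_embed_fn = load_module('https://tfhub.dev/google/universal-sentence-encoder/2')\n",
+     "\n",
+     "sketch_vectors = sketch_embed_fn([\n",
+     "    'council approves new housing development',\n",
+     "    'new housing project gets council approval'])\n",
+     "print('Embeddings shape:', sketch_vectors.shape)\n",
+     "print('Inner product similarity: {:.3f}'.format(\n",
+     "    float(np.inner(sketch_vectors[0], sketch_vectors[1]))))"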
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_DvXnDB1pEX" + }, + "source": [ + "### Embedding extraction method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yL7OEY1E0A35" + }, + "outputs": [], + "source": [ + "encoder = None\n", + "\n", + "def embed_text(text, module_url, random_projection_matrix):\n", + " # Beam will run this function in different processes that need to\n", + " # import hub and load embed_fn (if not previously loaded)\n", + " global encoder\n", + " if not encoder:\n", + " encoder = hub.Module(module_url)\n", + " embedding = encoder(text)\n", + " if random_projection_matrix is not None:\n", + " # Perform random projection for the embedding\n", + " embedding = tf.matmul(\n", + " embedding, tf.cast(random_projection_matrix, embedding.dtype))\n", + " return embedding\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_don5gXy9D59" + }, + "source": [ + "### Make TFT preprocess_fn method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fwYlrzzK9ECE" + }, + "outputs": [], + "source": [ + "def make_preprocess_fn(module_url, random_projection_matrix=None):\n", + " '''Makes a tft preprocess_fn'''\n", + "\n", + " def _preprocess_fn(input_features):\n", + " '''tft preprocess_fn'''\n", + " text = input_features['text']\n", + " # Generate the embedding for the input text\n", + " embedding = embed_text(text, module_url, random_projection_matrix)\n", + " \n", + " output_features = {\n", + " 'text': text, \n", + " 'embedding': embedding\n", + " }\n", + " \n", + " return output_features\n", + " \n", + " return _preprocess_fn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SQ492LN7A-NZ" + }, + "source": [ + "### Create dataset metadata" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d2D4332VA-2V" + }, + "outputs": [], + "source": [ + "def create_metadata():\n", + " '''Creates metadata for the raw data'''\n", + " from tensorflow_transform.tf_metadata import dataset_metadata\n", + " from tensorflow_transform.tf_metadata import schema_utils\n", + " feature_spec = {'text': tf.FixedLenFeature([], dtype=tf.string)}\n", + " schema = schema_utils.schema_from_feature_spec(feature_spec)\n", + " metadata = dataset_metadata.DatasetMetadata(schema)\n", + " return metadata" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5zlSLPzRBm6H" + }, + "source": [ + "### Beam pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCGUIB172m2G" + }, + "outputs": [], + "source": [ + "def run_hub2emb(args):\n", + " '''Runs the embedding generation pipeline'''\n", + "\n", + " options = beam.options.pipeline_options.PipelineOptions(**args)\n", + " args = namedtuple(\"options\", args.keys())(*args.values())\n", + "\n", + " raw_metadata = create_metadata()\n", + " converter = tft.coders.CsvCoder(\n", + " column_names=['text'], schema=raw_metadata.schema)\n", + "\n", + " with beam.Pipeline(args.runner, options=options) as pipeline:\n", + " with tft_beam.Context(args.temporary_dir):\n", + " # Read the sentences from the input file\n", + " sentences = ( \n", + " pipeline\n", + " | 'Read sentences from files' >> beam.io.ReadFromText(\n", + " file_pattern=args.data_dir)\n", + " | 'Convert to dictionary' >> beam.Map(converter.decode)\n", + " )\n", + "\n", + " sentences_dataset = (sentences, raw_metadata)\n", + " preprocess_fn = make_preprocess_fn(args.module_url, 
args.random_projection_matrix)\n", + " # Generate the embeddings for the sentence using the TF-Hub module\n", + " embeddings_dataset, _ = (\n", + " sentences_dataset\n", + " | 'Extract embeddings' >> tft_beam.AnalyzeAndTransformDataset(preprocess_fn)\n", + " )\n", + "\n", + " embeddings, transformed_metadata = embeddings_dataset\n", + " # Write the embeddings to TFRecords files\n", + " embeddings | 'Write embeddings to TFRecords' >> beam.io.tfrecordio.WriteToTFRecord(\n", + " file_path_prefix='{}/emb'.format(args.output_dir),\n", + " file_name_suffix='.tfrecords',\n", + " coder=tft.coders.ExampleProtoCoder(transformed_metadata.schema))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uHbq4t2gCDAG" + }, + "source": [ + "### Generating the Random Projection Weight Matrix\n", + "\n", + "[Random projection](https://en.wikipedia.org/wiki/Random_projection) is a simple yet powerful technique used to reduce the dimensionality of a set of points that lie in Euclidean space. For a theoretical background, see the [Johnson-Lindenstrauss lemma](https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma).\n", + "\n", + "Reducing the dimensionality of the embeddings with random projection means less time needed to build and query the ANN index.\n", + "\n", + "In this tutorial, we use [Gaussian Random Projection](https://en.wikipedia.org/wiki/Random_projection#Gaussian_random_projection) from the [Scikit-learn](https://scikit-learn.org/stable/modules/random_projection.html#gaussian-random-projection) library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "T1aYPeOUCDIP" + }, + "outputs": [], + "source": [ + "def generate_random_projection_weights(original_dim, projected_dim):\n", + " random_projection_matrix = None\n", + " if projected_dim and original_dim > projected_dim:\n", + " random_projection_matrix = gaussian_random_matrix(\n", + " n_components=projected_dim, n_features=original_dim).T\n", + " print(\"A Gaussian random weight matrix was created with shape {}\".format(random_projection_matrix.shape))\n", + " print('Storing random projection matrix to disk...')\n", + " with open('random_projection_matrix', 'wb') as handle:\n", + " pickle.dump(random_projection_matrix, \n", + " handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " \n", + " return random_projection_matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CHxZX2Z3Nk64" + }, + "source": [ + "### Set parameters\n", + "If you want to build an index using the original embedding space without random projection, set the `projected_dim` parameter to `None`. Note that this will slow down the indexing step for high-dimensional embeddings."
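+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "Before setting the parameters, the short numpy-only sketch below (not used by the pipeline; all names are illustrative) demonstrates the property described above: random 512-dimensional points are projected down to 64 dimensions with a Gaussian random matrix, and their pairwise distances are roughly preserved."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# Numpy-only illustration of random projection; not used by the pipeline.\n",
+     "rng = np.random.RandomState(42)\n",
+     "demo_vectors = rng.normal(size=(100, 512))  # stand-ins for 512-dim embeddings\n",
+     "demo_projection = rng.normal(size=(512, 64)) / np.sqrt(64)  # entries ~ N(0, 1/64)\n",
+     "demo_projected = demo_vectors.dot(demo_projection)\n",
+     "\n",
+     "def pairwise_distances(x):\n",
+     "  diffs = x[:, None, :] - x[None, :, :]\n",
+     "  return np.sqrt((diffs ** 2).sum(axis=-1))\n",
+     "\n",
+     "upper = np.triu_indices(len(demo_vectors), k=1)\n",
+     "ratios = (pairwise_distances(demo_projected)[upper] /\n",
+     "          pairwise_distances(demo_vectors)[upper])\n",
+     "print('Distance ratios after projection: mean={:.3f}, std={:.3f}'.format(\n",
+     "    ratios.mean(), ratios.std()))"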
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "feMVXFL0NlIM" + }, + "outputs": [], + "source": [ + "module_url = 'https://tfhub.dev/google/universal-sentence-encoder/2' #@param {type:\"string\"}\n", + "projected_dim = 64 #@param {type:\"number\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "On-MbzD922kb" + }, + "source": [ + "### Run pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y3I1Wv4i21yY" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "output_dir = pathlib.Path(tempfile.mkdtemp())\n", + "temporary_dir = pathlib.Path(tempfile.mkdtemp())\n", + "\n", + "g = tf.Graph()\n", + "with g.as_default():\n", + " original_dim = load_module(module_url)(['']).shape[1]\n", + " random_projection_matrix = None\n", + "\n", + " if projected_dim:\n", + " random_projection_matrix = generate_random_projection_weights(\n", + " original_dim, projected_dim)\n", + "\n", + "args = {\n", + " 'job_name': 'hub2emb-{}'.format(datetime.utcnow().strftime('%y%m%d-%H%M%S')),\n", + " 'runner': 'DirectRunner',\n", + " 'batch_size': 1024,\n", + " 'data_dir': 'corpus/*.txt',\n", + " 'output_dir': output_dir,\n", + " 'temporary_dir': temporary_dir,\n", + " 'module_url': module_url,\n", + " 'random_projection_matrix': random_projection_matrix,\n", + "}\n", + "\n", + "print(\"Pipeline args are set.\")\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iS9obmeP4ZOA" + }, + "outputs": [], + "source": [ + "!rm -r {output_dir}\n", + "!rm -r {temporary_dir}\n", + "\n", + "print(\"Running pipeline...\")\n", + "%time run_hub2emb(args)\n", + "print(\"Pipeline is done.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JAwOo7gQWvVd" + }, + "outputs": [], + "source": [ + "!ls {output_dir}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HVnee4e6U90u" + }, + "source": [ + "Read some of the generated embeddings..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-K7pGXlXOj1N" + }, + "outputs": [], + "source": [ + "import itertools\n", + "\n", + "embed_file = os.path.join(output_dir, 'emb-00000-of-00001.tfrecords')\n", + "sample = 5\n", + "record_iterator = tf.io.tf_record_iterator(path=embed_file)\n", + "for string_record in itertools.islice(record_iterator, sample):\n", + " example = tf.train.Example()\n", + " example.ParseFromString(string_record)\n", + " text = example.features.feature['text'].bytes_list.value\n", + " embedding = np.array(example.features.feature['embedding'].float_list.value)\n", + " print(\"Embedding dimensions: {}\".format(embedding.shape[0]))\n", + " print(\"{}: {}\".format(text, embedding[:10]))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agGoaMSgY8wN" + }, + "source": [ + "## 3. Build the ANN Index for the Embeddings\n", + "\n", + "[ANNOY](https://github.com/spotify/annoy) (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mmapped into memory. It is built and used by [Spotify](https://www.spotify.com) for music recommendations." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UcPDspU3WjgH" + }, + "outputs": [], + "source": [ + "def build_index(embedding_files_pattern, index_filename, vector_length, \n", + " metric='angular', num_trees=100):\n", + " '''Builds an ANNOY index'''\n", + "\n", + " annoy_index = annoy.AnnoyIndex(vector_length, metric=metric)\n", + " # Mapping between the item and its identifier in the index\n", + " mapping = {}\n", + "\n", + " embed_files = tf.gfile.Glob(embedding_files_pattern)\n", + " print('Found {} embedding file(s).'.format(len(embed_files)))\n", + "\n", + " item_counter = 0\n", + " for f, embed_file in enumerate(embed_files):\n", + " print('Loading embeddings in file {} of {}...'.format(\n", + " f+1, len(embed_files)))\n", + " record_iterator = tf.io.tf_record_iterator(\n", + " path=embed_file)\n", + "\n", + " for string_record in record_iterator:\n", + " example = tf.train.Example()\n", + " example.ParseFromString(string_record)\n", + " text = example.features.feature['text'].bytes_list.value[0].decode(\"utf-8\")\n", + " mapping[item_counter] = text\n", + " embedding = np.array(\n", + " example.features.feature['embedding'].float_list.value)\n", + " annoy_index.add_item(item_counter, embedding)\n", + " item_counter += 1\n", + " if item_counter % 100000 == 0:\n", + " print('{} items loaded to the index'.format(item_counter))\n", + "\n", + " print('A total of {} items added to the index'.format(item_counter))\n", + "\n", + " print('Building the index with {} trees...'.format(num_trees))\n", + " annoy_index.build(n_trees=num_trees)\n", + " print('Index is successfully built.')\n", + " \n", + " print('Saving index to disk...')\n", + " annoy_index.save(index_filename)\n", + " print('Index is saved to disk.')\n", + " print(\"Index file size: {} GB\".format(\n", + " round(os.path.getsize(index_filename) / float(1024 ** 3), 2)))\n", + " annoy_index.unload()\n", + "\n", + " print('Saving mapping to disk...')\n", + " with open(index_filename + '.mapping', 'wb') as handle:\n", + " pickle.dump(mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " print('Mapping is saved to disk.')\n", + " print(\"Mapping file size: {} MB\".format(\n", + " round(os.path.getsize(index_filename + '.mapping') / float(1024 ** 2), 2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AgyOQhUq6FNE" + }, + "outputs": [], + "source": [ + "embedding_files = \"{}/emb-*.tfrecords\".format(output_dir)\n", + "embedding_dimension = projected_dim\n", + "index_filename = \"index\"\n", + "\n", + "!rm {index_filename}\n", + "!rm {index_filename}.mapping\n", + "\n", + "%time build_index(embedding_files, index_filename, embedding_dimension)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ic31Tm5cgAd5" + }, + "outputs": [], + "source": [ + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "maGxDl8ufP-p" + }, + "source": [ + "## 4. Use the Index for Similarity Matching\n", + "Now we can use the ANN index to find news headlines that are semantically close to an input query." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_dIs8W78fYPp" + }, + "source": [ + "### Load the index and the mapping files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jlTTrbQHayvb" + }, + "outputs": [], + "source": [ + "index = annoy.AnnoyIndex(embedding_dimension)\n", + "index.load(index_filename, prefault=True)\n", + "print('Annoy index is loaded.')\n", + "with open(index_filename + '.mapping', 'rb') as handle:\n", + " mapping = pickle.load(handle)\n", + "print('Mapping file is loaded.')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y6liFMSUh08J" + }, + "source": [ + "### Similarity matching method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mUxjTag8hc16" + }, + "outputs": [], + "source": [ + "def find_similar_items(embedding, num_matches=5):\n", + " '''Finds similar items to a given embedding in the ANN index'''\n", + " ids = index.get_nns_by_vector(\n", + " embedding, num_matches, search_k=-1, include_distances=False)\n", + " items = [mapping[i] for i in ids]\n", + " return items" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hjerNpmZja0A" + }, + "source": [ + "### Extract embedding from a given query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a0IIXzfBjZ19" + }, + "outputs": [], + "source": [ + "# Load the TF-Hub module\n", + "print(\"Loading the TF-Hub module...\")\n", + "g = tf.Graph()\n", + "with g.as_default():\n", + " embed_fn = load_module(module_url)\n", + "print(\"TF-Hub module is loaded.\")\n", + "\n", + "random_projection_matrix = None\n", + "if os.path.exists('random_projection_matrix'):\n", + " print(\"Loading random projection matrix...\")\n", + " with open('random_projection_matrix', 'rb') as handle:\n", + " random_projection_matrix = pickle.load(handle)\n", + " print('random projection matrix is loaded.')\n", + "\n", + "def extract_embeddings(query):\n", + " '''Generates the embedding for the query'''\n", + " query_embedding = embed_fn([query])[0]\n", + " if random_projection_matrix is not None:\n", + " query_embedding = query_embedding.dot(random_projection_matrix)\n", + " return query_embedding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kCoCNROujEIO" + }, + "outputs": [], + "source": [ + "extract_embeddings(\"Hello Machine Learning!\")[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nE_Q60nCk_ZB" + }, + "source": [ + "### Enter a query to find the most similar items" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "wC0uLjvfk5nB" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "query = \"confronting global challenges\" #@param {type:\"string\"}\n", + "print(\"Generating embedding for the query...\")\n", + "%time query_embedding = extract_embeddings(query)\n", + "\n", + "print(\"\")\n", + "print(\"Finding relevant items in the index...\")\n", + "%time items = find_similar_items(query_embedding, 10)\n", + "\n", + "print(\"\")\n", + "print(\"Results:\")\n", + "print(\"=========\")\n", + "for item in items:\n", + " print(item)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wwtMtyOeDKwt" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](https://www.tensorflow.org/) and see the TF-Hub API documentation at 
[tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub modules at [tfhub.dev](https://tfhub.dev/) including more text embedding modules and image feature vector modules.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ls0Zh7kYz3PM", + "_don5gXy9D59", + "SQ492LN7A-NZ" + ], + "name": "semantic_approximate_nearest_neighbors.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb new file mode 100644 index 00000000000..0c2874bc030 --- /dev/null +++ b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb @@ -0,0 +1,363 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "co7MV6sX7Xto" + }, + "source": [ + "# Universal Sentence Encoder\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eAVQGidpL8v5" + }, + "source": [ + "This notebook illustrates how to access the Universal Sentence Encoder and use it for sentence similarity and sentence classification tasks.\n", + "\n", + "The Universal Sentence Encoder makes getting sentence level embeddings as easy as it has historically been to lookup the embeddings for individual words. The sentence embeddings can then be trivially used to compute sentence level meaning similarity as well as to enable better performance on downstream classification tasks using less supervised training data.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pOTzp8O36CyQ" + }, + "source": [ + "## Setup\n", + "\n", + "This section sets up the environment for access to the Universal Sentence Encoder on TF Hub and provides examples of applying the encoder to words, sentences, and paragraphs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lVjNK8shFKOC" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!pip3 install seaborn" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "63Pd3nJnTl-i" + }, + "source": [ + "More detailed information about installing Tensorflow can be found at [https://www.tensorflow.org/install/](https://www.tensorflow.org/install/)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "zwty8Z6mAkdV" + }, + "outputs": [], + "source": [ + "#@title Load the Universal Sentence Encoder's TF Hub module\n", + "from absl import logging\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import re\n", + "import seaborn as sns\n", + "\n", + "module_url = \"https://tfhub.dev/google/universal-sentence-encoder/4/" #@param [\"https://tfhub.dev/google/universal-sentence-encoder/4/", \"https://tfhub.dev/google/universal-sentence-encoder-large/5/"]\n", + "model = hub.load(module_url)\n", + "print (\"module %s loaded\" % module_url)\n", + "def embed(input):\n", + " return model(input)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Q8F4LNGFqOiq" + }, + "outputs": [], + "source": [ + "#@title Compute a representation for each message, showing various lengths supported.\n", + "word = \"Elephant\"\n", + "sentence = \"I am a sentence for which I would like to get its embedding.\"\n", + "paragraph = (\n", + " \"Universal Sentence Encoder embeddings also support short paragraphs. \"\n", + " \"There is no hard limit on how long the paragraph is. 
Roughly, the longer \"\n", + " \"the more 'diluted' the embedding will be.\")\n", + "messages = [word, sentence, paragraph]\n", + "\n", + "# Reduce logging output.\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "message_embeddings = embed(messages)\n", + "\n", + "for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):\n", + " print(\"Message: {}\".format(messages[i]))\n", + " print(\"Embedding size: {}\".format(len(message_embedding)))\n", + " message_embedding_snippet = \", \".join(\n", + " (str(x) for x in message_embedding[:3]))\n", + " print(\"Embedding: [{}, ...]\\n\".format(message_embedding_snippet))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BnvjATdy64eR" + }, + "source": [ + "# Semantic Textual Similarity Task Example\n", + "\n", + "The embeddings produced by the Universal Sentence Encoder are approximately normalized. The semantic similarity of two sentences can be trivially computed as the inner product of the encodings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h1FFCTKm7ba4" + }, + "outputs": [], + "source": [ + "def plot_similarity(labels, features, rotation):\n", + " corr = np.inner(features, features)\n", + " sns.set(font_scale=1.2)\n", + " g = sns.heatmap(\n", + " corr,\n", + " xticklabels=labels,\n", + " yticklabels=labels,\n", + " vmin=0,\n", + " vmax=1,\n", + " cmap=\"YlOrRd\")\n", + " g.set_xticklabels(labels, rotation=rotation)\n", + " g.set_title(\"Semantic Textual Similarity\")\n", + "\n", + "def run_and_plot(messages_):\n", + " message_embeddings_ = embed(messages_)\n", + " plot_similarity(messages_, message_embeddings_, 90)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "339tuJ5Pwqqv" + }, + "source": [ + "## Similarity Visualized\n", + "Here we show the similarity in a heat map. The final graph is a 9x9 matrix where each entry `[i, j]` is colored based on the inner product of the encodings for sentence `i` and `j`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cPMCaxrZwp7t" + }, + "outputs": [], + "source": [ + "messages = [\n", + " # Smartphones\n", + " \"I like my phone\",\n", + " \"My phone is not good.\",\n", + " \"Your cellphone looks great.\",\n", + "\n", + " # Weather\n", + " \"Will it snow tomorrow?\",\n", + " \"Recently a lot of hurricanes have hit the US\",\n", + " \"Global warming is real\",\n", + "\n", + " # Food and health\n", + " \"An apple a day, keeps the doctors away\",\n", + " \"Eating strawberries is healthy\",\n", + " \"Is paleo better than keto?\",\n", + "\n", + " # Asking about age\n", + " \"How old are you?\",\n", + " \"what is your age?\",\n", + "]\n", + "\n", + "run_and_plot(messages)\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6FjdeCqPJeg-" + }, + "source": [ + "## Evaluation: STS (Semantic Textual Similarity) Benchmark\n", + "\n", + "The [**STS Benchmark**](https://ixa2.si.ehu.eus/stswiki/stswiki.html#STS_benchmark) provides an intrinsic evaluation of the degree to which similarity scores computed using sentence embeddings align with human judgements. The benchmark requires systems to return similarity scores for a diverse selection of sentence pairs. [Pearson correlation](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) is then used to evaluate the quality of the machine similarity scores against human judgements." 
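+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "As a minimal sketch of the scoring used in the evaluation below (with made-up example sentences), the next cell computes a model similarity score for a single sentence pair: l2-normalize the two encodings, take their inner product, and map the angular distance to a score between 0 and 1."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": null,
+    "metadata": {},
+    "outputs": [],
+    "source": [
+     "# Minimal sketch with made-up sentences; mirrors the benchmark code below.\n",
+     "import math\n",
+     "\n",
+     "example_pair = ['A man is playing a guitar.', 'A man is playing an instrument.']\n",
+     "example_enc = tf.nn.l2_normalize(embed(tf.constant(example_pair)), axis=1)\n",
+     "example_cos = tf.reduce_sum(example_enc[0] * example_enc[1])\n",
+     "example_score = 1.0 - tf.acos(tf.clip_by_value(example_cos, -1.0, 1.0)) / math.pi\n",
+     "print('Model similarity score: {:.3f}'.format(float(example_score)))"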
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q5nuBbI1iFQR" + }, + "source": [ + "### Download data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VOs8ZfOnJeBF" + }, + "outputs": [], + "source": [ + "import pandas\n", + "import scipy\n", + "import math\n", + "import csv\n", + "\n", + "sts_dataset = tf.keras.utils.get_file(\n", + " fname=\"Stsbenchmark.tar.gz\",\n", + " origin=\"http://ixa2.si.ehu.es/stswiki/images/4/48/Stsbenchmark.tar.gz/",\n", + " extract=True)\n", + "sts_dev = pandas.read_table(\n", + " os.path.join(os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-dev.csv\"),\n", + " skip_blank_lines=True,\n", + " usecols=[4, 5, 6],\n", + " names=[\"sim\", \"sent_1\", \"sent_2\"])\n", + "sts_test = pandas.read_table(\n", + " os.path.join(\n", + " os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-test.csv\"),\n", + " quoting=csv.QUOTE_NONE,\n", + " skip_blank_lines=True,\n", + " usecols=[4, 5, 6],\n", + " names=[\"sim\", \"sent_1\", \"sent_2\"])\n", + "# cleanup some NaN values in sts_dev\n", + "sts_dev = sts_dev[[isinstance(s, str) for s in sts_dev['sent_2']]]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8OKy8WhnKRe_" + }, + "source": [ + "### Evaluate Sentence Embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "W-q2r7jyZGb7" + }, + "outputs": [], + "source": [ + "sts_data = sts_dev #@param [\"sts_dev\", \"sts_test\"] {type:\"raw\"}\n", + "\n", + "def run_sts_benchmark(batch):\n", + " sts_encode1 = tf.nn.l2_normalize(embed(tf.constant(batch['sent_1'].tolist())), axis=1)\n", + " sts_encode2 = tf.nn.l2_normalize(embed(tf.constant(batch['sent_2'].tolist())), axis=1)\n", + " cosine_similarities = tf.reduce_sum(tf.multiply(sts_encode1, sts_encode2), axis=1)\n", + " clip_cosine_similarities = tf.clip_by_value(cosine_similarities, -1.0, 1.0)\n", + " scores = 1.0 - tf.acos(clip_cosine_similarities) / math.pi\n", + " \"\"\"Returns the similarity scores\"\"\"\n", + " return scores\n", + "\n", + "dev_scores = sts_data['sim'].tolist()\n", + "scores = []\n", + "for batch in np.array_split(sts_data, 10):\n", + " scores.extend(run_sts_benchmark(batch))\n", + "\n", + "pearson_correlation = scipy.stats.pearsonr(scores, dev_scores)\n", + "print('Pearson correlation coefficient = {0}\\np-value = {1}'.format(\n", + " pearson_correlation[0], pearson_correlation[1]))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "RUymE2l9GZfO" + ], + "name": "semantic_similarity_with_tf_hub_universal_encoder.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb new file mode 100644 index 00000000000..78d4eebadb0 --- /dev/null +++ b/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb @@ -0,0 +1,537 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "IJhWonqQN7u0" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MegtYH2UN8tT" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MlHqSdgSEwPE" + }, + "source": [ + "# Universal Sentence Encoder-Lite demo\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "j0HuiScHQ3OK" + }, + "source": [ + "This Colab illustrates how to use the Universal Sentence Encoder-Lite for sentence similarity task. This module is very similar to [Universal Sentence Encoder](https://www.tensorflow.org/hub/modules/google/universal-sentence-encoder/2) with the only difference that you need to run [SentencePiece](https://github.com/google/sentencepiece) processing on your input sentences.\n", + "\n", + "The Universal Sentence Encoder makes getting sentence level embeddings as easy as it has historically been to lookup the embeddings for individual words. The sentence embeddings can then be trivially used to compute sentence level meaning similarity as well as to enable better performance on downstream classification tasks using less supervised training data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wqCB2pyK-WSU" + }, + "source": [ + "# Getting started" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWeEjoO5M0Cx" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f5_potQBMzcU" + }, + "outputs": [], + "source": [ + "# Install seaborn for pretty visualizations\n", + "!pip3 install --quiet seaborn\n", + "# Install SentencePiece package\n", + "# SentencePiece package is needed for Universal Sentence Encoder Lite. We'll\n", + "# use it for all the text processing and sentence feature ID lookup.\n", + "!pip3 install --quiet sentencepiece" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dMTa6V4a-cmf" + }, + "outputs": [], + "source": [ + "from absl import logging\n", + "\n", + "import tensorflow.compat.v1 as tf\n", + "tf.disable_v2_behavior()\n", + "\n", + "import tensorflow_hub as hub\n", + "import sentencepiece as spm\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", + "import re\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WPXYQDBiFJHd" + }, + "source": [ + "## Load the module from TF-Hub" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HEWUT-lmAkxM" + }, + "outputs": [], + "source": [ + "module = hub.Module(\"https://tfhub.dev/google/universal-sentence-encoder-lite/2/")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5277Z-9qARYF" + }, + "outputs": [], + "source": [ + "input_placeholder = tf.sparse_placeholder(tf.int64, shape=[None, None])\n", + "encodings = module(\n", + " inputs=dict(\n", + " values=input_placeholder.values,\n", + " indices=input_placeholder.indices,\n", + " dense_shape=input_placeholder.dense_shape))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Yydbhuba_nek" + }, + "source": [ + "## Load SentencePiece model from the TF-Hub Module\n", + "The SentencePiece model is conveniently stored inside the module's assets. It has to be loaded in order to initialize the processor." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2CyUjKzE_tcJ" + }, + "outputs": [], + "source": [ + "with tf.Session() as sess:\n", + " spm_path = sess.run(module(signature=\"spm_path\"))\n", + "\n", + "sp = spm.SentencePieceProcessor()\n", + "with tf.io.gfile.GFile(spm_path, mode=\"rb\") as f:\n", + " sp.LoadFromSerializedProto(f.read())\n", + "print(\"SentencePiece model loaded at {}.\".format(spm_path))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6y5kkN-l-5QV" + }, + "outputs": [], + "source": [ + "def process_to_IDs_in_sparse_format(sp, sentences):\n", + " # An utility method that processes sentences with the sentence piece processor\n", + " # 'sp' and returns the results in tf.SparseTensor-similar format:\n", + " # (values, indices, dense_shape)\n", + " ids = [sp.EncodeAsIds(x) for x in sentences]\n", + " max_len = max(len(x) for x in ids)\n", + " dense_shape=(len(ids), max_len)\n", + " values=[item for sublist in ids for item in sublist]\n", + " indices=[[row,col] for row in range(len(ids)) for col in range(len(ids[row]))]\n", + " return (values, indices, dense_shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVpHEWrPAdxR" + }, + "source": [ + "### Test the module with a few examples" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pSkjuGYoCBfU" + }, + "outputs": [], + "source": [ + "# Compute a representation for each message, showing various lengths supported.\n", + "word = \"Elephant\"\n", + "sentence = \"I am a sentence for which I would like to get its embedding.\"\n", + "paragraph = (\n", + " \"Universal Sentence Encoder embeddings also support short paragraphs. \"\n", + " \"There is no hard limit on how long the paragraph is. Roughly, the longer \"\n", + " \"the more 'diluted' the embedding will be.\")\n", + "messages = [word, sentence, paragraph]\n", + "\n", + "values, indices, dense_shape = process_to_IDs_in_sparse_format(sp, messages)\n", + "\n", + "# Reduce logging output.\n", + "logging.set_verbosity(logging.ERROR)\n", + "\n", + "with tf.Session() as session:\n", + " session.run([tf.global_variables_initializer(), tf.tables_initializer()])\n", + " message_embeddings = session.run(\n", + " encodings,\n", + " feed_dict={input_placeholder.values: values,\n", + " input_placeholder.indices: indices,\n", + " input_placeholder.dense_shape: dense_shape})\n", + "\n", + " for i, message_embedding in enumerate(np.array(message_embeddings).tolist()):\n", + " print(\"Message: {}\".format(messages[i]))\n", + " print(\"Embedding size: {}\".format(len(message_embedding)))\n", + " message_embedding_snippet = \", \".join(\n", + " (str(x) for x in message_embedding[:3]))\n", + " print(\"Embedding: [{}, ...]\\n\".format(message_embedding_snippet))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "46jrIgHyFDz9" + }, + "source": [ + "# Semantic Textual Similarity (STS) task example\n", + "\n", + "The embeddings produced by the Universal Sentence Encoder are approximately normalized. The semantic similarity of two sentences can be trivially computed as the inner product of the encodings." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OIQudHgWBGSk" + }, + "outputs": [], + "source": [ + "def plot_similarity(labels, features, rotation):\n", + " corr = np.inner(features, features)\n", + " sns.set(font_scale=1.2)\n", + " g = sns.heatmap(\n", + " corr,\n", + " xticklabels=labels,\n", + " yticklabels=labels,\n", + " vmin=0,\n", + " vmax=1,\n", + " cmap=\"YlOrRd\")\n", + " g.set_xticklabels(labels, rotation=rotation)\n", + " g.set_title(\"Semantic Textual Similarity\")\n", + "\n", + "\n", + "def run_and_plot(session, input_placeholder, messages):\n", + " values, indices, dense_shape = process_to_IDs_in_sparse_format(sp,messages)\n", + "\n", + " message_embeddings = session.run(\n", + " encodings,\n", + " feed_dict={input_placeholder.values: values,\n", + " input_placeholder.indices: indices,\n", + " input_placeholder.dense_shape: dense_shape})\n", + " \n", + " plot_similarity(messages, message_embeddings, 90)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wlDqttNcE0Bx" + }, + "source": [ + "## Similarity visualized\n", + "Here we show the similarity in a heat map. The final graph is a 9x9 matrix where each entry `[i, j]` is colored based on the inner product of the encodings for sentence `i` and `j`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_GSCW5QIBKVe" + }, + "outputs": [], + "source": [ + "messages = [\n", + " # Smartphones\n", + " \"I like my phone\",\n", + " \"My phone is not good.\",\n", + " \"Your cellphone looks great.\",\n", + "\n", + " # Weather\n", + " \"Will it snow tomorrow?\",\n", + " \"Recently a lot of hurricanes have hit the US\",\n", + " \"Global warming is real\",\n", + "\n", + " # Food and health\n", + " \"An apple a day, keeps the doctors away\",\n", + " \"Eating strawberries is healthy\",\n", + " \"Is paleo better than keto?\",\n", + "\n", + " # Asking about age\n", + " \"How old are you?\",\n", + " \"what is your age?\",\n", + "]\n", + "\n", + "\n", + "with tf.Session() as session:\n", + " session.run(tf.global_variables_initializer())\n", + " session.run(tf.tables_initializer())\n", + " run_and_plot(session, input_placeholder, messages)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QkZ4sRBYBnL8" + }, + "source": [ + "## Evaluation: STS (Semantic Textual Similarity) Benchmark\n", + "\n", + "The [**STS Benchmark**](https://ixa2.si.ehu.es/stswiki/index.php/STSbenchmark) provides an intristic evaluation of the degree to which similarity scores computed using sentence embeddings align with human judgements. The benchmark requires systems to return similarity scores for a diverse selection of sentence pairs. [Pearson correlation](https://en.wikipedia.org/wiki/Pearson_correlation_coefficient) is then used to evaluate the quality of the machine similarity scores against human judgements." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kNMVfSelBsHW" + }, + "source": [ + "### Download data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8zAWVzBMBptq" + }, + "outputs": [], + "source": [ + "import pandas\n", + "import scipy\n", + "import math\n", + "\n", + "\n", + "def load_sts_dataset(filename):\n", + " # Loads a subset of the STS dataset into a DataFrame. 
In particular both\n", + " # sentences and their human rated similarity score.\n", + " sent_pairs = []\n", + " with tf.gfile.GFile(filename, \"r\") as f:\n", + " for line in f:\n", + " ts = line.strip().split(\"\\t\")\n", + " # (sent_1, sent_2, similarity_score)\n", + " sent_pairs.append((ts[5], ts[6], float(ts[4])))\n", + " return pandas.DataFrame(sent_pairs, columns=[\"sent_1\", \"sent_2\", \"sim\"])\n", + "\n", + "\n", + "def download_and_load_sts_data():\n", + " sts_dataset = tf.keras.utils.get_file(\n", + " fname=\"Stsbenchmark.tar.gz\",\n", + " origin=\"http://ixa2.si.ehu.es/stswiki/images/4/48/Stsbenchmark.tar.gz/",\n", + " extract=True)\n", + "\n", + " sts_dev = load_sts_dataset(\n", + " os.path.join(os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-dev.csv\"))\n", + " sts_test = load_sts_dataset(\n", + " os.path.join(\n", + " os.path.dirname(sts_dataset), \"stsbenchmark\", \"sts-test.csv\"))\n", + "\n", + " return sts_dev, sts_test\n", + "\n", + "\n", + "sts_dev, sts_test = download_and_load_sts_data()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l8lEawD6B4Fr" + }, + "source": [ + "### Build evaluation graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "etiZUkP-B6bR" + }, + "outputs": [], + "source": [ + "sts_input1 = tf.sparse_placeholder(tf.int64, shape=(None, None))\n", + "sts_input2 = tf.sparse_placeholder(tf.int64, shape=(None, None))\n", + "\n", + "# For evaluation we use exactly normalized rather than\n", + "# approximately normalized.\n", + "sts_encode1 = tf.nn.l2_normalize(\n", + " module(\n", + " inputs=dict(values=sts_input1.values,\n", + " indices=sts_input1.indices,\n", + " dense_shape=sts_input1.dense_shape)),\n", + " axis=1)\n", + "sts_encode2 = tf.nn.l2_normalize(\n", + " module(\n", + " inputs=dict(values=sts_input2.values,\n", + " indices=sts_input2.indices,\n", + " dense_shape=sts_input2.dense_shape)),\n", + " axis=1)\n", + "\n", + "sim_scores = -tf.acos(tf.reduce_sum(tf.multiply(sts_encode1, sts_encode2), axis=1))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e4Q34ssLB-rw" + }, + "source": [ + "### Evaluate sentence embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-vRFEFPJPyeF" + }, + "outputs": [], + "source": [ + "#@title Choose dataset for benchmark\n", + "dataset = sts_dev #@param [\"sts_dev\", \"sts_test\"] {type:\"raw\"}\n", + "\n", + "values1, indices1, dense_shape1 = process_to_IDs_in_sparse_format(sp, dataset['sent_1'].tolist())\n", + "values2, indices2, dense_shape2 = process_to_IDs_in_sparse_format(sp, dataset['sent_2'].tolist())\n", + "similarity_scores = dataset['sim'].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_QJ2DI85CBDh" + }, + "outputs": [], + "source": [ + "def run_sts_benchmark(session):\n", + " \"\"\"Returns the similarity scores\"\"\"\n", + " scores = session.run(\n", + " sim_scores,\n", + " feed_dict={\n", + " sts_input1.values: values1,\n", + " sts_input1.indices: indices1,\n", + " sts_input1.dense_shape: dense_shape1,\n", + " sts_input2.values: values2,\n", + " sts_input2.indices: indices2,\n", + " sts_input2.dense_shape: dense_shape2,\n", + " })\n", + " return scores\n", + "\n", + "\n", + "with tf.Session() as session:\n", + " session.run(tf.global_variables_initializer())\n", + " session.run(tf.tables_initializer())\n", + " scores = run_sts_benchmark(session)\n", + "\n", + "pearson_correlation = 
scipy.stats.pearsonr(scores, similarity_scores)\n", + "print('Pearson correlation coefficient = {0}\\np-value = {1}'.format(\n", + " pearson_correlation[0], pearson_correlation[1]))" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "IJhWonqQN7u0" + ], + "name": "semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb b/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb new file mode 100644 index 00000000000..c33dce64c92 --- /dev/null +++ b/site/en/hub/tutorials/senteval_for_universal_sentence_encoder_cmlm.ipynb @@ -0,0 +1,248 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "CGyzr0tfeUTQ" + }, + "source": [ + "**Copyright 2021 The TensorFlow Hub Authors.**\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zV1OQAGReaGQ" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L5bsDhkRfTpq" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owWqOcw1e-RZ" + }, + "source": [ + "# Universal Sentence Encoder SentEval demo\n", + "This colab demostrates the [Universal Sentence Encoder CMLM model](https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1) using the [SentEval](https://github.com/facebookresearch/SentEval) toolkit, which is a library for measuring the quality of sentence embeddings. The SentEval toolkit includes a diverse set of downstream tasks that are able to evaluate the generalization power of an embedding model and to evaluate the linguistic properties encoded.\n", + "\n", + "Run the first two code blocks to setup the environment, in the third code block you can pick a SentEval task to evaluate the model. A GPU runtime is recommended to run this Colab.\n", + "\n", + "To learn more about the Universal Sentence Encoder CMLM model, see https://openreview.net/forum?id=WDVD4lUCTzU." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-CerULCLsjzV" + }, + "outputs": [], + "source": [ + "#@title Install dependencies\n", + "!pip install --quiet \"tensorflow-text==2.11.*\"\n", + "!pip install --quiet torch==1.8.1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LjqkqD6aiZGU" + }, + "source": [ + "## Download SentEval and task data\n", + "This step download SentEval from github and execute the data script to download the task data. It may take up to 5 minutes to complete." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3UwhHQiKJmSc" + }, + "outputs": [], + "source": [ + "#@title Install SentEval and download task data\n", + "!rm -rf ./SentEval\n", + "!git clone https://github.com/facebookresearch/SentEval.git\n", + "!cd $PWD/SentEval/data/downstream && bash get_transfer_data.bash > /dev/null 2>&1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7a2ohPn8vMe2" + }, + "source": [ + "#Execute a SentEval evaluation task\n", + "The following code block executes a SentEval task and output the results, choose one of the following tasks to evaluate the USE CMLM model:\n", + "\n", + "```\n", + "MR\tCR\tSUBJ\tMPQA\tSST\tTREC\tMRPC\tSICK-E\n", + "```\n", + "\n", + "Select a model, params and task to run. 
The rapid prototyping params can be used for reducing computation time for faster result.\n", + "\n", + "It typically takes 5-15 mins to complete a task with the **'rapid prototyping'** params and up to an hour with the **'slower, best performance'** params.\n", + "\n", + "```\n", + "params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}\n", + "params['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,\n", + " 'tenacity': 3, 'epoch_size': 2}\n", + "```\n", + "\n", + "For better result, use the slower **'slower, best performance'** params, computation may take up to 1 hour:\n", + "\n", + "```\n", + "params = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 10}\n", + "params['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 16,\n", + " 'tenacity': 5, 'epoch_size': 6}\n", + "```\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nenCcawjwowt" + }, + "outputs": [], + "source": [ + "import os\n", + "os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'\n", + "\n", + "import sys\n", + "sys.path.append(f'{os.getcwd()}/SentEval')\n", + "\n", + "import tensorflow as tf\n", + "\n", + "# Prevent TF from claiming all GPU memory so there is some left for pytorch.\n", + "gpus = tf.config.list_physical_devices('GPU')\n", + "if gpus:\n", + " # Memory growth needs to be the same across GPUs.\n", + " for gpu in gpus:\n", + " tf.config.experimental.set_memory_growth(gpu, True)\n", + "\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text\n", + "import senteval\n", + "import time\n", + "\n", + "PATH_TO_DATA = f'{os.getcwd()}/SentEval/data'\n", + "MODEL = 'https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1' #@param ['https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1', 'https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-large/1']\n", + "PARAMS = 'rapid prototyping' #@param ['slower, best performance', 'rapid prototyping']\n", + "TASK = 'CR' #@param ['CR','MR', 'MPQA', 'MRPC', 'SICKEntailment', 'SNLI', 'SST2', 'SUBJ', 'TREC']\n", + "\n", + "params_prototyping = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 5}\n", + "params_prototyping['classifier'] = {'nhid': 0, 'optim': 'rmsprop', 'batch_size': 128,\n", + " 'tenacity': 3, 'epoch_size': 2}\n", + "\n", + "params_best = {'task_path': PATH_TO_DATA, 'usepytorch': True, 'kfold': 10}\n", + "params_best['classifier'] = {'nhid': 0, 'optim': 'adam', 'batch_size': 16,\n", + " 'tenacity': 5, 'epoch_size': 6}\n", + "\n", + "params = params_best if PARAMS == 'slower, best performance' else params_prototyping\n", + "\n", + "preprocessor = hub.KerasLayer(\n", + " \"https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3/")\n", + "encoder = hub.KerasLayer(\n", + " \"https://tfhub.dev/google/universal-sentence-encoder-cmlm/en-base/1/")\n", + "\n", + "inputs = tf.keras.Input(shape=tf.shape(''), dtype=tf.string)\n", + "outputs = encoder(preprocessor(inputs))\n", + "\n", + "model = tf.keras.Model(inputs=inputs, outputs=outputs)\n", + "\n", + "def prepare(params, samples):\n", + " return\n", + "\n", + "def batcher(_, batch):\n", + " batch = [' '.join(sent) if sent else '.' 
for sent in batch]\n", + " return model.predict(tf.constant(batch))[\"default\"]\n", + "\n", + "\n", + "se = senteval.engine.SE(params, batcher, prepare)\n", + "print(\"Evaluating task %s with %s parameters\" % (TASK, PARAMS))\n", + "start = time.time()\n", + "results = se.eval(TASK)\n", + "end = time.time()\n", + "print('Time took on task %s : %.1f. seconds' % (TASK, end - start))\n", + "print(results)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SNvsY6Hsvs0_" + }, + "source": [ + "#Learn More\n", + "\n", + "* Find more text embedding models on [TensorFlow Hub](https://tfhub.dev)\n", + "* See also the [Multilingual Universal Sentence Encoder CMLM model](https://tfhub.dev/google/universal-sentence-encoder-cmlm/multilingual-base-br/1)\n", + "* Check out other [Universal Sentence Encoder models](https://tfhub.dev/google/collections/universal-sentence-encoder/1)\n", + "\n", + "## Reference\n", + "\n", + "* Ziyi Yang, Yinfei Yang, Daniel Cer, Jax Law, Eric Darve. [Universal Sentence Representations Learning with Conditional Masked Language Model. November 2020](https://openreview.net/forum?id=WDVD4lUCTzU)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "senteval_for_universal_sentence_encoder_cmlm.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/spice.ipynb b/site/en/hub/tutorials/spice.ipynb new file mode 100644 index 00000000000..9ff6cd3bd62 --- /dev/null +++ b/site/en/hub/tutorials/spice.ipynb @@ -0,0 +1,937 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "aXehiGc3Kr2I" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-6LKjmi8Ktoh" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sPQKw4x4bL8w" + }, + "source": [ + "# Pitch Detection with SPICE\n", + "\n", + "This colab will show you how to use the SPICE model downloaded from TensorFlow Hub." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rfKwZlPnPwD1" + }, + "outputs": [], + "source": [ + "!sudo apt-get install -q -y timidity libsndfile1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dYrIdOS8SW3b" + }, + "outputs": [], + "source": [ + "# All the imports to deal with sound data\n", + "!pip install pydub librosa music21" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p09o78LGYdnz" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import librosa\n", + "from librosa import display as librosadisplay\n", + "\n", + "import logging\n", + "import math\n", + "import statistics\n", + "import sys\n", + "\n", + "from IPython.display import Audio, Javascript\n", + "from scipy.io import wavfile\n", + "\n", + "from base64 import b64decode\n", + "\n", + "import music21\n", + "from pydub import AudioSegment\n", + "\n", + "logger = logging.getLogger()\n", + "logger.setLevel(logging.ERROR)\n", + "\n", + "print(\"tensorflow: %s\" % tf.__version__)\n", + "#print(\"librosa: %s\" % librosa.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wHxox8hXc3w1" + }, + "source": [ + "# The audio input file\n", + "Now the hardest part: Record your singing! :)\n", + "\n", + "We provide four methods to obtain an audio file:\n", + "\n", + "1. Record audio directly in colab\n", + "2. Upload from your computer\n", + "3. Use a file saved on Google Drive\n", + "4. Download the file from the web\n", + "\n", + "Choose one of the four methods below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "HaCAHOqiVu5B" + }, + "outputs": [], + "source": [ + "#@title [Run this] Definition of the JS code to record audio straight from the browser\n", + "\n", + "RECORD = \"\"\"\n", + "const sleep = time => new Promise(resolve => setTimeout(resolve, time))\n", + "const b2text = blob => new Promise(resolve => {\n", + " const reader = new FileReader()\n", + " reader.onloadend = e => resolve(e.srcElement.result)\n", + " reader.readAsDataURL(blob)\n", + "})\n", + "var record = time => new Promise(async resolve => {\n", + " stream = await navigator.mediaDevices.getUserMedia({ audio: true })\n", + " recorder = new MediaRecorder(stream)\n", + " chunks = []\n", + " recorder.ondataavailable = e => chunks.push(e.data)\n", + " recorder.start()\n", + " await sleep(time)\n", + " recorder.onstop = async ()=>{\n", + " blob = new Blob(chunks)\n", + " text = await b2text(blob)\n", + " resolve(text)\n", + " }\n", + " recorder.stop()\n", + "})\n", + "\"\"\"\n", + "\n", + "def record(sec=5):\n", + " try:\n", + " from google.colab import output\n", + " except ImportError:\n", + " print('No possible to import output from google.colab')\n", + " return ''\n", + " else:\n", + " print('Recording')\n", + " display(Javascript(RECORD))\n", + " s = output.eval_js('record(%d)' % (sec*1000))\n", + " fname = 'recorded_audio.wav'\n", + " print('Saving to', fname)\n", + " b = b64decode(s.split(',')[1])\n", + " with open(fname, 'wb') as f:\n", + " f.write(b)\n", + " return fname" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "sBpWWkTzfUYR" + }, + "outputs": [], + "source": [ + "#@title Select how to input your audio { run: \"auto\" }\n", + "INPUT_SOURCE = 'https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav' #@param [\"https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav/", \"RECORD\", \"UPLOAD\", \"./drive/My Drive/YOUR_MUSIC_FILE.wav\"] {allow-input: true}\n", + "\n", + "print('You selected', INPUT_SOURCE)\n", + "\n", + "if INPUT_SOURCE == 'RECORD':\n", + " uploaded_file_name = record(5)\n", + "elif INPUT_SOURCE == 'UPLOAD':\n", + " try:\n", + " from google.colab import files\n", + " except ImportError:\n", + " print(\"ImportError: files from google.colab seems to not be available\")\n", + " else:\n", + " uploaded = files.upload()\n", + " for fn in uploaded.keys():\n", + " print('User uploaded file \"{name}\" with length {length} bytes'.format(\n", + " name=fn, length=len(uploaded[fn])))\n", + " uploaded_file_name = next(iter(uploaded))\n", + " print('Uploaded file: ' + uploaded_file_name)\n", + "elif INPUT_SOURCE.startswith('./drive/'):\n", + " try:\n", + " from google.colab import drive\n", + " except ImportError:\n", + " print(\"ImportError: files from google.colab seems to not be available\")\n", + " else:\n", + " drive.mount('/content/drive')\n", + " # don't forget to change the name of the file you\n", + " # will you here!\n", + " gdrive_audio_file = 'YOUR_MUSIC_FILE.wav'\n", + " uploaded_file_name = INPUT_SOURCE\n", + "elif INPUT_SOURCE.startswith('http'):\n", + " !wget --no-check-certificate 'https://storage.googleapis.com/download.tensorflow.org/data/c-scale-metronome.wav' -O c-scale.wav\n", + " uploaded_file_name = 'c-scale.wav'\n", + "else:\n", + " print('Unrecognized input format!')\n", + " print('Please select \"RECORD\", \"UPLOAD\", or specify a file hosted on 
Google Drive or a file from the web to download file to download')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4S2BvIoDf9nf" + }, + "source": [ + "# Preparing the audio data\n", + "\n", + "Now we have the audio, let's convert it to the expected format and then listen to it!\n", + "\n", + "The SPICE model needs as input an audio file at a sampling rate of 16kHz and with only one channel (mono). \n", + "\n", + "To help you with this part, we created a function (`convert_audio_for_model`) to convert any wav file you have to the model's expected format:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bQ1362i-JoFI" + }, + "outputs": [], + "source": [ + "# Function that converts the user-created audio to the format that the model \n", + "# expects: bitrate 16kHz and only one channel (mono).\n", + "\n", + "EXPECTED_SAMPLE_RATE = 16000\n", + "\n", + "def convert_audio_for_model(user_file, output_file='converted_audio_file.wav'):\n", + " audio = AudioSegment.from_file(user_file)\n", + " audio = audio.set_frame_rate(EXPECTED_SAMPLE_RATE).set_channels(1)\n", + " audio.export(output_file, format=\"wav\")\n", + " return output_file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oL9pftZ2nPm9" + }, + "outputs": [], + "source": [ + "# Converting to the expected format for the model\n", + "# in all the input 4 input method before, the uploaded file name is at\n", + "# the variable uploaded_file_name\n", + "converted_audio_file = convert_audio_for_model(uploaded_file_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TslkX2AOZN0p" + }, + "outputs": [], + "source": [ + "# Loading audio samples from the wav file:\n", + "sample_rate, audio_samples = wavfile.read(converted_audio_file, 'rb')\n", + "\n", + "# Show some basic information about the audio.\n", + "duration = len(audio_samples)/sample_rate\n", + "print(f'Sample rate: {sample_rate} Hz')\n", + "print(f'Total duration: {duration:.2f}s')\n", + "print(f'Size of the input: {len(audio_samples)}')\n", + "\n", + "# Let's listen to the wav file.\n", + "Audio(audio_samples, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iBicZu5AgcpR" + }, + "source": [ + "First thing, let's take a look at the waveform of our singing." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aAa2M3CLZcWW" + }, + "outputs": [], + "source": [ + "# We can visualize the audio as a waveform.\n", + "_ = plt.plot(audio_samples)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J1eI0b8qgn08" + }, + "source": [ + "A more informative visualization is the [spectrogram](https://en.wikipedia.org/wiki/Spectrogram), which shows frequencies present over time.\n", + "\n", + "Here, we use a logarithmic frequency scale, to make the singing more clearly visible.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fGR4UZtpZvWI" + }, + "outputs": [], + "source": [ + "MAX_ABS_INT16 = 32768.0\n", + "\n", + "def plot_stft(x, sample_rate, show_black_and_white=False):\n", + " x_stft = np.abs(librosa.stft(x, n_fft=2048))\n", + " fig, ax = plt.subplots()\n", + " fig.set_size_inches(20, 10)\n", + " x_stft_db = librosa.amplitude_to_db(x_stft, ref=np.max)\n", + " if(show_black_and_white):\n", + " librosadisplay.specshow(data=x_stft_db, y_axis='log', \n", + " sr=sample_rate, cmap='gray_r')\n", + " else:\n", + " librosadisplay.specshow(data=x_stft_db, y_axis='log', sr=sample_rate)\n", + "\n", + " plt.colorbar(format='%+2.0f dB')\n", + "\n", + "plot_stft(audio_samples / MAX_ABS_INT16 , sample_rate=EXPECTED_SAMPLE_RATE)\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MGCzo_cjjH-7" + }, + "source": [ + "We need one last conversion here. The audio samples are in int16 format. They need to be normalized to floats between -1 and 1." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dv4H4O1Xb8T8" + }, + "outputs": [], + "source": [ + "audio_samples = audio_samples / float(MAX_ABS_INT16)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yTdo_TwljVUV" + }, + "source": [ + "# Executing the Model\n", + "Now is the easy part, let's load the model with **TensorFlow Hub**, and feed the audio to it.\n", + "SPICE will give us two outputs: pitch and uncertainty\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xUptYSTAbc3I" + }, + "source": [ + "**TensorFlow Hub** is a library for the publication, discovery, and consumption of reusable parts of machine learning models. It makes easy to use machine learning to solve your challenges.\n", + "\n", + "To load the model you just need the Hub module and the URL pointing to the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ri0A0DSXY_Yd" + }, + "outputs": [], + "source": [ + "# Loading the SPICE model is easy:\n", + "model = hub.load(\"https://tfhub.dev/google/spice/2/")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kQV5H6J4suMT" + }, + "source": [ + "**Note:** An interesting detail here is that all the model urls from Hub can be used for download and also to read the documentation, so if you point your browser to that link you can read documentation on how to use the model and learn more about how it was trained." 
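If you want to see what the loaded module exposes before calling it, you can list its signatures and their outputs. This is a small optional inspection sketch (it assumes the `model` object loaded above); the next cell uses the `serving_default` signature that this listing reveals.

```python
# Optional: inspect the loaded SPICE module before running it.
print(list(model.signatures.keys()))   # e.g. ['serving_default']

serving_fn = model.signatures["serving_default"]
# Expected to describe the 'pitch' and 'uncertainty' output tensors.
print(serving_fn.structured_outputs)
```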
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GUVICjIps9hI" + }, + "source": [ + "With the model loaded, data prepared, we need 3 lines to get the result: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tP55fXBYcBhb" + }, + "outputs": [], + "source": [ + "# We now feed the audio to the SPICE tf.hub model to obtain pitch and uncertainty outputs as tensors.\n", + "model_output = model.signatures[\"serving_default\"](tf.constant(audio_samples, tf.float32))\n", + "\n", + "pitch_outputs = model_output[\"pitch\"]\n", + "uncertainty_outputs = model_output[\"uncertainty\"]\n", + "\n", + "# 'Uncertainty' basically means the inverse of confidence.\n", + "confidence_outputs = 1.0 - uncertainty_outputs\n", + "\n", + "fig, ax = plt.subplots()\n", + "fig.set_size_inches(20, 10)\n", + "plt.plot(pitch_outputs, label='pitch')\n", + "plt.plot(confidence_outputs, label='confidence')\n", + "plt.legend(loc=\"lower right\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "blJwFWR4kMul" + }, + "source": [ + "Let's make the results easier to understand by removing all pitch estimates with low confidence (confidence < 0.9) and plot the remaining ones.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d1MRmcm2cEkM" + }, + "outputs": [], + "source": [ + "confidence_outputs = list(confidence_outputs)\n", + "pitch_outputs = [ float(x) for x in pitch_outputs]\n", + "\n", + "indices = range(len (pitch_outputs))\n", + "confident_pitch_outputs = [ (i,p) \n", + " for i, p, c in zip(indices, pitch_outputs, confidence_outputs) if c >= 0.9 ]\n", + "confident_pitch_outputs_x, confident_pitch_outputs_y = zip(*confident_pitch_outputs)\n", + " \n", + "fig, ax = plt.subplots()\n", + "fig.set_size_inches(20, 10)\n", + "ax.set_ylim([0, 1])\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_outputs_y, )\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_outputs_y, c=\"r\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vNBZ7ZblkxOm" + }, + "source": [ + "The pitch values returned by SPICE are in the range from 0 to 1. Let's convert them to absolute pitch values in Hz." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "n-CnpKzmcQi9" + }, + "outputs": [], + "source": [ + "def output2hz(pitch_output):\n", + " # Constants taken from https://tfhub.dev/google/spice/2\n", + " PT_OFFSET = 25.58\n", + " PT_SLOPE = 63.07\n", + " FMIN = 10.0;\n", + " BINS_PER_OCTAVE = 12.0;\n", + " cqt_bin = pitch_output * PT_SLOPE + PT_OFFSET;\n", + " return FMIN * 2.0 ** (1.0 * cqt_bin / BINS_PER_OCTAVE)\n", + " \n", + "confident_pitch_values_hz = [ output2hz(p) for p in confident_pitch_outputs_y ]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "24yK0a6HjCSZ" + }, + "source": [ + "Now, let's see how good the prediction is: We will overlay the predicted pitches over the original spectrogram. To make the pitch predictions more visible, we changed the spectrogram to black and white." 
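Before plotting the overlay, here is a quick sanity check of the `output2hz` conversion defined above, applied to a single value (0.5 is an arbitrary example output, not a model prediction):

```python
# Worked example of the pitch-output -> Hz conversion used by output2hz.
PT_OFFSET = 25.58
PT_SLOPE = 63.07
FMIN = 10.0
BINS_PER_OCTAVE = 12.0

pitch_output = 0.5  # arbitrary example value in [0, 1]
cqt_bin = pitch_output * PT_SLOPE + PT_OFFSET
freq_hz = FMIN * 2.0 ** (cqt_bin / BINS_PER_OCTAVE)
print(f"pitch output {pitch_output} -> {freq_hz:.1f} Hz")  # roughly 271 Hz
```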
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "L1kaAcX9rrDo" + }, + "outputs": [], + "source": [ + "plot_stft(audio_samples / MAX_ABS_INT16 , \n", + " sample_rate=EXPECTED_SAMPLE_RATE, show_black_and_white=True)\n", + "# Note: conveniently, since the plot is in log scale, the pitch outputs \n", + "# also get converted to the log scale automatically by matplotlib.\n", + "plt.scatter(confident_pitch_outputs_x, confident_pitch_values_hz, c=\"r\")\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NskqpiHLxq6V" + }, + "source": [ + "# Converting to musical notes\n", + "\n", + "Now that we have the pitch values, let's convert them to notes!\n", + "This is part is challenging by itself. We have to take into account two things:\n", + "1. the rests (when there's no singing) \n", + "2. the size of each note (offsets) " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KDOlm9PLTTjt" + }, + "source": [ + "### 1: Adding zeros to the output to indicate when there's no singing" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9uSQ3bJmTZmo" + }, + "outputs": [], + "source": [ + "pitch_outputs_and_rests = [\n", + " output2hz(p) if c >= 0.9 else 0\n", + " for i, p, c in zip(indices, pitch_outputs, confidence_outputs)\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9fM0UwlsTt4w" + }, + "source": [ + "### 2: Adding note offsets\n", + "\n", + "When a person sings freely, the melody may have an offset to the absolute pitch values that notes can represent.\n", + "Hence, to convert predictions to notes, one needs to correct for this possible offset.\n", + "This is what the following code computes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fsJu-P5ksdFW" + }, + "outputs": [], + "source": [ + "A4 = 440\n", + "C0 = A4 * pow(2, -4.75)\n", + "note_names = [\"C\", \"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\", \"G\", \"G#\", \"A\", \"A#\", \"B\"]\n", + "\n", + "def hz2offset(freq):\n", + " # This measures the quantization error for a single note.\n", + " if freq == 0: # Rests always have zero error.\n", + " return None\n", + " # Quantized note.\n", + " h = round(12 * math.log2(freq / C0))\n", + " return 12 * math.log2(freq / C0) - h\n", + "\n", + "\n", + "# The ideal offset is the mean quantization error for all the notes\n", + "# (excluding rests):\n", + "offsets = [hz2offset(p) for p in pitch_outputs_and_rests if p != 0]\n", + "print(\"offsets: \", offsets)\n", + "\n", + "ideal_offset = statistics.mean(offsets)\n", + "print(\"ideal offset: \", ideal_offset)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K17It_qT2DtE" + }, + "source": [ + "We can now use some heuristics to try and estimate the most likely sequence of notes that were sung.\n", + "The ideal offset computed above is one ingredient - but we also need to know the speed (how many predictions make, say, an eighth?), and the time offset to start quantizing. To keep it simple, we'll just try different speeds and time offsets and measure the quantization error, using in the end the values that minimize this error." 
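The search below relies on the same note-naming convention introduced with `C0` and `note_names` above. As a standalone illustration of that convention (not part of the notebook's pipeline), a single frequency can be snapped to its nearest equal-tempered note like this:

```python
import math

A4 = 440
C0 = A4 * pow(2, -4.75)
note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]

def hz_to_note_name(freq):
  # Snap a frequency (Hz) to the closest equal-tempered note name.
  h = round(12 * math.log2(freq / C0))
  return note_names[h % 12] + str(h // 12)

print(hz_to_note_name(261.63))  # middle C -> 'C4'
print(hz_to_note_name(440.0))   # concert A -> 'A4'
```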
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eMULTI4L52ZHA" + }, + "outputs": [], + "source": [ + "def quantize_predictions(group, ideal_offset):\n", + " # Group values are either 0, or a pitch in Hz.\n", + " non_zero_values = [v for v in group if v != 0]\n", + " zero_values_count = len(group) - len(non_zero_values)\n", + "\n", + " # Create a rest if 80% is silent, otherwise create a note.\n", + " if zero_values_count > 0.8 * len(group):\n", + " # Interpret as a rest. Count each dropped note as an error, weighted a bit\n", + " # worse than a badly sung note (which would 'cost' 0.5).\n", + " return 0.51 * len(non_zero_values), \"Rest\"\n", + " else:\n", + " # Interpret as note, estimating as mean of non-rest predictions.\n", + " h = round(\n", + " statistics.mean([\n", + " 12 * math.log2(freq / C0) - ideal_offset for freq in non_zero_values\n", + " ]))\n", + " octave = h // 12\n", + " n = h % 12\n", + " note = note_names[n] + str(octave)\n", + " # Quantization error is the total difference from the quantized note.\n", + " error = sum([\n", + " abs(12 * math.log2(freq / C0) - ideal_offset - h)\n", + " for freq in non_zero_values\n", + " ])\n", + " return error, note\n", + "\n", + "\n", + "def get_quantization_and_error(pitch_outputs_and_rests, predictions_per_eighth,\n", + " prediction_start_offset, ideal_offset):\n", + " # Apply the start offset - we can just add the offset as rests.\n", + " pitch_outputs_and_rests = [0] * prediction_start_offset + \\\n", + " pitch_outputs_and_rests\n", + " # Collect the predictions for each note (or rest).\n", + " groups = [\n", + " pitch_outputs_and_rests[i:i + predictions_per_eighth]\n", + " for i in range(0, len(pitch_outputs_and_rests), predictions_per_eighth)\n", + " ]\n", + "\n", + " quantization_error = 0\n", + "\n", + " notes_and_rests = []\n", + " for group in groups:\n", + " error, note_or_rest = quantize_predictions(group, ideal_offset)\n", + " quantization_error += error\n", + " notes_and_rests.append(note_or_rest)\n", + "\n", + " return quantization_error, notes_and_rests\n", + "\n", + "\n", + "best_error = float(\"inf\")\n", + "best_notes_and_rests = None\n", + "best_predictions_per_note = None\n", + "\n", + "for predictions_per_note in range(20, 65, 1):\n", + " for prediction_start_offset in range(predictions_per_note):\n", + "\n", + " error, notes_and_rests = get_quantization_and_error(\n", + " pitch_outputs_and_rests, predictions_per_note,\n", + " prediction_start_offset, ideal_offset)\n", + "\n", + " if error < best_error: \n", + " best_error = error\n", + " best_notes_and_rests = notes_and_rests\n", + " best_predictions_per_note = predictions_per_note\n", + "\n", + "# At this point, best_notes_and_rests contains the best quantization.\n", + "# Since we don't need to have rests at the beginning, let's remove these:\n", + "while best_notes_and_rests[0] == 'Rest':\n", + " best_notes_and_rests = best_notes_and_rests[1:]\n", + "# Also remove silence at the end.\n", + "while best_notes_and_rests[-1] == 'Rest':\n", + " best_notes_and_rests = best_notes_and_rests[:-1]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vMZbWA3aVqee" + }, + "source": [ + "Now let's write the quantized notes as sheet music score!\n", + "\n", + "To do it we will use two libraries: [music21](http://web.mit.edu/music21/) and [Open Sheet Music Display](https://github.com/opensheetmusicdisplay/opensheetmusicdisplay)\n", + "\n", + "**Note:** for simplicity, we assume here that all notes have the same duration (a 
half note)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yVrk_IOIzpQR" + }, + "outputs": [], + "source": [ + "# Creating the sheet music score.\n", + "sc = music21.stream.Score()\n", + "# Adjust the speed to match the actual singing.\n", + "bpm = 60 * 60 / best_predictions_per_note\n", + "print ('bpm: ', bpm)\n", + "a = music21.tempo.MetronomeMark(number=bpm)\n", + "sc.insert(0,a)\n", + "\n", + "for snote in best_notes_and_rests: \n", + " d = 'half'\n", + " if snote == 'Rest': \n", + " sc.append(music21.note.Rest(type=d))\n", + " else:\n", + " sc.append(music21.note.Note(snote, type=d))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "CEleCWHtG2s4" + }, + "outputs": [], + "source": [ + "#@title [Run this] Helper function to use Open Sheet Music Display (JS code) to show a music score\n", + "\n", + "from IPython.core.display import display, HTML, Javascript\n", + "import json, random\n", + "\n", + "def showScore(score):\n", + " xml = open(score.write('musicxml')).read()\n", + " showMusicXML(xml)\n", + " \n", + "def showMusicXML(xml):\n", + " DIV_ID = \"OSMD_div\"\n", + " display(HTML('
<div id=\"'+DIV_ID+'\">loading OpenSheetMusicDisplay</div>
    '))\n", + " script = \"\"\"\n", + " var div_id = %%DIV_ID%%;\n", + " function loadOSMD() { \n", + " return new Promise(function(resolve, reject){\n", + " if (window.opensheetmusicdisplay) {\n", + " return resolve(window.opensheetmusicdisplay)\n", + " }\n", + " // OSMD script has a 'define' call which conflicts with requirejs\n", + " var _define = window.define // save the define object \n", + " window.define = undefined // now the loaded script will ignore requirejs\n", + " var s = document.createElement( 'script' );\n", + " s.setAttribute( 'src', \"https://cdn.jsdelivr.net/npm/opensheetmusicdisplay@0.7.6/build/opensheetmusicdisplay.min.js/" );\n", + " //s.setAttribute( 'src', \"/custom/opensheetmusicdisplay.js\" );\n", + " s.onload=function(){\n", + " window.define = _define\n", + " resolve(opensheetmusicdisplay);\n", + " };\n", + " document.body.appendChild( s ); // browser will try to load the new script tag\n", + " }) \n", + " }\n", + " loadOSMD().then((OSMD)=>{\n", + " window.openSheetMusicDisplay = new OSMD.OpenSheetMusicDisplay(div_id, {\n", + " drawingParameters: \"compacttight\"\n", + " });\n", + " openSheetMusicDisplay\n", + " .load(%%data%%)\n", + " .then(\n", + " function() {\n", + " openSheetMusicDisplay.render();\n", + " }\n", + " );\n", + " })\n", + " \"\"\".replace('%%DIV_ID%%',DIV_ID).replace('%%data%%',json.dumps(xml))\n", + " display(Javascript(script))\n", + " return" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WTu4phq4WeAI" + }, + "outputs": [], + "source": [ + "# rendering the music score\n", + "showScore(sc)\n", + "print(best_notes_and_rests)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fGPXm6Z83U2g" + }, + "source": [ + "Let's convert the music notes to a MIDI file and listen to it.\n", + "\n", + "To create this file, we can use the stream we created before." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "klYoWjgmPaod" + }, + "outputs": [], + "source": [ + "# Saving the recognized musical notes as a MIDI file\n", + "converted_audio_file_as_midi = converted_audio_file[:-4] + '.mid'\n", + "fp = sc.write('midi', fp=converted_audio_file_as_midi)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tz7Mj3Qx1lpR" + }, + "outputs": [], + "source": [ + "wav_from_created_midi = converted_audio_file_as_midi.replace(' ', '_') + \"_midioutput.wav\"\n", + "print(wav_from_created_midi)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ahss5EOiWDDp" + }, + "source": [ + "To listen to it on colab, we need to convert it back to wav. An easy way of doing that is using Timidity." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "XmeJ-UITV2nq" + }, + "outputs": [], + "source": [ + "!timidity $converted_audio_file_as_midi -Ow -o $wav_from_created_midi" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bnvwmyNj7kCC" + }, + "source": [ + "And finally, listen the audio, created from notes, created via MIDI from the predicted pitches, inferred by the model!\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qNLBB0zJV6vN" + }, + "outputs": [], + "source": [ + "Audio(wav_from_created_midi)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "spice.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb b/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb new file mode 100644 index 00000000000..e2985bda51e --- /dev/null +++ b/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ok9PfyoQ2rH_" + }, + "source": [ + "# How to solve a problem on Kaggle with TF-Hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "556YQZLUO4Ih" + }, + "source": [ + "TF-Hub is a platform to share machine learning expertise packaged in reusable resources, notably pre-trained **modules**. In this tutorial, we will use a TF-Hub text embedding module to train a simple sentiment classifier with a reasonable baseline accuracy. We will then submit the predictions to Kaggle.\n", + "\n", + "For more detailed tutorial on text classification with TF-Hub and further steps for improving the accuracy, take a look at [Text classification with TF-Hub](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9KyLct9rq0lo" + }, + "outputs": [], + "source": [ + "!pip install -q kaggle" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v7hy0bhngTUp" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import zipfile\n", + "\n", + "from sklearn import model_selection" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JvgBdeMsuu_3" + }, + "source": [ + "Since this tutorial will be using a dataset from Kaggle, it requires [creating an API Token](https://github.com/Kaggle/kaggle-api) for your Kaggle account, and uploading it to the Colab environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nI7C-Zc4urOH" + }, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "# Upload the API token.\n", + "def get_kaggle():\n", + " try:\n", + " import kaggle\n", + " return kaggle\n", + " except OSError:\n", + " pass\n", + "\n", + " token_file = pathlib.Path(\"~/.kaggle/kaggle.json\").expanduser()\n", + " token_file.parent.mkdir(exist_ok=True, parents=True)\n", + "\n", + " try:\n", + " from google.colab import files\n", + " except ImportError:\n", + " raise ValueError(\"Could not find kaggle token.\")\n", + "\n", + " uploaded = files.upload()\n", + " token_content = uploaded.get('kaggle.json', None)\n", + " if token_content:\n", + " token_file.write_bytes(token_content)\n", + " token_file.chmod(0o600)\n", + " else:\n", + " raise ValueError('Need a file named \"kaggle.json\"')\n", + " \n", + " import kaggle\n", + " return kaggle\n", + "\n", + "\n", + "kaggle = get_kaggle()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6OPyVxHuiTEE" + }, + "source": [ + "# Getting started\n", + "\n", + "## Data\n", + "We will try to solve the [Sentiment Analysis on Movie Reviews](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/data) task from Kaggle. The dataset consists of syntactic subphrases of the Rotten Tomatoes movie reviews. 
The task is to label the phrases as **negative** or **positive** on the scale from 1 to 5.\n", + "\n", + "You must [accept the competition rules](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/data) before you can use the API to download the data.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "rKzc-fOGV72G" + }, + "outputs": [], + "source": [ + "SENTIMENT_LABELS = [\n", + " \"negative\", \"somewhat negative\", \"neutral\", \"somewhat positive\", \"positive\"\n", + "]\n", + "\n", + "# Add a column with readable values representing the sentiment.\n", + "def add_readable_labels_column(df, sentiment_value_column):\n", + " df[\"SentimentLabel\"] = df[sentiment_value_column].replace(\n", + " range(5), SENTIMENT_LABELS)\n", + " \n", + "# Download data from Kaggle and create a DataFrame.\n", + "def load_data_from_zip(path):\n", + " with zipfile.ZipFile(path, \"r\") as zip_ref:\n", + " name = zip_ref.namelist()[0]\n", + " with zip_ref.open(name) as zf:\n", + " return pd.read_csv(zf, sep=\"\\t\", index_col=0)\n", + "\n", + "\n", + "# The data does not come with a validation set so we'll create one from the\n", + "# training set.\n", + "def get_data(competition, train_file, test_file, validation_set_ratio=0.1):\n", + " data_path = pathlib.Path(\"data\")\n", + " kaggle.api.competition_download_files(competition, data_path)\n", + " competition_path = (data_path/competition)\n", + " competition_path.mkdir(exist_ok=True, parents=True)\n", + " competition_zip_path = competition_path.with_suffix(\".zip\")\n", + "\n", + " with zipfile.ZipFile(competition_zip_path, \"r\") as zip_ref:\n", + " zip_ref.extractall(competition_path)\n", + " \n", + " train_df = load_data_from_zip(competition_path/train_file)\n", + " test_df = load_data_from_zip(competition_path/test_file)\n", + "\n", + " # Add a human readable label.\n", + " add_readable_labels_column(train_df, \"Sentiment\")\n", + "\n", + " # We split by sentence ids, because we don't want to have phrases belonging\n", + " # to the same sentence in both training and validation set.\n", + " train_indices, validation_indices = model_selection.train_test_split(\n", + " np.unique(train_df[\"SentenceId\"]),\n", + " test_size=validation_set_ratio,\n", + " random_state=0)\n", + "\n", + " validation_df = train_df[train_df[\"SentenceId\"].isin(validation_indices)]\n", + " train_df = train_df[train_df[\"SentenceId\"].isin(train_indices)]\n", + " print(\"Split the training data into %d training and %d validation examples.\" %\n", + " (len(train_df), len(validation_df)))\n", + "\n", + " return train_df, validation_df, test_df\n", + "\n", + "\n", + "train_df, validation_df, test_df = get_data(\n", + " \"sentiment-analysis-on-movie-reviews\",\n", + " \"train.tsv.zip\", \"test.tsv.zip\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DFq_EyS1BEyK" + }, + "source": [ + "Note: In this competition the task is not to rate entire reviews, but individual phrases from within the reviews. This is a much harder task." 
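The split in `get_data` is done on `SentenceId` rather than on individual rows precisely so that phrases from one sentence never land on both sides of the split. A minimal toy illustration of that idea, with made-up ids that are independent of the Kaggle data:

```python
import numpy as np
import pandas as pd
from sklearn import model_selection

# Toy frame: two phrases per sentence, three sentences.
toy = pd.DataFrame({
    "SentenceId": [1, 1, 2, 2, 3, 3],
    "Phrase": ["a", "a b", "c", "c d", "e", "e f"],
})

train_ids, val_ids = model_selection.train_test_split(
    np.unique(toy["SentenceId"]), test_size=0.34, random_state=0)

train_toy = toy[toy["SentenceId"].isin(train_ids)]
val_toy = toy[toy["SentenceId"].isin(val_ids)]

# No sentence id appears in both splits, so no phrase leakage is possible.
assert set(train_toy["SentenceId"]).isdisjoint(set(val_toy["SentenceId"]))
print(sorted(train_ids), sorted(val_ids))
```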
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "42hgsiWNq5y9" + }, + "outputs": [], + "source": [ + "train_df.head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YPuHgx3BWBOg" + }, + "source": [ + "## Training an Model\n", + "\n", + "*Note: We could model this task also as a regression, see [Text classification with TF-Hub](https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub.ipynb).*" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "23U30yEkVq4w" + }, + "outputs": [], + "source": [ + "class MyModel(tf.keras.Model):\n", + " def __init__(self, hub_url):\n", + " super().__init__()\n", + " self.hub_url = hub_url\n", + " self.embed = hub.load(self.hub_url).signatures['default']\n", + " self.sequential = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(500),\n", + " tf.keras.layers.Dense(100),\n", + " tf.keras.layers.Dense(5),\n", + " ])\n", + "\n", + " def call(self, inputs):\n", + " phrases = inputs['Phrase'][:,0]\n", + " embedding = 5*self.embed(phrases)['default']\n", + " return self.sequential(embedding)\n", + "\n", + " def get_config(self):\n", + " return {\"hub_url\":self.hub_url}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JE--GDMM2tSp" + }, + "outputs": [], + "source": [ + "model = MyModel(\"https://tfhub.dev/google/nnlm-en-dim128/1/")\n", + "model.compile(\n", + " loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " optimizer=tf.optimizers.Adam(), \n", + " metrics = [tf.keras.metrics.SparseCategoricalAccuracy(name=\"accuracy\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SRr-lvhstiNw" + }, + "outputs": [], + "source": [ + "history = model.fit(x=dict(train_df), y=train_df['Sentiment'],\n", + " validation_data=(dict(validation_df), validation_df['Sentiment']),\n", + " epochs = 25)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "s8j7YTRSe7Pj" + }, + "source": [ + "# Prediction\n", + "\n", + "Run predictions for the validation set and training set." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iGqVNSl87bgN" + }, + "outputs": [], + "source": [ + "plt.plot(history.history['accuracy'])\n", + "plt.plot(history.history['val_accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zbLg5LzGwAfC" + }, + "outputs": [], + "source": [ + "train_eval_result = model.evaluate(dict(train_df), train_df['Sentiment'])\n", + "validation_eval_result = model.evaluate(dict(validation_df), validation_df['Sentiment'])\n", + "\n", + "print(f\"Training set accuracy: {train_eval_result[1]}\")\n", + "print(f\"Validation set accuracy: {validation_eval_result[1]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DR2IsTF5vuAX" + }, + "source": [ + "## Confusion matrix\n", + "\n", + "Another very interesting statistic, especially for multiclass problems, is the [confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix). The confusion matrix allows visualization of the proportion of correctly and incorrectly labelled examples. We can easily see how much our classifier is biased and whether the distribution of labels makes sense. Ideally the largest fraction of predictions should be distributed along the diagonal." 
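Before computing it for the trained model in the next cells, the row-normalization used there can be previewed on a tiny hand-made example (arbitrary labels, unrelated to the movie-review data):

```python
import tensorflow as tf

# Toy ground-truth and predicted labels for a 3-class problem.
y_true = [0, 0, 1, 1, 2, 2, 2]
y_pred = [0, 1, 1, 1, 2, 2, 0]

cm = tf.math.confusion_matrix(y_true, y_pred)
# Normalize each row so it sums to 1: entry [i, j] becomes the fraction of
# true class i that was predicted as class j.
cm_normalized = cm / tf.reduce_sum(cm, axis=1, keepdims=True)
print(cm_normalized.numpy())
```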
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yKUnJFYY8bO_" + }, + "outputs": [], + "source": [ + "predictions = model.predict(dict(validation_df))\n", + "predictions = tf.argmax(predictions, axis=-1)\n", + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fjAs8W_Z9BvP" + }, + "outputs": [], + "source": [ + "cm = tf.math.confusion_matrix(validation_df['Sentiment'], predictions)\n", + "cm = cm/cm.numpy().sum(axis=1)[:, tf.newaxis]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nT71CtArpsKz" + }, + "outputs": [], + "source": [ + "sns.heatmap(\n", + " cm, annot=True,\n", + " xticklabels=SENTIMENT_LABELS,\n", + " yticklabels=SENTIMENT_LABELS)\n", + "plt.xlabel(\"Predicted\")\n", + "plt.ylabel(\"True\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Pic7o2m04weY" + }, + "source": [ + "We can easily submit the predictions back to Kaggle by pasting the following code to a code cell and executing it:\n", + "\n", + "``` python\n", + "test_predictions = model.predict(dict(test_df))\n", + "test_predictions = np.argmax(test_predictions, axis=-1)\n", + "\n", + "result_df = test_df.copy()\n", + "\n", + "result_df[\"Predictions\"] = test_predictions\n", + "\n", + "result_df.to_csv(\n", + " \"predictions.csv\",\n", + " columns=[\"Predictions\"],\n", + " header=[\"Sentiment\"])\n", + "kaggle.api.competition_submit(\"predictions.csv\", \"Submitted from Colab\",\n", + " \"sentiment-analysis-on-movie-reviews\")\n", + "```\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "50BLu-JX_dlm" + }, + "source": [ + "After submitting, [check the leaderboard](https://www.kaggle.com/c/sentiment-analysis-on-movie-reviews/leaderboard) to see how you did." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "text_classification_with_tf_hub_on_kaggle.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/text_cookbook.md b/site/en/hub/tutorials/text_cookbook.md new file mode 100644 index 00000000000..dee9c1cf466 --- /dev/null +++ b/site/en/hub/tutorials/text_cookbook.md @@ -0,0 +1,101 @@ +# Text Cookbook + +This page lists a set of known guides and tools solving problems in the text +domain with TensorFlow Hub. It is a starting place for anybody who wants to +solve typical ML problems using pre-trained ML components rather than starting +from scratch. + +## Classification + +When we want to predict a class for a given example, for example **sentiment**, +**toxicity**, **article category**, or any other characteristic. + +![Text Classification Graphic](https://www.gstatic.com/aihub/tfhub/universal-sentence-encoder/example-classification.png) + +The tutorials below are solving the same task from different perspectives and +using different tools. + +### Keras + +[Text classification with Keras](https://www.tensorflow.org/tutorials/keras/text_classification_with_hub) - +example for building an IMDB sentiment classifier with Keras and TensorFlow +Datasets. + +### Estimator + +[Text classification](https://github.com/tensorflow/docs/blob/master/g3doc/en/hub/tutorials/text_classification_with_tf_hub.ipynb) - +example for building an IMDB sentiment classifier with Estimator. Contains +multiple tips for improvement and a module comparison section. 
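For orientation, the pattern these classification tutorials share can be condensed to a few lines of Keras: a TF Hub text embedding layer followed by a small dense head. This is only an illustrative sketch; the `nnlm-en-dim50` handle and the layer sizes are arbitrary choices for the example, not a recommendation.

```python
import tensorflow as tf
import tensorflow_hub as hub

# A pre-trained sentence embedding from TF Hub, wrapped as a Keras layer.
embedding = hub.KerasLayer("https://tfhub.dev/google/nnlm-en-dim50/2",
                           input_shape=[], dtype=tf.string, trainable=True)

model = tf.keras.Sequential([
    embedding,                      # string -> 50-dim embedding
    tf.keras.layers.Dense(16, activation="relu"),
    tf.keras.layers.Dense(1),       # binary sentiment logit
])
model.compile(optimizer="adam",
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=["accuracy"])
model.summary()
```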
+ +### BERT +[Predicting Movie Review Sentiment with BERT on TF Hub](https://github.com/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb) - +shows how to use a BERT module for classification. Includes use of `bert` +library for tokenization and preprocessing. + +### Kaggle + +[IMDB classification on Kaggle](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb) - +shows how to easily interact with a Kaggle competition from a Colab, including +downloading the data and submitting the results. + + | Estimator | Keras | TF2 | TF Datasets | BERT | Kaggle APIs +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | ----------- +[Text classification](https://www.tensorflow.org/hub/tutorials/text_classification_with_tf_hub) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | | +[Text classification with Keras](https://www.tensorflow.org/tutorials/keras/text_classification_with_hub) | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | +[Predicting Movie Review Sentiment with BERT on TF Hub](https://github.com/google-research/bert/blob/master/predicting_movie_reviews_with_bert_on_tf_hub.ipynb) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | +[IMDB classification on Kaggle](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/text_classification_with_tf_hub_on_kaggle.ipynb) | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) | | | | | ![done](https://www.gstatic.com/images/icons/material/system_gm/1x/bigtop_done_googblue_18dp.png) + +### Bangla task with FastText embeddings +TensorFlow Hub does not currently offer a module in every language. The +following tutorial shows how to leverage TensorFlow Hub for fast experimentation +and modular ML development. + +[Bangla Article Classifier](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/bangla_article_classifier.ipynb) - +demonstrates how to create a reusable TensorFlow Hub text embedding, and use it +to train a Keras classifier for +[BARD Bangla Article dataset](https://github.com/tanvirfahim15/BARD-Bangla-Article-Classifier). + +## Semantic similarity + +When we want to find out which sentences correlate with each other in zero-shot +setup (no training examples). 
+ +![Semantic Similarity Graphic](https://www.gstatic.com/aihub/tfhub/universal-sentence-encoder/example-similarity.png) + +### Basic + +[Semantic similarity](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder.ipynb) - +shows how to use the sentence encoder module to compute sentence similarity. + +### Cross-lingual + +[Cross-lingual semantic similarity](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/cross_lingual_similarity_with_tf_hub_multilingual_universal_encoder.ipynb) - +shows how to use one of the cross-lingual sentence encoders to compute sentence +similarity across languages. + +### Semantic retrieval + +[Semantic retrieval](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/retrieval_with_tf_hub_universal_encoder_qa.ipynb) - +shows how to use Q/A sentence encoder to index a collection of documents for +retrieval based on semantic similarity. + +### SentencePiece input + +[Semantic similarity with universal encoder lite](https://github.com/tensorflow/docs/blob/master/site/en/hub/tutorials/semantic_similarity_with_tf_hub_universal_encoder_lite.ipynb) - +shows how to use sentence encoder modules that accept +[SentencePiece](https://github.com/google/sentencepiece) ids on input instead of +text. + +## Module creation +Instead of using only modules on [tfhub.dev](https://tfhub.dev), there are ways +to create own modules. This can be a useful tool for better ML codebase +modularity and for further sharing. + +### Wrapping existing pre-trained embeddings +[Text embedding module exporter](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings/export.py) - +a tool to wrap an existing pre-trained embedding into a module. Shows how to +include text pre-processing ops into the module. This allows to create a +sentence embedding module from token embeddings. + +[Text embedding module exporter v2](https://github.com/tensorflow/hub/blob/master/examples/text_embeddings_v2/export_v2.py) - +same as above, but compatible with TensorFlow 2 and eager execution. diff --git a/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb b/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb new file mode 100644 index 00000000000..52fb3c0e4ab --- /dev/null +++ b/site/en/hub/tutorials/text_to_video_retrieval_with_s3d_milnce.ipynb @@ -0,0 +1,275 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8JSGdaDHc_f4" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z2_BHI6XdJ30" + }, + "source": [ + "# Text-to-Video retrieval with S3D MIL-NCE" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Rm0K9ZTgfISB" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bC_xJPpQd-LO" + }, + "outputs": [], + "source": [ + "!pip install -q opencv-python\n", + "\n", + "import os\n", + "\n", + "import tensorflow.compat.v2 as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import numpy as np\n", + "import cv2\n", + "from IPython import display\n", + "import math" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZxwaK-jf7qkW" + }, + "source": [ + "## Import TF-Hub model\n", + "\n", + "This tutorial demonstrates how to use the [S3D MIL-NCE model](https://tfhub.dev/deepmind/mil-nce/s3d/1) from TensorFlow Hub to do **text-to-video retrieval** to find the most similar videos for a given text query.\n", + "\n", + "The model has 2 signatures, one for generating *video embeddings* and one for generating *text embeddings*. We will use these embedding to find the nearest neighbors in the embedding space." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nwv4ZQ4qmak5" + }, + "outputs": [], + "source": [ + "# Load the model once from TF-Hub.\n", + "hub_handle = 'https://tfhub.dev/deepmind/mil-nce/s3d/1'\n", + "hub_model = hub.load(hub_handle)\n", + "\n", + "def generate_embeddings(model, input_frames, input_words):\n", + " \"\"\"Generate embeddings from the model from video frames and input words.\"\"\"\n", + " # Input_frames must be normalized in [0, 1] and of the shape Batch x T x H x W x 3\n", + " vision_output = model.signatures['video'](tf.constant(tf.cast(input_frames, dtype=tf.float32)))\n", + " text_output = model.signatures['text'](tf.constant(input_words))\n", + " return vision_output['video_embedding'], text_output['text_embedding']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EOZzu9ddekEj" + }, + "outputs": [], + "source": [ + "# @title Define video loading and visualization functions { display-mode: \"form\" }\n", + "\n", + "# Utilities to open video files using CV2\n", + "def crop_center_square(frame):\n", + " y, x = frame.shape[0:2]\n", + " min_dim = min(y, x)\n", + " start_x = (x // 2) - (min_dim // 2)\n", + " start_y = (y // 2) - (min_dim // 2)\n", + " return frame[start_y:start_y+min_dim,start_x:start_x+min_dim]\n", + "\n", + "\n", + "def load_video(video_url, max_frames=32, resize=(224, 224)):\n", + " path = tf.keras.utils.get_file(os.path.basename(video_url)[-128:], video_url)\n", + " cap = cv2.VideoCapture(path)\n", + " frames = []\n", + " try:\n", + " while True:\n", + " ret, frame = cap.read()\n", + " if not ret:\n", + " break\n", + " frame = crop_center_square(frame)\n", + " frame = cv2.resize(frame, resize)\n", + " frame = frame[:, :, [2, 1, 0]]\n", + " frames.append(frame)\n", + "\n", + " if len(frames) == max_frames:\n", + " break\n", + " finally:\n", + " cap.release()\n", + " frames = np.array(frames)\n", + " if len(frames) < max_frames:\n", + " n_repeat = int(math.ceil(max_frames / float(len(frames))))\n", + " frames = frames.repeat(n_repeat, axis=0)\n", + " frames = frames[:max_frames]\n", + " return frames / 255.0\n", + "\n", + "def display_video(urls):\n", + " html = ''\n", + " html += ''\n", + " for url in urls:\n", + " html += ''\n", + " html += '
    Video 1Video 2Video 3
    '\n", + " html += ''.format(url)\n", + " html += '
    '\n", + " return display.HTML(html)\n", + "\n", + "def display_query_and_results_video(query, urls, scores):\n", + " \"\"\"Display a text query and the top result videos and scores.\"\"\"\n", + " sorted_ix = np.argsort(-scores)\n", + " html = ''\n", + " html += '

    Input query: {}

    '.format(query)\n", + " html += 'Results:
    '\n", + " html += ''\n", + " html += ''.format(scores[sorted_ix[0]])\n", + " html += ''.format(scores[sorted_ix[1]])\n", + " html += ''.format(scores[sorted_ix[2]])\n", + " for i, idx in enumerate(sorted_ix):\n", + " url = urls[sorted_ix[i]];\n", + " html += ''\n", + " html += '
    Rank #1, Score:{:.2f}Rank #2, Score:{:.2f}Rank #3, Score:{:.2f}
    '\n", + " html += ''.format(url)\n", + " html += '
    '\n", + " return html\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ime5V4kDewh8" + }, + "outputs": [], + "source": [ + "# @title Load example videos and define text queries { display-mode: \"form\" }\n", + "\n", + "video_1_url = 'https://upload.wikimedia.org/wikipedia/commons/b/b0/YosriAirTerjun.gif' # @param {type:\"string\"}\n", + "video_2_url = 'https://upload.wikimedia.org/wikipedia/commons/e/e6/Guitar_solo_gif.gif' # @param {type:\"string\"}\n", + "video_3_url = 'https://upload.wikimedia.org/wikipedia/commons/3/30/2009-08-16-autodrift-by-RalfR-gif-by-wau.gif' # @param {type:\"string\"}\n", + "\n", + "video_1 = load_video(video_1_url)\n", + "video_2 = load_video(video_2_url)\n", + "video_3 = load_video(video_3_url)\n", + "all_videos = [video_1, video_2, video_3]\n", + "\n", + "query_1_video = 'waterfall' # @param {type:\"string\"}\n", + "query_2_video = 'playing guitar' # @param {type:\"string\"}\n", + "query_3_video = 'car drifting' # @param {type:\"string\"}\n", + "all_queries_video = [query_1_video, query_2_video, query_3_video]\n", + "all_videos_urls = [video_1_url, video_2_url, video_3_url]\n", + "display_video(all_videos_urls)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NCLKv_L_8Anc" + }, + "source": [ + "## Demonstrate text to video retrieval\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9oX8ItFUjybi" + }, + "outputs": [], + "source": [ + "# Prepare video inputs.\n", + "videos_np = np.stack(all_videos, axis=0)\n", + "\n", + "# Prepare text input.\n", + "words_np = np.array(all_queries_video)\n", + "\n", + "# Generate the video and text embeddings.\n", + "video_embd, text_embd = generate_embeddings(hub_model, videos_np, words_np)\n", + "\n", + "# Scores between video and text is computed by dot products.\n", + "all_scores = np.dot(text_embd, tf.transpose(video_embd))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d4AwYmODmE9Y" + }, + "outputs": [], + "source": [ + "# Display results.\n", + "html = ''\n", + "for i, words in enumerate(words_np):\n", + " html += display_query_and_results_video(words, all_videos_urls, all_scores[i, :])\n", + " html += '
    '\n", + "display.HTML(html)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "text_to_video_retrieval_with_s3d_milnce.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb b/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb new file mode 100644 index 00000000000..3a0cb09113e --- /dev/null +++ b/site/en/hub/tutorials/tf2_arbitrary_image_stylization.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oXlcl8lqBgAD" + }, + "source": [ + "# Fast Style Transfer for Arbitrary Styles\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YeeuYzbZcJzs" + }, + "source": [ + "Based on the model code in [magenta](https://github.com/tensorflow/magenta/tree/master/magenta/models/arbitrary_image_stylization) and the publication:\n", + "\n", + "[Exploring the structure of a real-time, arbitrary neural artistic stylization\n", + "network](https://arxiv.org/abs/1705.06830).\n", + "*Golnaz Ghiasi, Honglak Lee,\n", + "Manjunath Kudlur, Vincent Dumoulin, Jonathon Shlens*,\n", + "Proceedings of the British Machine Vision Conference (BMVC), 2017.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TaM8BVxrCA2E" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "J65jog2ncJzt" + }, + "source": [ + "Let's start with importing TF2 and all relevant dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "v-KXRY5XBu2u" + }, + "outputs": [], + "source": [ + "import functools\n", + "import os\n", + "\n", + "from matplotlib import gridspec\n", + "import matplotlib.pylab as plt\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "print(\"TF Version: \", tf.__version__)\n", + "print(\"TF Hub version: \", hub.__version__)\n", + "print(\"Eager mode enabled: \", tf.executing_eagerly())\n", + "print(\"GPU available: \", tf.config.list_physical_devices('GPU'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tsoDv_9geoZn" + }, + "outputs": [], + "source": [ + "# @title Define image loading and visualization functions { display-mode: \"form\" }\n", + "\n", + "def crop_center(image):\n", + " \"\"\"Returns a cropped square image.\"\"\"\n", + " shape = image.shape\n", + " new_shape = min(shape[1], shape[2])\n", + " offset_y = max(shape[1] - shape[2], 0) // 2\n", + " offset_x = max(shape[2] - shape[1], 0) // 2\n", + " image = tf.image.crop_to_bounding_box(\n", + " image, offset_y, offset_x, new_shape, new_shape)\n", + " return image\n", + "\n", + "@functools.lru_cache(maxsize=None)\n", + "def load_image(image_url, image_size=(256, 256), preserve_aspect_ratio=True):\n", + " \"\"\"Loads and preprocesses images.\"\"\"\n", + " # Cache image file locally.\n", + " image_path = tf.keras.utils.get_file(os.path.basename(image_url)[-128:], image_url)\n", + " # Load and convert to float32 numpy array, add batch dimension, and normalize to range [0, 1].\n", + " img = tf.io.decode_image(\n", + " tf.io.read_file(image_path),\n", + " channels=3, dtype=tf.float32)[tf.newaxis, ...]\n", + " img = crop_center(img)\n", + " img = tf.image.resize(img, image_size, preserve_aspect_ratio=True)\n", + " return img\n", + "\n", + "def show_n(images, titles=('',)):\n", + " n = len(images)\n", + " image_sizes = [image.shape[1] for image in images]\n", + " w = (image_sizes[0] * 6) // 320\n", + " plt.figure(figsize=(w * n, w))\n", + " gs = gridspec.GridSpec(1, n, width_ratios=image_sizes)\n", + " for i in range(n):\n", + " plt.subplot(gs[i])\n", + " plt.imshow(images[i][0], aspect='equal')\n", + " plt.axis('off')\n", + " plt.title(titles[i] if len(titles) > i else '')\n", + " plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8etHh05-CJHc" + }, + "source": [ + "Let's get as well some images to play with." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dRc0vat3Alzo" + }, + "outputs": [], + "source": [ + "# @title Load example images { display-mode: \"form\" }\n", + "\n", + "content_image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/f/fd/Golden_Gate_Bridge_from_Battery_Spencer.jpg/640px-Golden_Gate_Bridge_from_Battery_Spencer.jpg' # @param {type:\"string\"}\n", + "style_image_url = 'https://upload.wikimedia.org/wikipedia/commons/0/0a/The_Great_Wave_off_Kanagawa.jpg' # @param {type:\"string\"}\n", + "output_image_size = 384 # @param {type:\"integer\"}\n", + "\n", + "# The content image size can be arbitrary.\n", + "content_img_size = (output_image_size, output_image_size)\n", + "# The style prediction model was trained with image size 256 and it's the \n", + "# recommended image size for the style image (though, other sizes work as \n", + "# well but will lead to different results).\n", + "style_img_size = (256, 256) # Recommended to keep it at 256.\n", + "\n", + "content_image = load_image(content_image_url, content_img_size)\n", + "style_image = load_image(style_image_url, style_img_size)\n", + "style_image = tf.nn.avg_pool(style_image, ksize=[3,3], strides=[1,1], padding='SAME')\n", + "show_n([content_image, style_image], ['Content image', 'Style image'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yL2Bn5ThR1nY" + }, + "source": [ + "## Import TF Hub module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "467AVDSuzBPc" + }, + "outputs": [], + "source": [ + "# Load TF Hub module.\n", + "\n", + "hub_handle = 'https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2'\n", + "hub_module = hub.load(hub_handle)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uAR70_3wLEDB" + }, + "source": [ + "The signature of this hub module for image stylization is:\n", + "```\n", + "outputs = hub_module(content_image, style_image)\n", + "stylized_image = outputs[0]\n", + "```\n", + "Where `content_image`, `style_image`, and `stylized_image` are expected to be 4-D Tensors with shapes `[batch_size, image_height, image_width, 3]`.\n", + "\n", + "In the current example we provide only single images and therefore the batch dimension is 1, but one can use the same module to process more images at the same time.\n", + "\n", + "The input and output values of the images should be in the range [0, 1].\n", + "\n", + "The shapes of content and style image don't have to match. Output image shape\n", + "is the same as the content image shape." 
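Since the module accepts batched inputs, several content/style pairs can be stylized in a single call. A minimal sketch, reusing the images loaded above (the duplicated batch entries are only for illustration):

```python
# Batched stylization sketch: stack content and style images along the batch axis.
content_batch = tf.concat([content_image, content_image], axis=0)  # [2, H, W, 3]
style_batch = tf.concat([style_image, style_image], axis=0)        # [2, 256, 256, 3]

outputs = hub_module(tf.constant(content_batch), tf.constant(style_batch))
stylized_batch = outputs[0]  # [2, H, W, 3], one stylized image per content/style pair
```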
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qEhYJno1R7rP" + }, + "source": [ + "## Demonstrate image stylization" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lnAv-F3O9fLV" + }, + "outputs": [], + "source": [ + "# Stylize content image with given style image.\n", + "# This is pretty fast within a few milliseconds on a GPU.\n", + "\n", + "outputs = hub_module(tf.constant(content_image), tf.constant(style_image))\n", + "stylized_image = outputs[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OEAPEdq698gs" + }, + "outputs": [], + "source": [ + "# Visualize input images and the generated stylized image.\n", + "\n", + "show_n([content_image, style_image, stylized_image], titles=['Original content image', 'Style image', 'Stylized image'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v-gYvjTWK-lx" + }, + "source": [ + "## Let's try it on more images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WSMaY0YBNfkK" + }, + "outputs": [], + "source": [ + "# @title To Run: Load more images { display-mode: \"form\" }\n", + "\n", + "content_urls = dict(\n", + " sea_turtle='https://upload.wikimedia.org/wikipedia/commons/d/d7/Green_Sea_Turtle_grazing_seagrass.jpg',\n", + " tuebingen='https://upload.wikimedia.org/wikipedia/commons/0/00/Tuebingen_Neckarfront.jpg',\n", + " grace_hopper='https://storage.googleapis.com/download.tensorflow.org/example_images/grace_hopper.jpg',\n", + " )\n", + "style_urls = dict(\n", + " kanagawa_great_wave='https://upload.wikimedia.org/wikipedia/commons/0/0a/The_Great_Wave_off_Kanagawa.jpg',\n", + " kandinsky_composition_7='https://upload.wikimedia.org/wikipedia/commons/b/b4/Vassily_Kandinsky%2C_1913_-_Composition_7.jpg',\n", + " hubble_pillars_of_creation='https://upload.wikimedia.org/wikipedia/commons/6/68/Pillars_of_creation_2014_HST_WFC3-UVIS_full-res_denoised.jpg',\n", + " van_gogh_starry_night='https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg/1024px-Van_Gogh_-_Starry_Night_-_Google_Art_Project.jpg',\n", + " turner_nantes='https://upload.wikimedia.org/wikipedia/commons/b/b7/JMW_Turner_-_Nantes_from_the_Ile_Feydeau.jpg',\n", + " munch_scream='https://upload.wikimedia.org/wikipedia/commons/c/c5/Edvard_Munch%2C_1893%2C_The_Scream%2C_oil%2C_tempera_and_pastel_on_cardboard%2C_91_x_73_cm%2C_National_Gallery_of_Norway.jpg',\n", + " picasso_demoiselles_avignon='https://upload.wikimedia.org/wikipedia/en/4/4c/Les_Demoiselles_d%27Avignon.jpg',\n", + " picasso_violin='https://upload.wikimedia.org/wikipedia/en/3/3c/Pablo_Picasso%2C_1911-12%2C_Violon_%28Violin%29%2C_oil_on_canvas%2C_Kr%C3%B6ller-M%C3%BCller_Museum%2C_Otterlo%2C_Netherlands.jpg',\n", + " picasso_bottle_of_rum='https://upload.wikimedia.org/wikipedia/en/7/7f/Pablo_Picasso%2C_1911%2C_Still_Life_with_a_Bottle_of_Rum%2C_oil_on_canvas%2C_61.3_x_50.5_cm%2C_Metropolitan_Museum_of_Art%2C_New_York.jpg',\n", + " fire='https://upload.wikimedia.org/wikipedia/commons/3/36/Large_bonfire.jpg',\n", + " derkovits_woman_head='https://upload.wikimedia.org/wikipedia/commons/0/0d/Derkovits_Gyula_Woman_head_1922.jpg',\n", + " 
amadeo_style_life='https://upload.wikimedia.org/wikipedia/commons/8/8e/Untitled_%28Still_life%29_%281913%29_-_Amadeo_Souza-Cardoso_%281887-1918%29_%2817385824283%29.jpg',\n", + " derkovtis_talig='https://upload.wikimedia.org/wikipedia/commons/3/37/Derkovits_Gyula_Talig%C3%A1s_1920.jpg',\n", + " amadeo_cardoso='https://upload.wikimedia.org/wikipedia/commons/7/7d/Amadeo_de_Souza-Cardoso%2C_1915_-_Landscape_with_black_figure.jpg'\n", + ")\n", + "\n", + "content_image_size = 384\n", + "style_image_size = 256\n", + "content_images = {k: load_image(v, (content_image_size, content_image_size)) for k, v in content_urls.items()}\n", + "style_images = {k: load_image(v, (style_image_size, style_image_size)) for k, v in style_urls.items()}\n", + "style_images = {k: tf.nn.avg_pool(style_image, ksize=[3,3], strides=[1,1], padding='SAME') for k, style_image in style_images.items()}\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dqB6aNTLNVkK" + }, + "outputs": [], + "source": [ + "#@title Specify the main content image and the style you want to use. { display-mode: \"form\" }\n", + "\n", + "content_name = 'sea_turtle' # @param ['sea_turtle', 'tuebingen', 'grace_hopper']\n", + "style_name = 'munch_scream' # @param ['kanagawa_great_wave', 'kandinsky_composition_7', 'hubble_pillars_of_creation', 'van_gogh_starry_night', 'turner_nantes', 'munch_scream', 'picasso_demoiselles_avignon', 'picasso_violin', 'picasso_bottle_of_rum', 'fire', 'derkovits_woman_head', 'amadeo_style_life', 'derkovtis_talig', 'amadeo_cardoso']\n", + "\n", + "stylized_image = hub_module(tf.constant(content_images[content_name]),\n", + " tf.constant(style_images[style_name]))[0]\n", + "\n", + "show_n([content_images[content_name], style_images[style_name], stylized_image],\n", + " titles=['Original content image', 'Style image', 'Stylized image'])" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "tf2_arbitrary_image_stylization.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_image_retraining.ipynb b/site/en/hub/tutorials/tf2_image_retraining.ipynb new file mode 100644 index 00000000000..0266f4683c1 --- /dev/null +++ b/site/en/hub/tutorials/tf2_image_retraining.ipynb @@ -0,0 +1,605 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ScitaPqhKtuW" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jvztxQ6VsK2k" + }, + "outputs": [], + "source": [ + "# Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oYM61xrTsP5d" + }, + "source": [ + "# Retraining an Image Classifier\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L1otmJgmbahf" + }, + "source": [ + "## Introduction\n", + "\n", + "Image classification models have millions of parameters. Training them from\n", + "scratch requires a lot of labeled training data and a lot of computing power. Transfer learning is a technique that shortcuts much of this by taking a piece of a model that has already been trained on a related task and reusing it in a new model.\n", + "\n", + "This Colab demonstrates how to build a Keras model for classifying five species of flowers by using a pre-trained TF2 SavedModel from TensorFlow Hub for image feature extraction, trained on the much larger and more general ImageNet dataset. Optionally, the feature extractor can be trained (\"fine-tuned\") alongside the newly added classifier.\n", + "\n", + "### Looking for a tool instead?\n", + "\n", + "This is a TensorFlow coding tutorial. If you want a tool that just builds the TensorFlow or TFLite model for, take a look at the [make_image_classifier](https://github.com/tensorflow/hub/tree/master/tensorflow_hub/tools/make_image_classifier) command-line tool that gets [installed](https://www.tensorflow.org/hub/installation) by the PIP package `tensorflow-hub[make_image_classifier]`, or at [this](https://colab.sandbox.google.com/github/tensorflow/examples/blob/master/tensorflow_examples/lite/model_maker/demo/image_classification.ipynb) TFLite colab.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "bL54LWCHt5q5" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dlauq-4FWGZM" + }, + "outputs": [], + "source": [ + "import itertools\n", + "import os\n", + "\n", + "import matplotlib.pylab as plt\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "print(\"TF version:\", tf.__version__)\n", + "print(\"Hub version:\", hub.__version__)\n", + "print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mmaHHH7Pvmth" + }, + "source": [ + "## Select the TF2 SavedModel module to use\n", + "\n", + "For starters, use [https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4](https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4). The same URL can be used in code to identify the SavedModel and in your browser to show its documentation. (Note that models in TF1 Hub format won't work here.)\n", + "\n", + "You can find more TF2 models that generate image feature vectors [here](https://tfhub.dev/s?module-type=image-feature-vector&tf-version=tf2).\n", + "\n", + "There are multiple possible models to try. All you need to do is select a different one on the cell below and follow up with the notebook." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FlsEcKVeuCnf" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "model_name = \"efficientnetv2-xl-21k\" # @param ['efficientnetv2-s', 'efficientnetv2-m', 'efficientnetv2-l', 'efficientnetv2-s-21k', 'efficientnetv2-m-21k', 'efficientnetv2-l-21k', 'efficientnetv2-xl-21k', 'efficientnetv2-b0-21k', 'efficientnetv2-b1-21k', 'efficientnetv2-b2-21k', 'efficientnetv2-b3-21k', 'efficientnetv2-s-21k-ft1k', 'efficientnetv2-m-21k-ft1k', 'efficientnetv2-l-21k-ft1k', 'efficientnetv2-xl-21k-ft1k', 'efficientnetv2-b0-21k-ft1k', 'efficientnetv2-b1-21k-ft1k', 'efficientnetv2-b2-21k-ft1k', 'efficientnetv2-b3-21k-ft1k', 'efficientnetv2-b0', 'efficientnetv2-b1', 'efficientnetv2-b2', 'efficientnetv2-b3', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3', 'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'bit_s-r50x1', 'inception_v3', 'inception_resnet_v2', 'resnet_v1_50', 'resnet_v1_101', 'resnet_v1_152', 'resnet_v2_50', 'resnet_v2_101', 'resnet_v2_152', 'nasnet_large', 'nasnet_mobile', 'pnasnet_large', 'mobilenet_v2_100_224', 'mobilenet_v2_130_224', 'mobilenet_v2_140_224', 'mobilenet_v3_small_100_224', 'mobilenet_v3_small_075_224', 'mobilenet_v3_large_100_224', 'mobilenet_v3_large_075_224']\n", + "\n", + "model_handle_map = {\n", + " \"efficientnetv2-s\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/feature_vector/2/",\n", + " \"efficientnetv2-m\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_m/feature_vector/2/",\n", + " \"efficientnetv2-l\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_l/feature_vector/2/",\n", + " \"efficientnetv2-s-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2/",\n", + " \"efficientnetv2-m-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_m/feature_vector/2/",\n", + " \"efficientnetv2-l-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_l/feature_vector/2/",\n", + " \"efficientnetv2-xl-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_xl/feature_vector/2/",\n", + " \"efficientnetv2-b0-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b0/feature_vector/2/",\n", + " \"efficientnetv2-b1-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b1/feature_vector/2/",\n", + " \"efficientnetv2-b2-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b2/feature_vector/2/",\n", + " \"efficientnetv2-b3-21k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_b3/feature_vector/2/",\n", + " \"efficientnetv2-s-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_s/feature_vector/2/",\n", + " \"efficientnetv2-m-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_m/feature_vector/2/",\n", + " \"efficientnetv2-l-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_l/feature_vector/2/",\n", + " \"efficientnetv2-xl-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_xl/feature_vector/2/",\n", + " \"efficientnetv2-b0-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/feature_vector/2/",\n", + " \"efficientnetv2-b1-21k-ft1k\": 
\"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b1/feature_vector/2/",\n", + " \"efficientnetv2-b2-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b2/feature_vector/2/",\n", + " \"efficientnetv2-b3-21k-ft1k\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b3/feature_vector/2/",\n", + " \"efficientnetv2-b0\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b0/feature_vector/2/",\n", + " \"efficientnetv2-b1\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b1/feature_vector/2/",\n", + " \"efficientnetv2-b2\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b2/feature_vector/2/",\n", + " \"efficientnetv2-b3\": \"https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_b3/feature_vector/2/",\n", + " \"efficientnet_b0\": \"https://tfhub.dev/tensorflow/efficientnet/b0/feature-vector/1/",\n", + " \"efficientnet_b1\": \"https://tfhub.dev/tensorflow/efficientnet/b1/feature-vector/1/",\n", + " \"efficientnet_b2\": \"https://tfhub.dev/tensorflow/efficientnet/b2/feature-vector/1/",\n", + " \"efficientnet_b3\": \"https://tfhub.dev/tensorflow/efficientnet/b3/feature-vector/1/",\n", + " \"efficientnet_b4\": \"https://tfhub.dev/tensorflow/efficientnet/b4/feature-vector/1/",\n", + " \"efficientnet_b5\": \"https://tfhub.dev/tensorflow/efficientnet/b5/feature-vector/1/",\n", + " \"efficientnet_b6\": \"https://tfhub.dev/tensorflow/efficientnet/b6/feature-vector/1/",\n", + " \"efficientnet_b7\": \"https://tfhub.dev/tensorflow/efficientnet/b7/feature-vector/1/",\n", + " \"bit_s-r50x1\": \"https://tfhub.dev/google/bit/s-r50x1/1/",\n", + " \"inception_v3\": \"https://tfhub.dev/google/imagenet/inception_v3/feature-vector/4/",\n", + " \"inception_resnet_v2\": \"https://tfhub.dev/google/imagenet/inception_resnet_v2/feature-vector/4/",\n", + " \"resnet_v1_50\": \"https://tfhub.dev/google/imagenet/resnet_v1_50/feature-vector/4/",\n", + " \"resnet_v1_101\": \"https://tfhub.dev/google/imagenet/resnet_v1_101/feature-vector/4/",\n", + " \"resnet_v1_152\": \"https://tfhub.dev/google/imagenet/resnet_v1_152/feature-vector/4/",\n", + " \"resnet_v2_50\": \"https://tfhub.dev/google/imagenet/resnet_v2_50/feature-vector/4/",\n", + " \"resnet_v2_101\": \"https://tfhub.dev/google/imagenet/resnet_v2_101/feature-vector/4/",\n", + " \"resnet_v2_152\": \"https://tfhub.dev/google/imagenet/resnet_v2_152/feature-vector/4/",\n", + " \"nasnet_large\": \"https://tfhub.dev/google/imagenet/nasnet_large/feature_vector/4/",\n", + " \"nasnet_mobile\": \"https://tfhub.dev/google/imagenet/nasnet_mobile/feature_vector/4/",\n", + " \"pnasnet_large\": \"https://tfhub.dev/google/imagenet/pnasnet_large/feature_vector/4/",\n", + " \"mobilenet_v2_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4/",\n", + " \"mobilenet_v2_130_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_130_224/feature_vector/4/",\n", + " \"mobilenet_v2_140_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v2_140_224/feature_vector/4/",\n", + " \"mobilenet_v3_small_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/feature_vector/5/",\n", + " \"mobilenet_v3_small_075_224\": 
\"https://tfhub.dev/google/imagenet/mobilenet_v3_small_075_224/feature_vector/5/",\n", + " \"mobilenet_v3_large_100_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_100_224/feature_vector/5/",\n", + " \"mobilenet_v3_large_075_224\": \"https://tfhub.dev/google/imagenet/mobilenet_v3_large_075_224/feature_vector/5/",\n", + "}\n", + "\n", + "model_image_size_map = {\n", + " \"efficientnetv2-s\": 384,\n", + " \"efficientnetv2-m\": 480,\n", + " \"efficientnetv2-l\": 480,\n", + " \"efficientnetv2-b0\": 224,\n", + " \"efficientnetv2-b1\": 240,\n", + " \"efficientnetv2-b2\": 260,\n", + " \"efficientnetv2-b3\": 300,\n", + " \"efficientnetv2-s-21k\": 384,\n", + " \"efficientnetv2-m-21k\": 480,\n", + " \"efficientnetv2-l-21k\": 480,\n", + " \"efficientnetv2-xl-21k\": 512,\n", + " \"efficientnetv2-b0-21k\": 224,\n", + " \"efficientnetv2-b1-21k\": 240,\n", + " \"efficientnetv2-b2-21k\": 260,\n", + " \"efficientnetv2-b3-21k\": 300,\n", + " \"efficientnetv2-s-21k-ft1k\": 384,\n", + " \"efficientnetv2-m-21k-ft1k\": 480,\n", + " \"efficientnetv2-l-21k-ft1k\": 480,\n", + " \"efficientnetv2-xl-21k-ft1k\": 512,\n", + " \"efficientnetv2-b0-21k-ft1k\": 224,\n", + " \"efficientnetv2-b1-21k-ft1k\": 240,\n", + " \"efficientnetv2-b2-21k-ft1k\": 260,\n", + " \"efficientnetv2-b3-21k-ft1k\": 300, \n", + " \"efficientnet_b0\": 224,\n", + " \"efficientnet_b1\": 240,\n", + " \"efficientnet_b2\": 260,\n", + " \"efficientnet_b3\": 300,\n", + " \"efficientnet_b4\": 380,\n", + " \"efficientnet_b5\": 456,\n", + " \"efficientnet_b6\": 528,\n", + " \"efficientnet_b7\": 600,\n", + " \"inception_v3\": 299,\n", + " \"inception_resnet_v2\": 299,\n", + " \"nasnet_large\": 331,\n", + " \"pnasnet_large\": 331,\n", + "}\n", + "\n", + "model_handle = model_handle_map.get(model_name)\n", + "pixels = model_image_size_map.get(model_name, 224)\n", + "\n", + "print(f\"Selected model: {model_name} : {model_handle}\")\n", + "\n", + "IMAGE_SIZE = (pixels, pixels)\n", + "print(f\"Input size {IMAGE_SIZE}\")\n", + "\n", + "BATCH_SIZE = 16#@param {type:\"integer\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yTY8qzyYv3vl" + }, + "source": [ + "## Set up the Flowers dataset\n", + "\n", + "Inputs are suitably resized for the selected module. Dataset augmentation (i.e., random distortions of an image each time it is read) improves training, esp. when fine-tuning." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WBtFK1hO8KsO" + }, + "outputs": [], + "source": [ + "data_dir = tf.keras.utils.get_file(\n", + " 'flower_photos',\n", + " 'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',\n", + " untar=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "umB5tswsfTEQ" + }, + "outputs": [], + "source": [ + "def build_dataset(subset):\n", + " return tf.keras.preprocessing.image_dataset_from_directory(\n", + " data_dir,\n", + " validation_split=.20,\n", + " subset=subset,\n", + " label_mode=\"categorical\",\n", + " # Seed needs to provided when using validation_split and shuffle = True.\n", + " # A fixed seed is used so that the validation set is stable across runs.\n", + " seed=123,\n", + " image_size=IMAGE_SIZE,\n", + " batch_size=1)\n", + "\n", + "train_ds = build_dataset(\"training\")\n", + "class_names = tuple(train_ds.class_names)\n", + "train_size = train_ds.cardinality().numpy()\n", + "train_ds = train_ds.unbatch().batch(BATCH_SIZE)\n", + "train_ds = train_ds.repeat()\n", + "\n", + "normalization_layer = tf.keras.layers.Rescaling(1. / 255)\n", + "preprocessing_model = tf.keras.Sequential([normalization_layer])\n", + "do_data_augmentation = False #@param {type:\"boolean\"}\n", + "if do_data_augmentation:\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomRotation(40))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomTranslation(0, 0.2))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomTranslation(0.2, 0))\n", + " # Like the old tf.keras.preprocessing.image.ImageDataGenerator(),\n", + " # image sizes are fixed when reading, and then a random zoom is applied.\n", + " # If all training inputs are larger than image_size, one could also use\n", + " # RandomCrop with a batch size of 1 and rebatch later.\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomZoom(0.2, 0.2))\n", + " preprocessing_model.add(\n", + " tf.keras.layers.RandomFlip(mode=\"horizontal\"))\n", + "train_ds = train_ds.map(lambda images, labels:\n", + " (preprocessing_model(images), labels))\n", + "\n", + "val_ds = build_dataset(\"validation\")\n", + "valid_size = val_ds.cardinality().numpy()\n", + "val_ds = val_ds.unbatch().batch(BATCH_SIZE)\n", + "val_ds = val_ds.map(lambda images, labels:\n", + " (normalization_layer(images), labels))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FS_gVStowW3G" + }, + "source": [ + "## Defining the model\n", + "\n", + "All it takes is to put a linear classifier on top of the `feature_extractor_layer` with the Hub module.\n", + "\n", + "For speed, we start out with a non-trainable `feature_extractor_layer`, but you can also enable fine-tuning for greater accuracy." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RaJW3XrPyFiF" + }, + "outputs": [], + "source": [ + "do_fine_tuning = False #@param {type:\"boolean\"}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "50FYNIb1dmJH" + }, + "outputs": [], + "source": [ + "print(\"Building model with\", model_handle)\n", + "model = tf.keras.Sequential([\n", + " # Explicitly define the input shape so the model can be properly\n", + " # loaded by the TFLiteConverter\n", + " tf.keras.layers.InputLayer(input_shape=IMAGE_SIZE + (3,)),\n", + " hub.KerasLayer(model_handle, trainable=do_fine_tuning),\n", + " tf.keras.layers.Dropout(rate=0.2),\n", + " tf.keras.layers.Dense(len(class_names),\n", + " kernel_regularizer=tf.keras.regularizers.l2(0.0001))\n", + "])\n", + "model.build((None,)+IMAGE_SIZE+(3,))\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u2e5WupIw2N2" + }, + "source": [ + "## Training the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9f3yBUvkd_VJ" + }, + "outputs": [], + "source": [ + "model.compile(\n", + " optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9), \n", + " loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.1),\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "w_YKX2Qnfg6x" + }, + "outputs": [], + "source": [ + "steps_per_epoch = train_size // BATCH_SIZE\n", + "validation_steps = valid_size // BATCH_SIZE\n", + "hist = model.fit(\n", + " train_ds,\n", + " epochs=5, steps_per_epoch=steps_per_epoch,\n", + " validation_data=val_ds,\n", + " validation_steps=validation_steps).history" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CYOw0fTO1W4x" + }, + "outputs": [], + "source": [ + "plt.figure()\n", + "plt.ylabel(\"Loss (training and validation)\")\n", + "plt.xlabel(\"Training Steps\")\n", + "plt.ylim([0,2])\n", + "plt.plot(hist[\"loss\"])\n", + "plt.plot(hist[\"val_loss\"])\n", + "\n", + "plt.figure()\n", + "plt.ylabel(\"Accuracy (training and validation)\")\n", + "plt.xlabel(\"Training Steps\")\n", + "plt.ylim([0,1])\n", + "plt.plot(hist[\"accuracy\"])\n", + "plt.plot(hist[\"val_accuracy\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jZ8DKKgeKv4-" + }, + "source": [ + "Try out the model on an image from the validation data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi1iCNB9K1Ai" + }, + "outputs": [], + "source": [ + "x, y = next(iter(val_ds))\n", + "image = x[0, :, :, :]\n", + "true_index = np.argmax(y[0])\n", + "plt.imshow(image)\n", + "plt.axis('off')\n", + "plt.show()\n", + "\n", + "# Expand the validation image to (1, 224, 224, 3) before predicting the label\n", + "prediction_scores = model.predict(np.expand_dims(image, axis=0))\n", + "predicted_index = np.argmax(prediction_scores)\n", + "print(\"True label: \" + class_names[true_index])\n", + "print(\"Predicted label: \" + class_names[predicted_index])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YCsAsQM1IRvA" + }, + "source": [ + "Finally, the trained model can be saved for deployment to TF Serving or TFLite (on mobile) as follows." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LGvTi69oIc2d" + }, + "outputs": [], + "source": [ + "saved_model_path = f\"/tmp/saved_flowers_model_{model_name}\"\n", + "tf.saved_model.save(model, saved_model_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QzW4oNRjILaq" + }, + "source": [ + "## Optional: Deployment to TensorFlow Lite\n", + "\n", + "[TensorFlow Lite](https://www.tensorflow.org/lite) lets you deploy TensorFlow models to mobile and IoT devices. The code below shows how to convert the trained model to TFLite and apply post-training tools from the [TensorFlow Model Optimization Toolkit](https://www.tensorflow.org/model_optimization). Finally, it runs it in the TFLite Interpreter to examine the resulting quality\n", + "\n", + " * Converting without optimization provides the same results as before (up to roundoff error).\n", + " * Converting with optimization without any data quantizes the model weights to 8 bits, but inference still uses floating-point computation for the neural network activations. This reduces model size almost by a factor of 4 and improves CPU latency on mobile devices.\n", + " * On top, computation of the neural network activations can be quantized to 8-bit integers as well if a small reference dataset is provided to calibrate the quantization range. On a mobile device, this accelerates inference further and makes it possible to run on accelerators like Edge TPU." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Va1Vo92fSyV6" + }, + "outputs": [], + "source": [ + "#@title Optimization settings\n", + "optimize_lite_model = False #@param {type:\"boolean\"}\n", + "#@markdown Setting a value greater than zero enables quantization of neural network activations. 
A few dozen is already a useful amount.\n", + "num_calibration_examples = 60 #@param {type:\"slider\", min:0, max:1000, step:1}\n", + "representative_dataset = None\n", + "if optimize_lite_model and num_calibration_examples:\n", + " # Use a bounded number of training examples without labels for calibration.\n", + " # TFLiteConverter expects a list of input tensors, each with batch size 1.\n", + " representative_dataset = lambda: itertools.islice(\n", + " ([image[None, ...]] for batch, _ in train_ds for image in batch),\n", + " num_calibration_examples)\n", + "\n", + "converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_path)\n", + "if optimize_lite_model:\n", + " converter.optimizations = [tf.lite.Optimize.DEFAULT]\n", + " if representative_dataset: # This is optional, see above.\n", + " converter.representative_dataset = representative_dataset\n", + "lite_model_content = converter.convert()\n", + "\n", + "with open(f\"/tmp/lite_flowers_model_{model_name}.tflite\", \"wb\") as f:\n", + " f.write(lite_model_content)\n", + "print(\"Wrote %sTFLite model of %d bytes.\" %\n", + " (\"optimized \" if optimize_lite_model else \"\", len(lite_model_content)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_wqEmD0xIqeG" + }, + "outputs": [], + "source": [ + "interpreter = tf.lite.Interpreter(model_content=lite_model_content)\n", + "# This little helper wraps the TFLite Interpreter as a numpy-to-numpy function.\n", + "def lite_model(images):\n", + " interpreter.allocate_tensors()\n", + " interpreter.set_tensor(interpreter.get_input_details()[0]['index'], images)\n", + " interpreter.invoke()\n", + " return interpreter.get_tensor(interpreter.get_output_details()[0]['index'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JMMK-fZrKrk8" + }, + "outputs": [], + "source": [ + "#@markdown For rapid experimentation, start with a moderate number of examples.\n", + "num_eval_examples = 50 #@param {type:\"slider\", min:0, max:700}\n", + "eval_dataset = ((image, label) # TFLite expects batch size 1.\n", + " for batch in train_ds\n", + " for (image, label) in zip(*batch))\n", + "count = 0\n", + "count_lite_tf_agree = 0\n", + "count_lite_correct = 0\n", + "for image, label in eval_dataset:\n", + " probs_lite = lite_model(image[None, ...])[0]\n", + " probs_tf = model(image[None, ...]).numpy()[0]\n", + " y_lite = np.argmax(probs_lite)\n", + " y_tf = np.argmax(probs_tf)\n", + " y_true = np.argmax(label)\n", + " count +=1\n", + " if y_lite == y_tf: count_lite_tf_agree += 1\n", + " if y_lite == y_true: count_lite_correct += 1\n", + " if count >= num_eval_examples: break\n", + "print(\"TFLite model agrees with original model on %d of %d examples (%g%%).\" %\n", + " (count_lite_tf_agree, count, 100.0 * count_lite_tf_agree / count))\n", + "print(\"TFLite model is accurate on %d of %d examples (%g%%).\" %\n", + " (count_lite_correct, count, 100.0 * count_lite_correct / count))" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "ScitaPqhKtuW" + ], + "name": "tf2_image_retraining.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_object_detection.ipynb b/site/en/hub/tutorials/tf2_object_detection.ipynb new file mode 100644 index 00000000000..d06ad401824 --- /dev/null +++ b/site/en/hub/tutorials/tf2_object_detection.ipynb @@ -0,0 +1,616 @@ +{ + 
"cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "98rds-2OU-Rd" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "1c95xMGcU5_Z" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V1UUX8SUUiMO" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rOvvWAVTkMR7" + }, + "source": [ + "# TensorFlow Hub Object Detection Colab\n", + "\n", + "Welcome to the TensorFlow Hub Object Detection Colab! This notebook will take you through the steps of running an \"out-of-the-box\" object detection model on images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IRImnk_7WOq1" + }, + "source": [ + "### More models\n", + "[This](https://tfhub.dev/tensorflow/collections/object_detection/1) collection contains TF2 object detection models that have been trained on the COCO 2017 dataset. [Here](https://tfhub.dev/s?module-type=image-object-detection) you can find all object detection models that are currently hosted on [tfhub.dev](https://tfhub.dev/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vPs64QA1Zdov" + }, + "source": [ + "## Imports and Setup\n", + "\n", + "Let's start with the base imports." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Xk4FU-jx9kc3" + }, + "outputs": [], + "source": [ + "# This Colab requires a recent numpy version.\n", + "!pip install numpy==1.24.3\n", + "!pip install protobuf==3.20.3\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yn5_uV1HLvaz" + }, + "outputs": [], + "source": [ + "import os\n", + "import pathlib\n", + "\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "\n", + "import io\n", + "import scipy.misc\n", + "import numpy as np\n", + "from six import BytesIO\n", + "from PIL import Image, ImageDraw, ImageFont\n", + "from six.moves.urllib.request import urlopen\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "tf.get_logger().setLevel('ERROR')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IogyryF2lFBL" + }, + "source": [ + "## Utilities\n", + "\n", + "Run the following cell to create some utils that will be needed later:\n", + "\n", + "- Helper method to load an image\n", + "- Map of Model Name to TF Hub handle\n", + "- List of tuples with Human Keypoints for the COCO 2017 dataset. This is needed for models with keypoints." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "-y9R0Xllefec" + }, + "outputs": [], + "source": [ + "# @title Run this!!\n", + "\n", + "def load_image_into_numpy_array(path):\n", + " \"\"\"Load an image from file into a numpy array.\n", + "\n", + " Puts image into numpy array to feed into tensorflow graph.\n", + " Note that by convention we put it into a numpy array with shape\n", + " (height, width, channels), where channels=3 for RGB.\n", + "\n", + " Args:\n", + " path: the file path to the image\n", + "\n", + " Returns:\n", + " uint8 numpy array with shape (img_height, img_width, 3)\n", + " \"\"\"\n", + " image = None\n", + " if(path.startswith('http')):\n", + " response = urlopen(path)\n", + " image_data = response.read()\n", + " image_data = BytesIO(image_data)\n", + " image = Image.open(image_data)\n", + " else:\n", + " image_data = tf.io.gfile.GFile(path, 'rb').read()\n", + " image = Image.open(BytesIO(image_data))\n", + "\n", + " (im_width, im_height) = image.size\n", + " return np.array(image.getdata()).reshape(\n", + " (1, im_height, im_width, 3)).astype(np.uint8)\n", + "\n", + "\n", + "ALL_MODELS = {\n", + "'CenterNet HourGlass104 512x512' : 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512/1',\n", + "'CenterNet HourGlass104 Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/hourglass_512x512_kpts/1',\n", + "'CenterNet HourGlass104 1024x1024' : 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024/1',\n", + "'CenterNet HourGlass104 Keypoints 1024x1024' : 'https://tfhub.dev/tensorflow/centernet/hourglass_1024x1024_kpts/1',\n", + "'CenterNet Resnet50 V1 FPN 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512/1',\n", + "'CenterNet Resnet50 V1 FPN Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v1_fpn_512x512_kpts/1',\n", + "'CenterNet Resnet101 V1 FPN 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet101v1_fpn_512x512/1',\n", + "'CenterNet Resnet50 V2 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512/1',\n", + "'CenterNet Resnet50 V2 Keypoints 512x512' : 'https://tfhub.dev/tensorflow/centernet/resnet50v2_512x512_kpts/1',\n", + "'EfficientDet D0 512x512' : 'https://tfhub.dev/tensorflow/efficientdet/d0/1',\n", + "'EfficientDet D1 640x640' : 'https://tfhub.dev/tensorflow/efficientdet/d1/1',\n", + "'EfficientDet D2 768x768' : 'https://tfhub.dev/tensorflow/efficientdet/d2/1',\n", + "'EfficientDet D3 896x896' : 'https://tfhub.dev/tensorflow/efficientdet/d3/1',\n", + "'EfficientDet D4 1024x1024' : 'https://tfhub.dev/tensorflow/efficientdet/d4/1',\n", + "'EfficientDet D5 1280x1280' : 'https://tfhub.dev/tensorflow/efficientdet/d5/1',\n", + "'EfficientDet D6 1280x1280' : 'https://tfhub.dev/tensorflow/efficientdet/d6/1',\n", + "'EfficientDet D7 1536x1536' : 'https://tfhub.dev/tensorflow/efficientdet/d7/1',\n", + "'SSD MobileNet v2 320x320' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2',\n", + "'SSD MobileNet V1 FPN 640x640' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v1/fpn_640x640/1',\n", + "'SSD MobileNet V2 FPNLite 320x320' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_320x320/1',\n", + "'SSD MobileNet V2 FPNLite 640x640' : 'https://tfhub.dev/tensorflow/ssd_mobilenet_v2/fpnlite_640x640/1',\n", + "'SSD ResNet50 V1 FPN 640x640 
(RetinaNet50)' : 'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_640x640/1',\n", + "'SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)' : 'https://tfhub.dev/tensorflow/retinanet/resnet50_v1_fpn_1024x1024/1',\n", + "'SSD ResNet101 V1 FPN 640x640 (RetinaNet101)' : 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_640x640/1',\n", + "'SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)' : 'https://tfhub.dev/tensorflow/retinanet/resnet101_v1_fpn_1024x1024/1',\n", + "'SSD ResNet152 V1 FPN 640x640 (RetinaNet152)' : 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_640x640/1',\n", + "'SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)' : 'https://tfhub.dev/tensorflow/retinanet/resnet152_v1_fpn_1024x1024/1',\n", + "'Faster R-CNN ResNet50 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_640x640/1',\n", + "'Faster R-CNN ResNet50 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet50 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet50_v1_800x1333/1',\n", + "'Faster R-CNN ResNet101 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_640x640/1',\n", + "'Faster R-CNN ResNet101 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet101 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet101_v1_800x1333/1',\n", + "'Faster R-CNN ResNet152 V1 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_640x640/1',\n", + "'Faster R-CNN ResNet152 V1 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_1024x1024/1',\n", + "'Faster R-CNN ResNet152 V1 800x1333' : 'https://tfhub.dev/tensorflow/faster_rcnn/resnet152_v1_800x1333/1',\n", + "'Faster R-CNN Inception ResNet V2 640x640' : 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_640x640/1',\n", + "'Faster R-CNN Inception ResNet V2 1024x1024' : 'https://tfhub.dev/tensorflow/faster_rcnn/inception_resnet_v2_1024x1024/1',\n", + "'Mask R-CNN Inception ResNet V2 1024x1024' : 'https://tfhub.dev/tensorflow/mask_rcnn/inception_resnet_v2_1024x1024/1'\n", + "}\n", + "\n", + "IMAGES_FOR_TEST = {\n", + " 'Beach' : 'models/research/object_detection/test_images/image2.jpg',\n", + " 'Dogs' : 'models/research/object_detection/test_images/image1.jpg',\n", + " # By Heiko Gorski, Source: https://commons.wikimedia.org/wiki/File:Naxos_Taverna.jpg\n", + " 'Naxos Taverna' : 'https://upload.wikimedia.org/wikipedia/commons/6/60/Naxos_Taverna.jpg',\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_Coleoptera_of_the_British_islands_(Plate_125)_(8592917784).jpg\n", + " 'Beatles' : 'https://upload.wikimedia.org/wikipedia/commons/1/1b/The_Coleoptera_of_the_British_islands_%28Plate_125%29_%288592917784%29.jpg',\n", + " # By Américo Toledano, Source: https://commons.wikimedia.org/wiki/File:Biblioteca_Maim%C3%B3nides,_Campus_Universitario_de_Rabanales_007.jpg\n", + " 'Phones' : 'https://upload.wikimedia.org/wikipedia/commons/thumb/0/0d/Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg/1024px-Biblioteca_Maim%C3%B3nides%2C_Campus_Universitario_de_Rabanales_007.jpg',\n", + " # Source: https://commons.wikimedia.org/wiki/File:The_smaller_British_birds_(8053836633).jpg\n", + " 'Birds' : 
'https://upload.wikimedia.org/wikipedia/commons/0/09/The_smaller_British_birds_%288053836633%29.jpg',\n", + "}\n", + "\n", + "COCO17_HUMAN_POSE_KEYPOINTS = [(0, 1),\n", + " (0, 2),\n", + " (1, 3),\n", + " (2, 4),\n", + " (0, 5),\n", + " (0, 6),\n", + " (5, 7),\n", + " (7, 9),\n", + " (6, 8),\n", + " (8, 10),\n", + " (5, 6),\n", + " (5, 11),\n", + " (6, 12),\n", + " (11, 12),\n", + " (11, 13),\n", + " (13, 15),\n", + " (12, 14),\n", + " (14, 16)]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "14bNk1gzh0TN" + }, + "source": [ + "## Visualization tools\n", + "\n", + "To visualize the images with the detected boxes, keypoints, and segmentation masks, we will use the TensorFlow Object Detection API. To install it, we will clone the repo." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi28cqGGFWnY" + }, + "outputs": [], + "source": [ + "# Clone the tensorflow models repository\n", + "!git clone --depth 1 https://github.com/tensorflow/models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yX3pb_pXDjYA" + }, + "source": [ + "Installing the Object Detection API" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NwdsBdGhFanc" + }, + "outputs": [], + "source": [ + "%%bash\n", + "sudo apt install -y protobuf-compiler\n", + "cd models/research/\n", + "protoc object_detection/protos/*.proto --python_out=.\n", + "cp object_detection/packages/tf2/setup.py .\n", + "python -m pip install .\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3yDNgIx-kV7X" + }, + "source": [ + "Now we can import the dependencies we will need later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2JCeQU3fkayh" + }, + "outputs": [], + "source": [ + "from object_detection.utils import label_map_util\n", + "from object_detection.utils import visualization_utils as viz_utils\n", + "from object_detection.utils import ops as utils_ops\n", + "\n", + "%matplotlib inline" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NKtD0IeclbL5" + }, + "source": [ + "### Load label map data (for plotting)\n", + "\n", + "Label maps map index numbers to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.\n", + "\n", + "For simplicity, we will load the label map from the same repository that contains the Object Detection API code." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5mucYUS6exUJ" + }, + "outputs": [], + "source": [ + "PATH_TO_LABELS = './models/research/object_detection/data/mscoco_label_map.pbtxt'\n", + "category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6917xnUSlp9x" + }, + "source": [ + "## Build a detection model and load pre-trained model weights\n", + "\n", + "Here we will choose which Object Detection model we will use.\n", + "Select the architecture and it will be loaded automatically.\n", + "If you want to change the model to try other architectures later, just change the next cell and execute the following ones.\n", + "\n", + "**Tip:** if you want to read more details about the selected model, you can follow the link (model handle) and read additional documentation on TF Hub.
After you select a model, we will print its handle to make the link easier to find." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HtwrSqvakTNn" + }, + "outputs": [], + "source": [ + "#@title Model Selection { display-mode: \"form\", run: \"auto\" }\n", + "model_display_name = 'CenterNet HourGlass104 Keypoints 512x512' # @param ['CenterNet HourGlass104 512x512','CenterNet HourGlass104 Keypoints 512x512','CenterNet HourGlass104 1024x1024','CenterNet HourGlass104 Keypoints 1024x1024','CenterNet Resnet50 V1 FPN 512x512','CenterNet Resnet50 V1 FPN Keypoints 512x512','CenterNet Resnet101 V1 FPN 512x512','CenterNet Resnet50 V2 512x512','CenterNet Resnet50 V2 Keypoints 512x512','EfficientDet D0 512x512','EfficientDet D1 640x640','EfficientDet D2 768x768','EfficientDet D3 896x896','EfficientDet D4 1024x1024','EfficientDet D5 1280x1280','EfficientDet D6 1280x1280','EfficientDet D7 1536x1536','SSD MobileNet v2 320x320','SSD MobileNet V1 FPN 640x640','SSD MobileNet V2 FPNLite 320x320','SSD MobileNet V2 FPNLite 640x640','SSD ResNet50 V1 FPN 640x640 (RetinaNet50)','SSD ResNet50 V1 FPN 1024x1024 (RetinaNet50)','SSD ResNet101 V1 FPN 640x640 (RetinaNet101)','SSD ResNet101 V1 FPN 1024x1024 (RetinaNet101)','SSD ResNet152 V1 FPN 640x640 (RetinaNet152)','SSD ResNet152 V1 FPN 1024x1024 (RetinaNet152)','Faster R-CNN ResNet50 V1 640x640','Faster R-CNN ResNet50 V1 1024x1024','Faster R-CNN ResNet50 V1 800x1333','Faster R-CNN ResNet101 V1 640x640','Faster R-CNN ResNet101 V1 1024x1024','Faster R-CNN ResNet101 V1 800x1333','Faster R-CNN ResNet152 V1 640x640','Faster R-CNN ResNet152 V1 1024x1024','Faster R-CNN ResNet152 V1 800x1333','Faster R-CNN Inception ResNet V2 640x640','Faster R-CNN Inception ResNet V2 1024x1024','Mask R-CNN Inception ResNet V2 1024x1024']\n", + "model_handle = ALL_MODELS[model_display_name]\n", + "\n", + "print('Selected model: ' + model_display_name)\n", + "print('Model Handle at TensorFlow Hub: {}'.format(model_handle))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "muhUt-wWL582" + }, + "source": [ + "## Loading the selected model from TensorFlow Hub\n", + "\n", + "Here we just need the selected model handle and use the TensorFlow Hub library to load it into memory.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rBuD07fLlcEO" + }, + "outputs": [], + "source": [ + "print('loading model...')\n", + "hub_model = hub.load(model_handle)\n", + "print('model loaded!')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GIawRDKPPnd4" + }, + "source": [ + "## Loading an image\n", + "\n", + "Let's try the model on a simple image. To help with this, we provide a list of test images.\n", + "\n", + "Here are some simple things to try out if you are curious:\n", + "* Try running inference on your own images: just upload them to Colab and load them the same way it's done in the cell below.\n", + "* Modify some of the input images and see if detection still works. Some simple things to try out here include flipping the image horizontally, or converting to grayscale (note that we still expect the input image to have 3 channels).\n", + "\n", + "**Be careful:** when using images with an alpha channel, note that the model expects 3-channel images and the alpha channel will count as a 4th.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hX-AWUQ1wIEr" + }, + "outputs": [], + "source": [ + "#@title Image Selection (don't forget to execute the cell!)
{ display-mode: \"form\"}\n", + "selected_image = 'Beach' # @param ['Beach', 'Dogs', 'Naxos Taverna', 'Beatles', 'Phones', 'Birds']\n", + "flip_image_horizontally = False #@param {type:\"boolean\"}\n", + "convert_image_to_grayscale = False #@param {type:\"boolean\"}\n", + "\n", + "image_path = IMAGES_FOR_TEST[selected_image]\n", + "image_np = load_image_into_numpy_array(image_path)\n", + "\n", + "# Flip horizontally\n", + "if(flip_image_horizontally):\n", + " image_np[0] = np.fliplr(image_np[0]).copy()\n", + "\n", + "# Convert image to grayscale\n", + "if(convert_image_to_grayscale):\n", + " image_np[0] = np.tile(\n", + " np.mean(image_np[0], 2, keepdims=True), (1, 1, 3)).astype(np.uint8)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np[0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FTHsFjR6HNwb" + }, + "source": [ + "## Doing the inference\n", + "\n", + "To run inference, we just need to call the model we loaded from TF Hub.\n", + "\n", + "Things you can try:\n", + "* Print out `result['detection_boxes']` and try to match the box locations to the boxes in the image. Notice that coordinates are given in normalized form (i.e., in the interval [0, 1]).\n", + "* Inspect the other output keys present in the result. Full documentation can be found on the model's documentation page (point your browser to the model handle printed earlier)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Gb_siXKcnnGC" + }, + "outputs": [], + "source": [ + "# Run inference\n", + "results = hub_model(image_np)\n", + "\n", + "# Different object detection models have additional results;\n", + "# all of them are explained in the documentation\n", + "result = {key:value.numpy() for key,value in results.items()}\n", + "print(result.keys())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IZ5VYaBoeeFM" + }, + "source": [ + "## Visualizing the results\n", + "\n", + "Here is where we will need the TensorFlow Object Detection API to draw the boxes from the inference step (and the keypoints when available).\n", + "\n", + "The full documentation of this method can be found [here](https://github.com/tensorflow/models/blob/master/research/object_detection/utils/visualization_utils.py).\n", + "\n", + "Here you can, for example, set `min_score_thresh` to other values (between 0 and 1) to allow more detections in or to filter out more detections."
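The detection boxes from these models come back in normalized `[ymin, xmin, ymax, xmax]` order. A minimal sketch of converting them to pixel coordinates, assuming the `result` and `image_np` variables defined in the cells above:

```python
# Minimal sketch: convert normalized detection boxes to pixel coordinates.
# Assumes `result` and `image_np` exist as defined in the cells above.
import numpy as np

_, height, width, _ = image_np.shape

boxes = result['detection_boxes'][0]    # shape: (num_detections, 4)
scores = result['detection_scores'][0]

# Print the five highest-scoring boxes in pixel coordinates.
for box, score in zip(boxes[:5], scores[:5]):
    ymin, xmin, ymax, xmax = box
    print('score={:.2f}  box(px)=({:.0f}, {:.0f}, {:.0f}, {:.0f})'.format(
        score, ymin * height, xmin * width, ymax * height, xmax * width))
```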
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2O7rV8g9s8Bz" + }, + "outputs": [], + "source": [ + "label_id_offset = 0\n", + "image_np_with_detections = image_np.copy()\n", + "\n", + "# Use keypoints if available in detections\n", + "keypoints, keypoint_scores = None, None\n", + "if 'detection_keypoints' in result:\n", + " keypoints = result['detection_keypoints'][0]\n", + " keypoint_scores = result['detection_keypoint_scores'][0]\n", + "\n", + "viz_utils.visualize_boxes_and_labels_on_image_array(\n", + " image_np_with_detections[0],\n", + " result['detection_boxes'][0],\n", + " (result['detection_classes'][0] + label_id_offset).astype(int),\n", + " result['detection_scores'][0],\n", + " category_index,\n", + " use_normalized_coordinates=True,\n", + " max_boxes_to_draw=200,\n", + " min_score_thresh=.30,\n", + " agnostic_mode=False,\n", + " keypoints=keypoints,\n", + " keypoint_scores=keypoint_scores,\n", + " keypoint_edges=COCO17_HUMAN_POSE_KEYPOINTS)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np_with_detections[0])\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Qaw6Xi08NpEP" + }, + "source": [ + "## [Optional]\n", + "\n", + "Among the available object detection models there's Mask R-CNN and the output of this model allows instance segmentation.\n", + "\n", + "To visualize it we will use the same method we did before but adding an additional parameter: `instance_masks=output_dict.get('detection_masks_reframed', None)`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zl3qdtR1OvM_" + }, + "outputs": [], + "source": [ + "# Handle models with masks:\n", + "image_np_with_mask = image_np.copy()\n", + "\n", + "if 'detection_masks' in result:\n", + " # we need to convert np.arrays to tensors\n", + " detection_masks = tf.convert_to_tensor(result['detection_masks'][0])\n", + " detection_boxes = tf.convert_to_tensor(result['detection_boxes'][0])\n", + "\n", + " # Reframe the bbox mask to the image size.\n", + " detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(\n", + " detection_masks, detection_boxes,\n", + " image_np.shape[1], image_np.shape[2])\n", + " detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,\n", + " tf.uint8)\n", + " result['detection_masks_reframed'] = detection_masks_reframed.numpy()\n", + "\n", + "viz_utils.visualize_boxes_and_labels_on_image_array(\n", + " image_np_with_mask[0],\n", + " result['detection_boxes'][0],\n", + " (result['detection_classes'][0] + label_id_offset).astype(int),\n", + " result['detection_scores'][0],\n", + " category_index,\n", + " use_normalized_coordinates=True,\n", + " max_boxes_to_draw=200,\n", + " min_score_thresh=.30,\n", + " agnostic_mode=False,\n", + " instance_masks=result.get('detection_masks_reframed', None),\n", + " line_thickness=8)\n", + "\n", + "plt.figure(figsize=(24,32))\n", + "plt.imshow(image_np_with_mask[0])\n", + "plt.show()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "tf2_object_detection.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb b/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb new file mode 100644 index 00000000000..786065ff5a5 --- /dev/null +++ 
b/site/en/hub/tutorials/tf2_semantic_approximate_nearest_neighbors.ipynb @@ -0,0 +1,790 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "ACbjNjyO4f_8" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MCM50vaM4jiK" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9qOVy-_vmuUP" + }, + "source": [ + "# Semantic Search with Approximate Nearest Neighbors and Text Embeddings\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3T4d77AJaKte" + }, + "source": [ + "This tutorial illustrates how to generate embeddings from a [TensorFlow Hub](https://tfhub.dev) (TF-Hub) model given input data, and build an approximate nearest neighbours (ANN) index using the extracted embeddings. The index can then be used for real-time similarity matching and retrieval.\n", + "\n", + "When dealing with a large corpus of data, it's not efficient to perform exact matching by scanning the whole repository to find the most similar items to a given query in real-time. Thus, we use an approximate similarity matching algorithm which allows us to trade off a little bit of accuracy in finding exact nearest neighbor matches for a significant boost in speed.\n", + "\n", + "In this tutorial, we show an example of real-time text search over a corpus of news headlines to find the headlines that are most similar to a query. Unlike keyword search, this captures the semantic similarity encoded in the text embedding.\n", + "\n", + "The steps of this tutorial are:\n", + "1. Download sample data.\n", + "2. Generate embeddings for the data using a TF-Hub model\n", + "3. Build an ANN index for the embeddings\n", + "4. Use the index for similarity matching\n", + "\n", + "We use [Apache Beam](https://beam.apache.org/documentation/programming-guide/) to generate the embeddings from the TF-Hub model. We also use Spotify's [ANNOY](https://github.com/spotify/annoy) library to build the approximate nearest neighbor index." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nM17v_mEVSnd" + }, + "source": [ + "### More models\n", + "For models that have the same architecture but were trained on a different language, refer to [this](https://tfhub.dev/google/collections/nnlm/1) collection. [Here](https://tfhub.dev/s?module-type=text-embedding) you can find all text embeddings that are currently hosted on [tfhub.dev](https://tfhub.dev/). " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q0jr0QK9qO5P" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "whMRj9qeqed4" + }, + "source": [ + "Install the required libraries." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qmXkLPoaqS--" + }, + "outputs": [], + "source": [ + "!pip install apache_beam\n", + "!pip install 'scikit_learn~=0.23.0' # For gaussian_random_matrix.\n", + "!pip install annoy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A-vBZiCCqld0" + }, + "source": [ + "Import the required libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6NTYbdWcseuK" + }, + "outputs": [], + "source": [ + "import os\n", + "import sys\n", + "import pickle\n", + "from collections import namedtuple\n", + "from datetime import datetime\n", + "import numpy as np\n", + "import apache_beam as beam\n", + "from apache_beam.transforms import util\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import annoy\n", + "from sklearn.random_projection import gaussian_random_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tx0SZa6-7b-f" + }, + "outputs": [], + "source": [ + "print('TF version: {}'.format(tf.__version__))\n", + "print('TF-Hub version: {}'.format(hub.__version__))\n", + "print('Apache Beam version: {}'.format(beam.__version__))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P6Imq876rLWx" + }, + "source": [ + "## 1. Download Sample Data\n", + "\n", + "[A Million News Headlines](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/SYBGZL#) dataset contains news headlines published over a period of 15 years sourced from the reputable Australian Broadcasting Corp. (ABC). This news dataset has a summarised historical record of noteworthy events in the globe from early-2003 to end-2017 with a more granular focus on Australia. \n", + "\n", + "**Format**: Tab-separated two-column data: 1) publication date and 2) headline text. We are only interested in the headline text.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OpF57n8e5C9D" + }, + "outputs": [], + "source": [ + "!wget 'https://dataverse.harvard.edu/api/access/datafile/3450625?format=tab&gbrecs=true' -O raw.tsv\n", + "!wc -l raw.tsv\n", + "!head raw.tsv" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Reeoc9z0zTxJ" + }, + "source": [ + "For simplicity, we only keep the headline text and remove the publication date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "INPWa4upv_yJ" + }, + "outputs": [], + "source": [ + "!rm -r corpus\n", + "!mkdir corpus\n", + "\n", + "with open('corpus/text.txt', 'w') as out_file:\n", + " with open('raw.tsv', 'r') as in_file:\n", + " for line in in_file:\n", + " headline = line.split('\\t')[1].strip().strip('\"')\n", + " out_file.write(headline+\"\\n\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5-oedX40z6o2" + }, + "outputs": [], + "source": [ + "!tail corpus/text.txt" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2AngMtH50jNb" + }, + "source": [ + "## 2. Generate Embeddings for the Data.\n", + "\n", + "In this tutorial, we use the [Neural Network Language Model (NNLM)](https://tfhub.dev/google/nnlm-en-dim128/2) to generate embeddings for the headline data. The sentence embeddings can then be easily used to compute sentence level meaning similarity. We run the embedding generation process using Apache Beam." 
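Before wiring the model into a Beam pipeline, it may help to see what the module does on its own. A minimal sketch of calling the NNLM module directly on a couple of made-up headlines (the sentences are illustrative only):

```python
# Minimal sketch: embed example headlines directly with the NNLM module,
# without Apache Beam. Each sentence maps to one 128-dimensional vector.
import tensorflow_hub as hub

nnlm = hub.load('https://tfhub.dev/google/nnlm-en-dim128/2')
sample_headlines = ['government considers new energy policy',
                    'local team wins championship final']
embeddings = nnlm(sample_headlines)
print(embeddings.shape)  # -> (2, 128)
```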
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "F_DvXnDB1pEX" + }, + "source": [ + "### Embedding extraction method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yL7OEY1E0A35" + }, + "outputs": [], + "source": [ + "embed_fn = None\n", + "\n", + "def generate_embeddings(text, model_url, random_projection_matrix=None):\n", + " # Beam will run this function in different processes that need to\n", + " # import hub and load embed_fn (if not previously loaded)\n", + " global embed_fn\n", + " if embed_fn is None:\n", + " embed_fn = hub.load(model_url)\n", + " embedding = embed_fn(text).numpy()\n", + " if random_projection_matrix is not None:\n", + " embedding = embedding.dot(random_projection_matrix)\n", + " return text, embedding\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g6pXBVxsVUbm" + }, + "source": [ + "### Convert to tf.Example method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JMjqjWZNVVzd" + }, + "outputs": [], + "source": [ + "def to_tf_example(entries):\n", + " examples = []\n", + "\n", + " text_list, embedding_list = entries\n", + " for i in range(len(text_list)):\n", + " text = text_list[i]\n", + " embedding = embedding_list[i]\n", + "\n", + " features = {\n", + " 'text': tf.train.Feature(\n", + " bytes_list=tf.train.BytesList(value=[text.encode('utf-8')])),\n", + " 'embedding': tf.train.Feature(\n", + " float_list=tf.train.FloatList(value=embedding.tolist()))\n", + " }\n", + " \n", + " example = tf.train.Example(\n", + " features=tf.train.Features(\n", + " feature=features)).SerializeToString(deterministic=True)\n", + " \n", + " examples.append(example)\n", + " \n", + " return examples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gDiV4uQCVYGH" + }, + "source": [ + "### Beam pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jCGUIB172m2G" + }, + "outputs": [], + "source": [ + "def run_hub2emb(args):\n", + " '''Runs the embedding generation pipeline'''\n", + "\n", + " options = beam.options.pipeline_options.PipelineOptions(**args)\n", + " args = namedtuple(\"options\", args.keys())(*args.values())\n", + "\n", + " with beam.Pipeline(args.runner, options=options) as pipeline:\n", + " (\n", + " pipeline\n", + " | 'Read sentences from files' >> beam.io.ReadFromText(\n", + " file_pattern=args.data_dir)\n", + " | 'Batch elements' >> util.BatchElements(\n", + " min_batch_size=args.batch_size, max_batch_size=args.batch_size)\n", + " | 'Generate embeddings' >> beam.Map(\n", + " generate_embeddings, args.model_url, args.random_projection_matrix)\n", + " | 'Encode to tf example' >> beam.FlatMap(to_tf_example)\n", + " | 'Write to TFRecords files' >> beam.io.WriteToTFRecord(\n", + " file_path_prefix='{}/emb'.format(args.output_dir),\n", + " file_name_suffix='.tfrecords')\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nlbQdiYNVvne" + }, + "source": [ + "### Generating Random Projection Weight Matrix\n", + "\n", + "[Random projection](https://en.wikipedia.org/wiki/Random_projection) is a simple, yet powerful technique used to reduce the dimensionality of a set of points which lie in Euclidean space. 
For a theoretical background, see the [Johnson-Lindenstrauss lemma](https://en.wikipedia.org/wiki/Johnson%E2%80%93Lindenstrauss_lemma).\n", + "\n", + "Reducing the dimensionality of the embeddings with random projection means less time needed to build and query the ANN index.\n", + "\n", + "In this tutorial, we use [Gaussian Random Projection](https://en.wikipedia.org/wiki/Random_projection#Gaussian_random_projection) from the [Scikit-learn](https://scikit-learn.org/stable/modules/random_projection.html#gaussian-random-projection) library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1yw1xgtNVv52" + }, + "outputs": [], + "source": [ + "def generate_random_projection_weights(original_dim, projected_dim):\n", + " random_projection_matrix = None\n", + " random_projection_matrix = gaussian_random_matrix(\n", + " n_components=projected_dim, n_features=original_dim).T\n", + " print(\"A Gaussian random weight matrix was created with shape {}\".format(random_projection_matrix.shape))\n", + " print('Storing random projection matrix to disk...')\n", + " with open('random_projection_matrix', 'wb') as handle:\n", + " pickle.dump(random_projection_matrix, \n", + " handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " \n", + " return random_projection_matrix" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aJZUfT3NE7kj" + }, + "source": [ + "### Set parameters\n", + "If you want to build an index using the original embedding space without random projection, set the `projected_dim` parameter to `None`. Note that this will slow down the indexing step for high-dimensional embeddings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "77-Cow7uE74T" + }, + "outputs": [], + "source": [ + "model_url = 'https://tfhub.dev/google/nnlm-en-dim128/2' #@param {type:\"string\"}\n", + "projected_dim = 64 #@param {type:\"number\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "On-MbzD922kb" + }, + "source": [ + "### Run pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Y3I1Wv4i21yY" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "\n", + "output_dir = tempfile.mkdtemp()\n", + "original_dim = hub.load(model_url)(['']).shape[1]\n", + "random_projection_matrix = None\n", + "\n", + "if projected_dim:\n", + " random_projection_matrix = generate_random_projection_weights(\n", + " original_dim, projected_dim)\n", + "\n", + "args = {\n", + " 'job_name': 'hub2emb-{}'.format(datetime.utcnow().strftime('%y%m%d-%H%M%S')),\n", + " 'runner': 'DirectRunner',\n", + " 'batch_size': 1024,\n", + " 'data_dir': 'corpus/*.txt',\n", + " 'output_dir': output_dir,\n", + " 'model_url': model_url,\n", + " 'random_projection_matrix': random_projection_matrix,\n", + "}\n", + "\n", + "print(\"Pipeline args are set.\")\n", + "args" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iS9obmeP4ZOA" + }, + "outputs": [], + "source": [ + "print(\"Running pipeline...\")\n", + "%time run_hub2emb(args)\n", + "print(\"Pipeline is done.\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JAwOo7gQWvVd" + }, + "outputs": [], + "source": [ + "!ls {output_dir}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HVnee4e6U90u" + }, + "source": [ + "Read some of the generated embeddings..."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-K7pGXlXOj1N" + }, + "outputs": [], + "source": [ + "embed_file = os.path.join(output_dir, 'emb-00000-of-00001.tfrecords')\n", + "sample = 5\n", + "\n", + "# Create a description of the features.\n", + "feature_description = {\n", + " 'text': tf.io.FixedLenFeature([], tf.string),\n", + " 'embedding': tf.io.FixedLenFeature([projected_dim], tf.float32)\n", + "}\n", + "\n", + "def _parse_example(example):\n", + " # Parse the input `tf.Example` proto using the dictionary above.\n", + " return tf.io.parse_single_example(example, feature_description)\n", + "\n", + "dataset = tf.data.TFRecordDataset(embed_file)\n", + "for record in dataset.take(sample).map(_parse_example):\n", + " print(\"{}: {}\".format(record['text'].numpy().decode('utf-8'), record['embedding'].numpy()[:10]))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agGoaMSgY8wN" + }, + "source": [ + "## 3. Build the ANN Index for the Embeddings\n", + "\n", + "[ANNOY](https://github.com/spotify/annoy) (Approximate Nearest Neighbors Oh Yeah) is a C++ library with Python bindings to search for points in space that are close to a given query point. It also creates large read-only file-based data structures that are mapped into memory. It is built and used by [Spotify](https://www.spotify.com) for music recommendations. If you are interested you can play along with other alternatives to ANNOY such as [NGT](https://github.com/yahoojapan/NGT), [FAISS](https://github.com/facebookresearch/faiss), etc. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UcPDspU3WjgH" + }, + "outputs": [], + "source": [ + "def build_index(embedding_files_pattern, index_filename, vector_length, \n", + " metric='angular', num_trees=100):\n", + " '''Builds an ANNOY index'''\n", + "\n", + " annoy_index = annoy.AnnoyIndex(vector_length, metric=metric)\n", + " # Mapping between the item and its identifier in the index\n", + " mapping = {}\n", + "\n", + " embed_files = tf.io.gfile.glob(embedding_files_pattern)\n", + " num_files = len(embed_files)\n", + " print('Found {} embedding file(s).'.format(num_files))\n", + "\n", + " item_counter = 0\n", + " for i, embed_file in enumerate(embed_files):\n", + " print('Loading embeddings in file {} of {}...'.format(i+1, num_files))\n", + " dataset = tf.data.TFRecordDataset(embed_file)\n", + " for record in dataset.map(_parse_example):\n", + " text = record['text'].numpy().decode(\"utf-8\")\n", + " embedding = record['embedding'].numpy()\n", + " mapping[item_counter] = text\n", + " annoy_index.add_item(item_counter, embedding)\n", + " item_counter += 1\n", + " if item_counter % 100000 == 0:\n", + " print('{} items loaded to the index'.format(item_counter))\n", + "\n", + " print('A total of {} items added to the index'.format(item_counter))\n", + "\n", + " print('Building the index with {} trees...'.format(num_trees))\n", + " annoy_index.build(n_trees=num_trees)\n", + " print('Index is successfully built.')\n", + " \n", + " print('Saving index to disk...')\n", + " annoy_index.save(index_filename)\n", + " print('Index is saved to disk.')\n", + " print(\"Index file size: {} GB\".format(\n", + " round(os.path.getsize(index_filename) / float(1024 ** 3), 2)))\n", + " annoy_index.unload()\n", + "\n", + " print('Saving mapping to disk...')\n", + " with open(index_filename + '.mapping', 'wb') as handle:\n", + " pickle.dump(mapping, handle, protocol=pickle.HIGHEST_PROTOCOL)\n", + " print('Mapping is 
saved to disk.')\n", + " print(\"Mapping file size: {} MB\".format(\n", + " round(os.path.getsize(index_filename + '.mapping') / float(1024 ** 2), 2)))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "AgyOQhUq6FNE" + }, + "outputs": [], + "source": [ + "embedding_files = \"{}/emb-*.tfrecords\".format(output_dir)\n", + "embedding_dimension = projected_dim\n", + "index_filename = \"index\"\n", + "\n", + "!rm {index_filename}\n", + "!rm {index_filename}.mapping\n", + "\n", + "%time build_index(embedding_files, index_filename, embedding_dimension)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ic31Tm5cgAd5" + }, + "outputs": [], + "source": [ + "!ls" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "maGxDl8ufP-p" + }, + "source": [ + "## 4. Use the Index for Similarity Matching\n", + "Now we can use the ANN index to find news headlines that are semantically close to an input query." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_dIs8W78fYPp" + }, + "source": [ + "### Load the index and the mapping files" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jlTTrbQHayvb" + }, + "outputs": [], + "source": [ + "index = annoy.AnnoyIndex(embedding_dimension)\n", + "index.load(index_filename, prefault=True)\n", + "print('Annoy index is loaded.')\n", + "with open(index_filename + '.mapping', 'rb') as handle:\n", + " mapping = pickle.load(handle)\n", + "print('Mapping file is loaded.')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y6liFMSUh08J" + }, + "source": [ + "### Similarity matching method" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mUxjTag8hc16" + }, + "outputs": [], + "source": [ + "def find_similar_items(embedding, num_matches=5):\n", + " '''Finds similar items to a given embedding in the ANN index'''\n", + " ids = index.get_nns_by_vector(\n", + " embedding, num_matches, search_k=-1, include_distances=False)\n", + " items = [mapping[i] for i in ids]\n", + " return items" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hjerNpmZja0A" + }, + "source": [ + "### Extract embedding from a given query" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a0IIXzfBjZ19" + }, + "outputs": [], + "source": [ + "# Load the TF-Hub model\n", + "print(\"Loading the TF-Hub model...\")\n", + "%time embed_fn = hub.load(model_url)\n", + "print(\"TF-Hub model is loaded.\")\n", + "\n", + "random_projection_matrix = None\n", + "if os.path.exists('random_projection_matrix'):\n", + " print(\"Loading random projection matrix...\")\n", + " with open('random_projection_matrix', 'rb') as handle:\n", + " random_projection_matrix = pickle.load(handle)\n", + " print('random projection matrix is loaded.')\n", + "\n", + "def extract_embeddings(query):\n", + " '''Generates the embedding for the query'''\n", + " query_embedding = embed_fn([query])[0].numpy()\n", + " if random_projection_matrix is not None:\n", + " query_embedding = query_embedding.dot(random_projection_matrix)\n", + " return query_embedding\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kCoCNROujEIO" + }, + "outputs": [], + "source": [ + "extract_embeddings(\"Hello Machine Learning!\")[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "koINo8Du--8C" + }, + "source": [ + "### Enter a query to find the most similar items" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "wC0uLjvfk5nB" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "query = \"confronting global challenges\" #@param {type:\"string\"}\n", + "\n", + "print(\"Generating embedding for the query...\")\n", + "%time query_embedding = extract_embeddings(query)\n", + "\n", + "print(\"\")\n", + "print(\"Finding relevant items in the index...\")\n", + "%time items = find_similar_items(query_embedding, 10)\n", + "\n", + "print(\"\")\n", + "print(\"Results:\")\n", + "print(\"=========\")\n", + "for item in items:\n", + " print(item)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TkRSqs77tDuX" + }, + "source": [ + "## Want to learn more?\n", + "\n", + "You can learn more about TensorFlow at [tensorflow.org](https://www.tensorflow.org/) and see the TF-Hub API documentation at [tensorflow.org/hub](https://www.tensorflow.org/hub/). Find available TensorFlow Hub models at [tfhub.dev](https://tfhub.dev/) including more text embedding models and image feature vector models.\n", + "\n", + "Also check out the [Machine Learning Crash Course](https://developers.google.com/machine-learning/crash-course/) which is Google's fast-paced, practical introduction to machine learning." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "ACbjNjyO4f_8", + "g6pXBVxsVUbm" + ], + "name": "tf2_semantic_approximate_nearest_neighbors.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf2_text_classification.ipynb b/site/en/hub/tutorials/tf2_text_classification.ipynb new file mode 100644 index 00000000000..e2dae15bde0 --- /dev/null +++ b/site/en/hub/tutorials/tf2_text_classification.ipynb @@ -0,0 +1,571 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Ic4_occAAiAT" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "ioaprt5q5US7" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "yCl0eTNH5RS3" + }, + "outputs": [], + "source": [ + "#@title MIT License\n", + "#\n", + "# Copyright (c) 2017 François Chollet # IGNORE_COPYRIGHT: cleared by OSS licensing\n", + "#\n", + "# Permission is hereby granted, free of charge, to any person obtaining a\n", + "# copy of this software and associated documentation files (the \"Software\"),\n", + "# to deal in the Software without restriction, including without limitation\n", + "# the rights to use, copy, modify, merge, publish, distribute, sublicense,\n", + "# and/or sell copies of the Software, and to permit persons to whom the\n", + "# Software is furnished to do so, subject to the following conditions:\n", + "#\n", + "# The above copyright notice and this permission notice shall be included in\n", + "# all copies or substantial portions of the Software.\n", + "#\n", + "# THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n", + "# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n", + "# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n", + "# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n", + "# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n", + "# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n", + "# DEALINGS IN THE SOFTWARE." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ItXfxkxvosLH" + }, + "source": [ + "# Text Classification with Movie Reviews" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Eg62Pmz3o83v" + }, + "source": [ + "This notebook classifies movie reviews as *positive* or *negative* using the text of the review. This is an example of *binary*—or two-class—classification, an important and widely applicable kind of machine learning problem. \n", + "\n", + "We'll use the [IMDB dataset](https://www.tensorflow.org/api_docs/python/tf/keras/datasets/imdb) that contains the text of 50,000 movie reviews from the [Internet Movie Database](https://www.imdb.com/). These are split into 25,000 reviews for training and 25,000 reviews for testing. The training and testing sets are *balanced*, meaning they contain an equal number of positive and negative reviews. \n", + "\n", + "This notebook uses [tf.keras](https://www.tensorflow.org/api_docs/python/tf/keras), a high-level API to build and train models in TensorFlow, and [TensorFlow Hub](https://www.tensorflow.org/hub), a library and platform for transfer learning. For a more advanced text classification tutorial using `tf.keras`, see the [MLCC Text Classification Guide](https://developers.google.com/machine-learning/guides/text-classification/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qrk8NjzhSBh-" + }, + "source": [ + "### More models\n", + "[Here](https://tfhub.dev/s?module-type=text-embedding) you can find more expressive or performant models that you could use to generate the text embedding." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2ew7HTbPpCJH" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_datasets as tfds\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "print(\"Version: \", tf.__version__)\n", + "print(\"Eager mode: \", tf.executing_eagerly())\n", + "print(\"Hub version: \", hub.__version__)\n", + "print(\"GPU is\", \"available\" if tf.config.list_physical_devices('GPU') else \"NOT AVAILABLE\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iAsKG535pHep" + }, + "source": [ + "## Download the IMDB dataset\n", + "\n", + "The IMDB dataset is available on [TensorFlow datasets](https://github.com/tensorflow/datasets). The following code downloads the IMDB dataset to your machine (or the colab runtime):" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zXXx5Oc3pOmN" + }, + "outputs": [], + "source": [ + "train_data, test_data = tfds.load(name=\"imdb_reviews\", split=[\"train\", \"test\"], \n", + " batch_size=-1, as_supervised=True)\n", + "\n", + "train_examples, train_labels = tfds.as_numpy(train_data)\n", + "test_examples, test_labels = tfds.as_numpy(test_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "l50X3GfjpU4r" + }, + "source": [ + "## Explore the data \n", + "\n", + "Let's take a moment to understand the format of the data. Each example is a sentence representing the movie review and a corresponding label. The sentence is not preprocessed in any way. The label is an integer value of either 0 or 1, where 0 is a negative review, and 1 is a positive review." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y8qCnve_-lkO" + }, + "outputs": [], + "source": [ + "print(\"Training entries: {}, test entries: {}\".format(len(train_examples), len(test_examples)))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RnKvHWW4-lkW" + }, + "source": [ + "Let's print the first 10 examples." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QtTS4kpEpjbi" + }, + "outputs": [], + "source": [ + "train_examples[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IFtaCHTdc-GY" + }, + "source": [ + "Let's also print the first 10 labels." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tvAjVXOWc6Mj" + }, + "outputs": [], + "source": [ + "train_labels[:10]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LLC02j2g-llC" + }, + "source": [ + "## Build the model\n", + "\n", + "The neural network is created by stacking layers—this requires three main architectural decisions:\n", + "\n", + "* How to represent the text?\n", + "* How many layers to use in the model?\n", + "* How many *hidden units* to use for each layer?\n", + "\n", + "In this example, the input data consists of sentences. The labels to predict are either 0 or 1.\n", + "\n", + "One way to represent the text is to convert sentences into embedding vectors. We can use a pre-trained text embedding as the first layer, which will have two advantages:\n", + "* we don't have to worry about text preprocessing,\n", + "* we can benefit from transfer learning.\n", + "\n", + "For this example, we will use a model from [TensorFlow Hub](https://www.tensorflow.org/hub) called [google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2).\n", + "\n", + "There are two other models to test for the sake of this tutorial:\n", + "* [google/nnlm-en-dim50-with-normalization/2](https://tfhub.dev/google/nnlm-en-dim50-with-normalization/2) - same as [google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2), but with additional text normalization to remove punctuation. This can help get better coverage of in-vocabulary embeddings for the tokens in your input text.\n", + "* [google/nnlm-en-dim128-with-normalization/2](https://tfhub.dev/google/nnlm-en-dim128-with-normalization/2) - A larger model with an embedding dimension of 128 instead of the smaller 50." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "In2nDpTLkgKa" + }, + "source": [ + "Let's first create a Keras layer that uses a TensorFlow Hub model to embed the sentences, and try it out on a couple of input examples. Note that the output shape of the produced embeddings is as expected: `(num_examples, embedding_dimension)`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_NUbzVeYkgcO" + }, + "outputs": [], + "source": [ + "model = \"https://tfhub.dev/google/nnlm-en-dim50/2/"\n", + "hub_layer = hub.KerasLayer(model, input_shape=[], dtype=tf.string, trainable=True)\n", + "hub_layer(train_examples[:3])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dfSbV6igl1EH" + }, + "source": [ + "Let's now build the full model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xpKOoWgu-llD" + }, + "outputs": [], + "source": [ + "model = tf.keras.Sequential()\n", + "model.add(hub_layer)\n", + "model.add(tf.keras.layers.Dense(16, activation='relu'))\n", + "model.add(tf.keras.layers.Dense(1))\n", + "\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6PbKQ6mucuKL" + }, + "source": [ + "The layers are stacked sequentially to build the classifier:\n", + "\n", + "1. The first layer is a TensorFlow Hub layer. This layer uses a pre-trained Saved Model to map a sentence into its embedding vector. The model that we are using ([google/nnlm-en-dim50/2](https://tfhub.dev/google/nnlm-en-dim50/2)) splits the sentence into tokens, embeds each token and then combines the embedding. The resulting dimensions are: `(num_examples, embedding_dimension)`.\n", + "2. This fixed-length output vector is piped through a fully-connected (`Dense`) layer with 16 hidden units.\n", + "3. The last layer is densely connected with a single output node. This outputs logits: the log-odds of the true class, according to the model." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0XMwnDOp-llH" + }, + "source": [ + "### Hidden units\n", + "\n", + "The above model has two intermediate or \"hidden\" layers, between the input and output. The number of outputs (units, nodes, or neurons) is the dimension of the representational space for the layer. In other words, the amount of freedom the network is allowed when learning an internal representation.\n", + "\n", + "If a model has more hidden units (a higher-dimensional representation space), and/or more layers, then the network can learn more complex representations. However, it makes the network more computationally expensive and may lead to learning unwanted patterns—patterns that improve performance on training data but not on the test data. This is called *overfitting*, and we'll explore it later." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L4EqVWg4-llM" + }, + "source": [ + "### Loss function and optimizer\n", + "\n", + "A model needs a loss function and an optimizer for training. Since this is a binary classification problem and the model outputs a probability (a single-unit layer with a sigmoid activation), we'll use the `binary_crossentropy` loss function. \n", + "\n", + "This isn't the only choice for a loss function, you could, for instance, choose `mean_squared_error`. 
But, generally, `binary_crossentropy` is better for dealing with probabilities—it measures the \"distance\" between probability distributions, or in our case, between the ground-truth distribution and the predictions.\n", + "\n", + "Later, when we are exploring regression problems (say, to predict the price of a house), we will see how to use another loss function called mean squared error.\n", + "\n", + "Now, configure the model to use an optimizer and a loss function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Mr0GP-cQ-llN" + }, + "outputs": [], + "source": [ + "model.compile(optimizer='adam',\n", + " loss=tf.losses.BinaryCrossentropy(from_logits=True),\n", + " metrics=[tf.metrics.BinaryAccuracy(threshold=0.0, name='accuracy')])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hCWYwkug-llQ" + }, + "source": [ + "## Create a validation set\n", + "\n", + "When training, we want to check the accuracy of the model on data it hasn't seen before. Create a *validation set* by setting apart 10,000 examples from the original training data. (Why not use the testing set now? Our goal is to develop and tune our model using only the training data, then use the test data just once to evaluate our accuracy)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-NpcXY9--llS" + }, + "outputs": [], + "source": [ + "x_val = train_examples[:10000]\n", + "partial_x_train = train_examples[10000:]\n", + "\n", + "y_val = train_labels[:10000]\n", + "partial_y_train = train_labels[10000:]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "35jv_fzP-llU" + }, + "source": [ + "## Train the model\n", + "\n", + "Train the model for 40 epochs in mini-batches of 512 samples. This is 40 iterations over all samples in the `x_train` and `y_train` tensors. While training, monitor the model's loss and accuracy on the 10,000 samples from the validation set:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tXSGrjWZ-llW" + }, + "outputs": [], + "source": [ + "history = model.fit(partial_x_train,\n", + " partial_y_train,\n", + " epochs=40,\n", + " batch_size=512,\n", + " validation_data=(x_val, y_val),\n", + " verbose=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9EEGuDVuzb5r" + }, + "source": [ + "## Evaluate the model\n", + "\n", + "And let's see how the model performs. Two values will be returned. Loss (a number which represents our error, lower values are better), and accuracy." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zOMKywn4zReN" + }, + "outputs": [], + "source": [ + "results = model.evaluate(test_examples, test_labels)\n", + "\n", + "print(results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "z1iEXVTR0Z2t" + }, + "source": [ + "This fairly naive approach achieves an accuracy of about 87%. With more advanced approaches, the model should get closer to 95%." 
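Because the last layer outputs logits, predictions on new reviews can be turned into probabilities with a sigmoid. A minimal sketch, assuming the trained `model` from above (the review texts are made up for illustration):

```python
# Minimal sketch: score a couple of made-up reviews with the trained model.
# The model outputs logits, so apply a sigmoid to get probabilities.
import numpy as np
import tensorflow as tf

sample_reviews = np.array(["An absolutely wonderful film with great acting.",
                           "This was a waste of two hours."])
logits = model.predict(sample_reviews)
probabilities = tf.sigmoid(logits)
print(probabilities.numpy())  # values close to 1 indicate a positive review
```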
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5KggXVeL-llZ" + }, + "source": [ + "## Create a graph of accuracy and loss over time\n", + "\n", + "`model.fit()` returns a `History` object that contains a dictionary with everything that happened during training:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VcvSXvhp-llb" + }, + "outputs": [], + "source": [ + "history_dict = history.history\n", + "history_dict.keys()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nRKsqL40-lle" + }, + "source": [ + "There are four entries: one for each monitored metric during training and validation. We can use these to plot the training and validation loss for comparison, as well as the training and validation accuracy:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nGoYf2Js-lle" + }, + "outputs": [], + "source": [ + "acc = history_dict['accuracy']\n", + "val_acc = history_dict['val_accuracy']\n", + "loss = history_dict['loss']\n", + "val_loss = history_dict['val_loss']\n", + "\n", + "epochs = range(1, len(acc) + 1)\n", + "\n", + "# \"bo\" is for \"blue dot\"\n", + "plt.plot(epochs, loss, 'bo', label='Training loss')\n", + "# b is for \"solid blue line\"\n", + "plt.plot(epochs, val_loss, 'b', label='Validation loss')\n", + "plt.title('Training and validation loss')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Loss')\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6hXx-xOv-llh" + }, + "outputs": [], + "source": [ + "plt.clf() # clear figure\n", + "\n", + "plt.plot(epochs, acc, 'bo', label='Training acc')\n", + "plt.plot(epochs, val_acc, 'b', label='Validation acc')\n", + "plt.title('Training and validation accuracy')\n", + "plt.xlabel('Epochs')\n", + "plt.ylabel('Accuracy')\n", + "plt.legend()\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oFEmZ5zq-llk" + }, + "source": [ + "In this plot, the dots represent the training loss and accuracy, and the solid lines are the validation loss and accuracy.\n", + "\n", + "Notice the training loss *decreases* with each epoch and the training accuracy *increases* with each epoch. This is expected when using a gradient descent optimization—it should minimize the desired quantity on every iteration.\n", + "\n", + "This isn't the case for the validation loss and accuracy—they seem to peak after about twenty epochs. This is an example of overfitting: the model performs better on the training data than it does on data it has never seen before. After this point, the model over-optimizes and learns representations *specific* to the training data that do not *generalize* to test data.\n", + "\n", + "For this particular case, we could prevent overfitting by simply stopping the training after twenty or so epochs. Later, you'll see how to do this automatically with a callback." 
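A minimal sketch of that callback-based approach, assuming the `model`, training arrays, and validation set defined above (the `patience` value is an arbitrary choice for illustration):

```python
# Minimal sketch: stop training automatically when validation loss stops
# improving, instead of hand-picking the number of epochs.
import tensorflow as tf

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',         # watch the validation loss
    patience=3,                 # tolerate 3 epochs without improvement
    restore_best_weights=True)  # roll back to the best weights seen

history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=40,
                    batch_size=512,
                    validation_data=(x_val, y_val),
                    callbacks=[early_stopping],
                    verbose=1)
```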
+ ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "tf2_text_classification.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_delf_module.ipynb b/site/en/hub/tutorials/tf_hub_delf_module.ipynb new file mode 100644 index 00000000000..b6dec2eae00 --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_delf_module.ipynb @@ -0,0 +1,372 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "RUymE2l9GZfO" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "code", + "id": "JMyTNwSJGGWg" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0DmDwGPOGfaQ" + }, + "source": [ + "# How to match images using DELF and TensorFlow Hub\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f3nk38tIKytQ" + }, + "source": [ + "TensorFlow Hub (TF-Hub) is a platform to share machine learning expertise packaged in reusable resources, notably pre-trained **modules**.\n", + "\n", + "In this colab, we will use a module that packages the [DELF](https://github.com/tensorflow/models/tree/master/research/delf) neural network and logic for processing images to identify keypoints and their descriptors. The weights of the neural network were trained on images of landmarks as described in [this paper](https://arxiv.org/abs/1612.06321)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "lrKaWOB_cuS3" + }, + "outputs": [], + "source": [ + "!pip install scikit-image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SI7eVflHHxvi" + }, + "outputs": [], + "source": [ + "from absl import logging\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from PIL import Image, ImageOps\n", + "from scipy.spatial import cKDTree\n", + "from skimage.feature import plot_matched_features\n", + "from skimage.measure import ransac\n", + "from skimage.transform import AffineTransform\n", + "from six import BytesIO\n", + "\n", + "import tensorflow as tf\n", + "\n", + "import tensorflow_hub as hub\n", + "from six.moves.urllib.request import urlopen" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qquo2HiONiDK" + }, + "source": [ + "## The data\n", + "\n", + "In the next cell, we specify the URLs of two images we would like to process with DELF in order to match and compare them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "l93ye4WFIqIV" + }, + "outputs": [], + "source": [ + "#@title Choose images\n", + "images = \"Bridge of Sighs\" #@param [\"Bridge of Sighs\", \"Golden Gate\", \"Acropolis\", \"Eiffel tower\"]\n", + "if images == \"Bridge of Sighs\":\n", + " # from: https://commons.wikimedia.org/wiki/File:Bridge_of_Sighs,_Oxford.jpg\n", + " # by: N.H. 
Fischer\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/2/28/Bridge_of_Sighs%2C_Oxford.jpg'\n", + " # from https://commons.wikimedia.org/wiki/File:The_Bridge_of_Sighs_and_Sheldonian_Theatre,_Oxford.jpg\n", + " # by: Matthew Hoser\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/c/c3/The_Bridge_of_Sighs_and_Sheldonian_Theatre%2C_Oxford.jpg'\n", + "elif images == \"Golden Gate\":\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/1/1e/Golden_gate2.jpg'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/3/3e/GoldenGateBridge.jpg'\n", + "elif images == \"Acropolis\":\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/c/ce/2006_01_21_Ath%C3%A8nes_Parth%C3%A9non.JPG'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/5/5c/ACROPOLIS_1969_-_panoramio_-_jean_melis.jpg'\n", + "else:\n", + " IMAGE_1_URL = 'https://upload.wikimedia.org/wikipedia/commons/d/d8/Eiffel_Tower%2C_November_15%2C_2011.jpg'\n", + " IMAGE_2_URL = 'https://upload.wikimedia.org/wikipedia/commons/a/a8/Eiffel_Tower_from_immediately_beside_it%2C_Paris_May_2008.jpg'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ttlHtcmiN6QF" + }, + "source": [ + "Download, resize, save and display the images." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "E6RMomGJSfeb" + }, + "outputs": [], + "source": [ + "def download_and_resize(name, url, new_width=256, new_height=256):\n", + " path = tf.keras.utils.get_file(url.split('/')[-1], url)\n", + " image = Image.open(path)\n", + " image = ImageOps.fit(image, (new_width, new_height), Image.LANCZOS)\n", + " return image" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "reajtO7XSj7Y" + }, + "outputs": [], + "source": [ + "image1 = download_and_resize('image_1.jpg', IMAGE_1_URL)\n", + "image2 = download_and_resize('image_2.jpg', IMAGE_2_URL)\n", + "\n", + "plt.subplot(1,2,1)\n", + "plt.imshow(image1)\n", + "plt.subplot(1,2,2)\n", + "plt.imshow(image2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "leKqkoT9OP7r" + }, + "source": [ + "## Apply the DELF module to the data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A3WoT1-SPoTI" + }, + "source": [ + "The DELF module takes an image as input and will describe noteworthy points with vectors. The following cell contains the core of this colab's logic." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pXr2tUhvp1Ue" + }, + "outputs": [], + "source": [ + "delf = hub.load('https://tfhub.dev/google/delf/1').signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pvAU_gUHoYcY" + }, + "outputs": [], + "source": [ + "def run_delf(image):\n", + " np_image = np.array(image)\n", + " float_image = tf.image.convert_image_dtype(np_image, tf.float32)\n", + "\n", + " return delf(\n", + " image=float_image,\n", + " score_threshold=tf.constant(100.0),\n", + " image_scales=tf.constant([0.25, 0.3536, 0.5, 0.7071, 1.0, 1.4142, 2.0]),\n", + " max_feature_num=tf.constant(1000))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FEzgHAT0UDNP" + }, + "outputs": [], + "source": [ + "result1 = run_delf(image1)\n", + "result2 = run_delf(image2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NByyBA5yOL2b" + }, + "source": [ + "## Use the locations and description vectors to match the images" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "mVaKXT3cMSib" + }, + "outputs": [], + "source": [ + "#@title TensorFlow is not needed for this post-processing and visualization\n", + "def match_images(image1, image2, result1, result2):\n", + " distance_threshold = 0.8\n", + "\n", + " # Read features.\n", + " num_features_1 = result1['locations'].shape[0]\n", + " print(\"Loaded image 1's %d features\" % num_features_1)\n", + " \n", + " num_features_2 = result2['locations'].shape[0]\n", + " print(\"Loaded image 2's %d features\" % num_features_2)\n", + "\n", + " # Find nearest-neighbor matches using a KD tree.\n", + " d1_tree = cKDTree(result1['descriptors'])\n", + " _, indices = d1_tree.query(\n", + " result2['descriptors'],\n", + " distance_upper_bound=distance_threshold)\n", + "\n", + " # Select feature locations for putative matches.\n", + " locations_2_to_use = np.array([\n", + " result2['locations'][i,]\n", + " for i in range(num_features_2)\n", + " if indices[i] != num_features_1\n", + " ])\n", + " locations_1_to_use = np.array([\n", + " result1['locations'][indices[i],]\n", + " for i in range(num_features_2)\n", + " if indices[i] != num_features_1\n", + " ])\n", + "\n", + " # Perform geometric verification using RANSAC.\n", + " _, inliers = ransac(\n", + " (locations_1_to_use, locations_2_to_use),\n", + " AffineTransform,\n", + " min_samples=3,\n", + " residual_threshold=20,\n", + " max_trials=1000)\n", + "\n", + " print('Found %d inliers' % sum(inliers))\n", + "\n", + " # Visualize correspondences.\n", + " _, ax = plt.subplots()\n", + " inlier_idxs = np.nonzero(inliers)[0]\n", + " plot_matched_features(\n", + " image1,\n", + " image2,\n", + " keypoints0=locations_1_to_use,\n", + " keypoints1=locations_2_to_use,\n", + " matches=np.column_stack((inlier_idxs, inlier_idxs)),\n", + " ax=ax,\n", + " )\n", + "\n", + " ax.axis('off')\n", + " ax.set_title('DELF correspondences')\n", + "\n", + " for line in ax.lines:\n", + " line.set_color('b')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tpEgqOvCYlPY" + }, + "outputs": [], + "source": [ + "match_images(image1, image2, result1, result2)" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "RUymE2l9GZfO" + ], + "name": "tf_hub_delf_module.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + 
"nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_film_example.ipynb b/site/en/hub/tutorials/tf_hub_film_example.ipynb new file mode 100644 index 00000000000..83bcd4bd12c --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_film_example.ipynb @@ -0,0 +1,576 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "qNLUPuRpkFv_" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "DQcWZm0FkPk-" + }, + "outputs": [], + "source": [ + "#@title Copyright 2022 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Exbxve1rHlrF" + }, + "source": [ + "# Frame interpolation using the FILM model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jMWFVTlbrQ8m" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "61H28S7ArUAZ" + }, + "source": [ + "Frame interpolation is the task of synthesizing many in-between images from a given set of images. The technique is often used for frame rate upsampling or creating slow-motion video effects.\n", + "\n", + "In this colab, you will use the FILM model to do frame interpolation. The colab also provides code snippets to create videos from the interpolated in-between images.\n", + "\n", + "For more information on FILM research, you can read more here:\n", + "- Google AI Blog: [Large Motion Frame Interpolation](https://ai.googleblog.com/2022/10/large-motion-frame-interpolation.html)\n", + "- Project Page: FILM: [Frame Interpolation for Large Motion](https://film-net.github.io/)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dVX7s6zMulsu" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oi5t2OEJsGBW" + }, + "outputs": [], + "source": [ + "!pip install mediapy\n", + "!sudo apt-get install -y ffmpeg" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BA1tq39MjOiF" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "\n", + "import requests\n", + "import numpy as np\n", + "\n", + "from typing import Generator, Iterable, List, Optional\n", + "import mediapy as media" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GTgXmeYGnT7q" + }, + "source": [ + "## Load the model from TFHub\n", + "\n", + "To load a model from TensorFlow Hub you need the tfhub library and the model handle which is its documentation url." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GojhvyAtjUt0" + }, + "outputs": [], + "source": [ + "model = hub.load(\"https://tfhub.dev/google/film/1/")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DOQJPsu2CwPk" + }, + "source": [ + "## Util function to load images from a url or locally\n", + "\n", + "This function loads an image and make it ready to be used by the model later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BPnh5uhQvFln" + }, + "outputs": [], + "source": [ + "_UINT8_MAX_F = float(np.iinfo(np.uint8).max)\n", + "\n", + "def load_image(img_url: str):\n", + " \"\"\"Returns an image with shape [height, width, num_channels], with pixels in [0..1] range, and type np.float32.\"\"\"\n", + "\n", + " if (img_url.startswith(\"https\")):\n", + " user_agent = {'User-agent': 'Colab Sample (https://tensorflow.org)'}\n", + " response = requests.get(img_url, headers=user_agent)\n", + " image_data = response.content\n", + " else:\n", + " image_data = tf.io.read_file(img_url)\n", + "\n", + " image = tf.io.decode_image(image_data, channels=3)\n", + " image_numpy = tf.cast(image, dtype=tf.float32).numpy()\n", + " return image_numpy / _UINT8_MAX_F\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yjDFns1zp5y6" + }, + "source": [ + "FILM's model input is a dictionary with the keys `time`, `x0`, `x1`:\n", + "\n", + "- `time`: position of the interpolated frame. 
Midway is `0.5`.\n", + "- `x0`: is the initial frame.\n", + "- `x1`: is the final frame.\n", + "\n", + "Both frames need to be normalized (done in the function `load_image` above) where each pixel is in the range of `[0..1]`.\n", + "\n", + "`time` is a value between `[0..1]` and it says where the generated image should be. 0.5 is midway between the input images.\n", + "\n", + "All three values need to have a batch dimension too." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VEQNQlHGsWSM" + }, + "outputs": [], + "source": [ + "# using images from the FILM repository (https://github.com/google-research/frame-interpolation/)\n", + "\n", + "image_1_url = \"https://github.com/google-research/frame-interpolation/blob/main/photos/one.png?raw=true\"\n", + "image_2_url = \"https://github.com/google-research/frame-interpolation/blob/main/photos/two.png?raw=true\"\n", + "\n", + "time = np.array([0.5], dtype=np.float32)\n", + "\n", + "image1 = load_image(image_1_url)\n", + "image2 = load_image(image_2_url)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "r6_MQE9EuF_K" + }, + "outputs": [], + "source": [ + "input = {\n", + " 'time': np.expand_dims(time, axis=0), # adding the batch dimension to the time\n", + " 'x0': np.expand_dims(image1, axis=0), # adding the batch dimension to the image\n", + " 'x1': np.expand_dims(image2, axis=0) # adding the batch dimension to the image\n", + "}\n", + "mid_frame = model(input)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nZkzYE2bptfD" + }, + "source": [ + "The model outputs a couple of results but what you'll use here is the `image` key, whose value is the interpolated frame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eClVbNFhA5Py" + }, + "outputs": [], + "source": [ + "print(mid_frame.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rE2csH3u8ePe" + }, + "outputs": [], + "source": [ + "frames = [image1, mid_frame['image'][0].numpy(), image2]\n", + "\n", + "media.show_images(frames, titles=['input image one', 'generated image', 'input image two'], height=250)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fS1AT8kn-f_l" + }, + "source": [ + "Let's create a video from the generated frames" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oFc53B3p37SH" + }, + "outputs": [], + "source": [ + "media.show_video(frames, fps=3, title='FILM interpolated video')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x5AOFNkj-lfO" + }, + "source": [ + "## Define a Frame Interpolator Library\n", + "\n", + "As you can see, the transition is not too smooth. \n", + "\n", + "To improve that you'll need many more interpolated frames.\n", + "\n", + "You could just keep running the model many times with intermediary images but there is a better solution.\n", + "\n", + "To generate many interpolated images and have a smoother video you'll create an interpolator library." 
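For intuition, one simple (but less flexible) way to get more in-between frames is to query the model at several `time` values between the same pair of inputs. A hedged sketch, reusing the `model`, `image1`, and `image2` from the cells above; the interpolator library defined next is the better solution for many frames:

```python
# Sketch: one model call per intermediate time value.
naive_frames = [image1]
for t in [0.25, 0.5, 0.75]:
    out = model({
        'time': np.array([[t]], dtype=np.float32),    # (batch, 1)
        'x0': np.expand_dims(image1, axis=0),         # (batch, H, W, 3)
        'x1': np.expand_dims(image2, axis=0),
    })
    naive_frames.append(out['image'][0].numpy())
naive_frames.append(image2)

media.show_video(naive_frames, fps=5, title='Naive multi-pass interpolation')
```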
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tsoDv_9geoZn" + }, + "outputs": [], + "source": [ + "\"\"\"A wrapper class for running a frame interpolation based on the FILM model on TFHub\n", + "\n", + "Usage:\n", + " interpolator = Interpolator()\n", + " result_batch = interpolator(image_batch_0, image_batch_1, batch_dt)\n", + " Where image_batch_1 and image_batch_2 are numpy tensors with TF standard\n", + " (B,H,W,C) layout, batch_dt is the sub-frame time in range [0..1], (B,) layout.\n", + "\"\"\"\n", + "\n", + "\n", + "def _pad_to_align(x, align):\n", + " \"\"\"Pads image batch x so width and height divide by align.\n", + "\n", + " Args:\n", + " x: Image batch to align.\n", + " align: Number to align to.\n", + "\n", + " Returns:\n", + " 1) An image padded so width % align == 0 and height % align == 0.\n", + " 2) A bounding box that can be fed readily to tf.image.crop_to_bounding_box\n", + " to undo the padding.\n", + " \"\"\"\n", + " # Input checking.\n", + " assert np.ndim(x) == 4\n", + " assert align > 0, 'align must be a positive number.'\n", + "\n", + " height, width = x.shape[-3:-1]\n", + " height_to_pad = (align - height % align) if height % align != 0 else 0\n", + " width_to_pad = (align - width % align) if width % align != 0 else 0\n", + "\n", + " bbox_to_pad = {\n", + " 'offset_height': height_to_pad // 2,\n", + " 'offset_width': width_to_pad // 2,\n", + " 'target_height': height + height_to_pad,\n", + " 'target_width': width + width_to_pad\n", + " }\n", + " padded_x = tf.image.pad_to_bounding_box(x, **bbox_to_pad)\n", + " bbox_to_crop = {\n", + " 'offset_height': height_to_pad // 2,\n", + " 'offset_width': width_to_pad // 2,\n", + " 'target_height': height,\n", + " 'target_width': width\n", + " }\n", + " return padded_x, bbox_to_crop\n", + "\n", + "\n", + "class Interpolator:\n", + " \"\"\"A class for generating interpolated frames between two input frames.\n", + "\n", + " Uses the Film model from TFHub\n", + " \"\"\"\n", + "\n", + " def __init__(self, align: int = 64) -> None:\n", + " \"\"\"Loads a saved model.\n", + "\n", + " Args:\n", + " align: 'If >1, pad the input size so it divides with this before\n", + " inference.'\n", + " \"\"\"\n", + " self._model = hub.load(\"https://tfhub.dev/google/film/1/")\n", + " self._align = align\n", + "\n", + " def __call__(self, x0: np.ndarray, x1: np.ndarray,\n", + " dt: np.ndarray) -> np.ndarray:\n", + " \"\"\"Generates an interpolated frame between given two batches of frames.\n", + "\n", + " All inputs should be np.float32 datatype.\n", + "\n", + " Args:\n", + " x0: First image batch. Dimensions: (batch_size, height, width, channels)\n", + " x1: Second image batch. Dimensions: (batch_size, height, width, channels)\n", + " dt: Sub-frame time. Range [0,1]. 
Dimensions: (batch_size,)\n", + "\n", + " Returns:\n", + " The result with dimensions (batch_size, height, width, channels).\n", + " \"\"\"\n", + " if self._align is not None:\n", + " x0, bbox_to_crop = _pad_to_align(x0, self._align)\n", + " x1, _ = _pad_to_align(x1, self._align)\n", + "\n", + " inputs = {'x0': x0, 'x1': x1, 'time': dt[..., np.newaxis]}\n", + " result = self._model(inputs, training=False)\n", + " image = result['image']\n", + "\n", + " if self._align is not None:\n", + " image = tf.image.crop_to_bounding_box(image, **bbox_to_crop)\n", + " return image.numpy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZeGYaNBd_7a5" + }, + "source": [ + "## Frame and Video Generation Utility Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gOJxup6s_1DP" + }, + "outputs": [], + "source": [ + "def _recursive_generator(\n", + " frame1: np.ndarray, frame2: np.ndarray, num_recursions: int,\n", + " interpolator: Interpolator) -> Generator[np.ndarray, None, None]:\n", + " \"\"\"Splits halfway to repeatedly generate more frames.\n", + "\n", + " Args:\n", + " frame1: Input image 1.\n", + " frame2: Input image 2.\n", + " num_recursions: How many times to interpolate the consecutive image pairs.\n", + " interpolator: The frame interpolator instance.\n", + "\n", + " Yields:\n", + " The interpolated frames, including the first frame (frame1), but excluding\n", + " the final frame2.\n", + " \"\"\"\n", + " if num_recursions == 0:\n", + " yield frame1\n", + " else:\n", + " # Adds the batch dimension to all inputs before calling the interpolator,\n", + " # and remove it afterwards.\n", + " time = np.full(shape=(1,), fill_value=0.5, dtype=np.float32)\n", + " mid_frame = interpolator(\n", + " np.expand_dims(frame1, axis=0), np.expand_dims(frame2, axis=0), time)[0]\n", + " yield from _recursive_generator(frame1, mid_frame, num_recursions - 1,\n", + " interpolator)\n", + " yield from _recursive_generator(mid_frame, frame2, num_recursions - 1,\n", + " interpolator)\n", + "\n", + "\n", + "def interpolate_recursively(\n", + " frames: List[np.ndarray], num_recursions: int,\n", + " interpolator: Interpolator) -> Iterable[np.ndarray]:\n", + " \"\"\"Generates interpolated frames by repeatedly interpolating the midpoint.\n", + "\n", + " Args:\n", + " frames: List of input frames. Expected shape (H, W, 3). 
The colors should be\n", + " in the range[0, 1] and in gamma space.\n", + " num_recursions: Number of times to do recursive midpoint\n", + " interpolation.\n", + " interpolator: The frame interpolation model to use.\n", + "\n", + " Yields:\n", + " The interpolated frames (including the inputs).\n", + " \"\"\"\n", + " n = len(frames)\n", + " for i in range(1, n):\n", + " yield from _recursive_generator(frames[i - 1], frames[i],\n", + " times_to_interpolate, interpolator)\n", + " # Separately yield the final frame.\n", + " yield frames[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "X1R2KjhEAHu0" + }, + "outputs": [], + "source": [ + "times_to_interpolate = 6\n", + "interpolator = Interpolator()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZUo8tg1AYvZ" + }, + "source": [ + "## Running the Interpolator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QMMNjs7sAWTG" + }, + "outputs": [], + "source": [ + "input_frames = [image1, image2]\n", + "frames = list(\n", + " interpolate_recursively(input_frames, times_to_interpolate,\n", + " interpolator))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "s9mHHyCAAhrM" + }, + "outputs": [], + "source": [ + "print(f'video with {len(frames)} frames')\n", + "media.show_video(frames, fps=30, title='FILM interpolated video')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_0AZKeMVFwAc" + }, + "source": [ + "For more information, you can visit [FILM's model repository](https://github.com/google-research/frame-interpolation).\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8764ry3SGDks" + }, + "source": [ + "## Citation\n", + "\n", + "If you find this model and code useful in your works, please acknowledge it appropriately by citing:\n", + "\n", + "```\n", + "@inproceedings{reda2022film,\n", + " title = {FILM: Frame Interpolation for Large Motion},\n", + " author = {Fitsum Reda and Janne Kontkanen and Eric Tabellion and Deqing Sun and Caroline Pantofaru and Brian Curless},\n", + " booktitle = {The European Conference on Computer Vision (ECCV)},\n", + " year = {2022}\n", + "}\n", + "```\n", + "\n", + "```\n", + "@misc{film-tf,\n", + " title = {Tensorflow 2 Implementation of \"FILM: Frame Interpolation for Large Motion\"},\n", + " author = {Fitsum Reda and Janne Kontkanen and Eric Tabellion and Deqing Sun and Caroline Pantofaru and Brian Curless},\n", + " year = {2022},\n", + " publisher = {GitHub},\n", + " journal = {GitHub repository},\n", + " howpublished = {\\url{https://github.com/google-research/frame-interpolation}}\n", + "}\n", + "```" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "name": "tf_hub_film_example.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb b/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb new file mode 100644 index 00000000000..4937bc2eb22 --- /dev/null +++ b/site/en/hub/tutorials/tf_hub_generative_image_module.ipynb @@ -0,0 +1,447 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "N6ZDpd9XzFeN" + }, + "source": [ + "##### Copyright 2018 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + 
"cellView": "both", + "id": "KUu4vOt5zI9d" + }, + "outputs": [], + "source": [ + "# Copyright 2018 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CxmDMK4yupqg" + }, + "source": [ + "# Generate Artificial Faces with CelebA Progressive GAN Model\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Sy553YSVmYiK" + }, + "source": [ + "This Colab demonstrates use of a TF Hub module based on a generative adversarial network (GAN). The module maps from N-dimensional vectors, called latent space, to RGB images.\n", + "\n", + "Two examples are provided:\n", + "* **Mapping** from latent space to images, and\n", + "* Given a target image, **using gradient descent to find** a latent vector that generates an image similar to the target image." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v4XGxDrCkeip" + }, + "source": [ + "## Optional prerequisites\n", + "\n", + "* Familiarity with [low level Tensorflow concepts](https://www.tensorflow.org/guide/eager).\n", + "* [Generative Adversarial Network](https://en.wikipedia.org/wiki/Generative_adversarial_network) on Wikipedia.\n", + "* Paper on Progressive GANs: [Progressive Growing of GANs for Improved Quality, Stability, and Variation](https://arxiv.org/abs/1710.10196)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HK3Q2vIaVw56" + }, + "source": [ + "### More models\n", + "[Here](https://tfhub.dev/s?module-type=image-generator) you can find all models currently hosted on [tfhub.dev](https://tfhub.dev/) that can generate images." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KNM3kA0arrUu" + }, + "outputs": [], + "source": [ + "# Install imageio for creating animations. \n", + "!pip -q install imageio\n", + "!pip -q install scikit-image\n", + "!pip install git+https://github.com/tensorflow/docs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "6cPY9Ou4sWs_" + }, + "outputs": [], + "source": [ + "#@title Imports and function definitions\n", + "from absl import logging\n", + "\n", + "import imageio\n", + "import PIL.Image\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "import tensorflow as tf\n", + "tf.random.set_seed(0)\n", + "\n", + "import tensorflow_hub as hub\n", + "from tensorflow_docs.vis import embed\n", + "import time\n", + "\n", + "try:\n", + " from google.colab import files\n", + "except ImportError:\n", + " pass\n", + "\n", + "from IPython import display\n", + "from skimage import transform\n", + "\n", + "# We could retrieve this value from module.get_input_shapes() if we didn't know\n", + "# beforehand which module we will be using.\n", + "latent_dim = 512\n", + "\n", + "\n", + "# Interpolates between two vectors that are non-zero and don't both lie on a\n", + "# line going through origin. First normalizes v2 to have the same norm as v1. 
\n", + "# Then interpolates between the two vectors on the hypersphere.\n", + "def interpolate_hypersphere(v1, v2, num_steps):\n", + " v1_norm = tf.norm(v1)\n", + " v2_norm = tf.norm(v2)\n", + " v2_normalized = v2 * (v1_norm / v2_norm)\n", + "\n", + " vectors = []\n", + " for step in range(num_steps):\n", + " interpolated = v1 + (v2_normalized - v1) * step / (num_steps - 1)\n", + " interpolated_norm = tf.norm(interpolated)\n", + " interpolated_normalized = interpolated * (v1_norm / interpolated_norm)\n", + " vectors.append(interpolated_normalized)\n", + " return tf.stack(vectors)\n", + "\n", + "# Simple way to display an image.\n", + "def display_image(image):\n", + " image = tf.constant(image)\n", + " image = tf.image.convert_image_dtype(image, tf.uint8)\n", + " return PIL.Image.fromarray(image.numpy())\n", + "\n", + "# Given a set of images, show an animation.\n", + "def animate(images):\n", + " images = np.array(images)\n", + " converted_images = np.clip(images * 255, 0, 255).astype(np.uint8)\n", + " imageio.mimsave('./animation.gif', converted_images)\n", + " return embed.embed_file('./animation.gif')\n", + "\n", + "logging.set_verbosity(logging.ERROR)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f5EESfBvukYI" + }, + "source": [ + "## Latent space interpolation" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nJb9gFmRvynZ" + }, + "source": [ + "### Random vectors\n", + "\n", + "Latent space interpolation between two randomly initialized vectors. We will use a TF Hub module [progan-128](https://tfhub.dev/google/progan-128/1) that contains a pre-trained Progressive GAN." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8StEe9x9wGma" + }, + "outputs": [], + "source": [ + "progan = hub.load(\"https://tfhub.dev/google/progan-128/1/").signatures['default']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fZ0O5_5Jhwio" + }, + "outputs": [], + "source": [ + "def interpolate_between_vectors():\n", + " v1 = tf.random.normal([latent_dim])\n", + " v2 = tf.random.normal([latent_dim])\n", + " \n", + " # Creates a tensor with 25 steps of interpolation between v1 and v2.\n", + " vectors = interpolate_hypersphere(v1, v2, 50)\n", + "\n", + " # Uses module to generate images from the latent space.\n", + " interpolated_images = progan(vectors)['default']\n", + "\n", + " return interpolated_images\n", + "\n", + "interpolated_images = interpolate_between_vectors()\n", + "animate(interpolated_images)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "L9-uXoTHuXQC" + }, + "source": [ + "## Finding closest vector in latent space\n", + "Fix a target image. As an example use an image generated from the module or upload your own." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "both", + "id": "phT4W66pMmko" + }, + "outputs": [], + "source": [ + "image_from_module_space = True # @param { isTemplate:true, type:\"boolean\" }\n", + "\n", + "def get_module_space_image():\n", + " vector = tf.random.normal([1, latent_dim])\n", + " images = progan(vector)['default'][0]\n", + " return images\n", + "\n", + "def upload_image():\n", + " uploaded = files.upload()\n", + " image = imageio.imread(uploaded[list(uploaded.keys())[0]])\n", + " return transform.resize(image, [128, 128])\n", + "\n", + "if image_from_module_space:\n", + " target_image = get_module_space_image()\n", + "else:\n", + " target_image = upload_image()\n", + "\n", + "display_image(target_image)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rBIt3Q4qvhuq" + }, + "source": [ + "After defining a loss function between the target image and the image generated by a latent space variable, we can use gradient descent to find variable values that minimize the loss." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cUGakLdbML2Q" + }, + "outputs": [], + "source": [ + "tf.random.set_seed(42)\n", + "initial_vector = tf.random.normal([1, latent_dim])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "u7MGzDE5MU20" + }, + "outputs": [], + "source": [ + "display_image(progan(initial_vector)['default'][0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q_4Z7tnyg-ZY" + }, + "outputs": [], + "source": [ + "def find_closest_latent_vector(initial_vector, num_optimization_steps,\n", + " steps_per_image):\n", + " images = []\n", + " losses = []\n", + "\n", + " vector = tf.Variable(initial_vector) \n", + " optimizer = tf.optimizers.Adam(learning_rate=0.01)\n", + " loss_fn = tf.losses.MeanAbsoluteError(reduction=\"sum\")\n", + "\n", + " for step in range(num_optimization_steps):\n", + " if (step % 100)==0:\n", + " print()\n", + " print('.', end='')\n", + " with tf.GradientTape() as tape:\n", + " image = progan(vector.read_value())['default'][0]\n", + " if (step % steps_per_image) == 0:\n", + " images.append(image.numpy())\n", + " target_image_difference = loss_fn(image, target_image[:,:,:3])\n", + " # The latent vectors were sampled from a normal distribution. 
We can get\n", + " # more realistic images if we regularize the length of the latent vector to \n", + " # the average length of vector from this distribution.\n", + " regularizer = tf.abs(tf.norm(vector) - np.sqrt(latent_dim))\n", + " \n", + " loss = target_image_difference + regularizer\n", + " losses.append(loss.numpy())\n", + " grads = tape.gradient(loss, [vector])\n", + " optimizer.apply_gradients(zip(grads, [vector]))\n", + " \n", + " return images, losses\n", + "\n", + "\n", + "num_optimization_steps=200\n", + "steps_per_image=5\n", + "images, loss = find_closest_latent_vector(initial_vector, num_optimization_steps, steps_per_image)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pRbeF2oSAcOB" + }, + "outputs": [], + "source": [ + "plt.plot(loss)\n", + "plt.ylim([0,max(plt.ylim())])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "KnZkDy2FEsTt" + }, + "outputs": [], + "source": [ + "animate(np.stack(images))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GGKfuCdfPQKH" + }, + "source": [ + "Compare the result to the target:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "TK1P5z3bNuIl" + }, + "outputs": [], + "source": [ + "display_image(np.concatenate([images[-1], target_image], axis=1))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tDt15dLsJwMy" + }, + "source": [ + "### Playing with the above example\n", + "If image is from the module space, the descent is quick and converges to a reasonable sample. Try out descending to an image that is **not from the module space**. The descent will only converge if the image is reasonably close to the space of training images.\n", + "\n", + "How to make it descend faster and to a more realistic image? One can try:\n", + "* using different loss on the image difference, e.g., quadratic,\n", + "* using different regularizer on the latent vector,\n", + "* initializing from a random vector in multiple runs,\n", + "* etc.\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "N6ZDpd9XzFeN" + ], + "name": "tf_hub_generative_image_module.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/tweening_conv3d.ipynb b/site/en/hub/tutorials/tweening_conv3d.ipynb new file mode 100644 index 00000000000..8c53929021f --- /dev/null +++ b/site/en/hub/tutorials/tweening_conv3d.ipynb @@ -0,0 +1,297 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "wC0PtNm3Sa_T" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hgOqPjRKSa-7" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oKAkxAYuONU6" + }, + "source": [ + "# Video Inbetweening using 3D Convolutions\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cvMgkVIBpT-Y" + }, + "source": [ + "Yunpeng Li, Dominik Roblek, and Marco Tagliasacchi. From Here to There: Video Inbetweening Using Direct 3D Convolutions, 2019.\n", + "\n", + "https://arxiv.org/abs/1905.10240/n", + "\n", + "\n", + "Current Hub characteristics:\n", + "- has models for BAIR Robot pushing videos and KTH action video dataset (though this colab uses only BAIR)\n", + "- BAIR dataset already available in Hub. However, KTH videos need to be supplied by the users themselves.\n", + "- only evaluation (video generation) for now\n", + "- batch size and frame size are hard-coded\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Q4DN769E2O_R" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EsQFWvxrYrHg" + }, + "source": [ + "Since `tfds.load('bair_robot_pushing_small', split='test')` would download a 30GB archive that also contains the training data, we download a separated archive that only contains the 190MB test data. The used dataset has been published by [this paper](https://arxiv.org/abs/1710.05268) and is licensed as Creative Commons BY 4.0." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GhIKakhc7JYL" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "import tensorflow_hub as hub\n", + "import tensorflow_datasets as tfds\n", + "\n", + "from tensorflow_datasets.core import SplitGenerator\n", + "from tensorflow_datasets.video.bair_robot_pushing import BairRobotPushingSmall\n", + "\n", + "import tempfile\n", + "import pathlib\n", + "\n", + "TEST_DIR = pathlib.Path(tempfile.mkdtemp()) / \"bair_robot_pushing_small/softmotion30_44k/test/\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zBMz14GmYkwz" + }, + "outputs": [], + "source": [ + "# Download the test split to $TEST_DIR\n", + "!mkdir -p $TEST_DIR\n", + "!wget -nv https://storage.googleapis.com/download.tensorflow.org/data/bair_test_traj_0_to_255.tfrecords -O $TEST_DIR/traj_0_to_255.tfrecords" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "irRJ2Q0iYoW0" + }, + "outputs": [], + "source": [ + "# Since the dataset builder expects the train and test split to be downloaded,\n", + "# patch it so it only expects the test data to be available\n", + "builder = BairRobotPushingSmall()\n", + "test_generator = SplitGenerator(name='test', gen_kwargs={\"filedir\": str(TEST_DIR)})\n", + "builder._split_generators = lambda _: [test_generator]\n", + "builder.download_and_prepare()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iaGU8hhBPi_6" + }, + "source": [ + "## BAIR: Demo based on numpy array inputs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "IgWmW8YzEiDo" + }, + "outputs": [], + "source": [ + "# @title Load some example data (BAIR).\n", + "batch_size = 16\n", + "\n", + "# If unable to download the dataset automatically due to \"not enough disk space\", please download manually to Google Drive and\n", + "# load using tf.data.TFRecordDataset.\n", + "ds = builder.as_dataset(split=\"test\")\n", + "test_videos = ds.batch(batch_size)\n", + "first_batch = next(iter(test_videos))\n", + "input_frames = first_batch['image_aux1'][:, ::15]\n", + "input_frames = tf.cast(input_frames, 
tf.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "96Jd5XefGHRr" + }, + "outputs": [], + "source": [ + "# @title Visualize loaded videos start and end frames.\n", + "\n", + "print('Test videos shape [batch_size, start/end frame, height, width, num_channels]: ', input_frames.shape)\n", + "sns.set_style('white')\n", + "plt.figure(figsize=(4, 2*batch_size))\n", + "\n", + "for i in range(batch_size)[:4]:\n", + " plt.subplot(batch_size, 2, 1 + 2*i)\n", + " plt.imshow(input_frames[i, 0] / 255.0)\n", + " plt.title('Video {}: First frame'.format(i))\n", + " plt.axis('off')\n", + " plt.subplot(batch_size, 2, 2 + 2*i)\n", + " plt.imshow(input_frames[i, 1] / 255.0)\n", + " plt.title('Video {}: Last frame'.format(i))\n", + " plt.axis('off')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "w0FFhkikQABy" + }, + "source": [ + "### Load Hub Module" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cLAUiWfEQAB5" + }, + "outputs": [], + "source": [ + "hub_handle = 'https://tfhub.dev/google/tweening_conv3d_bair/1'\n", + "module = hub.load(hub_handle).signatures['default']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PVHTdXnhbGsK" + }, + "source": [ + "### Generate and show the videos" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FHAwBW-zyegP" + }, + "outputs": [], + "source": [ + "filled_frames = module(input_frames)['default'] / 255.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "tVesWHTnSW1Z" + }, + "outputs": [], + "source": [ + "# Show sequences of generated video frames.\n", + "\n", + "# Concatenate start/end frames and the generated filled frames for the new videos.\n", + "generated_videos = np.concatenate([input_frames[:, :1] / 255.0, filled_frames, input_frames[:, 1:] / 255.0], axis=1)\n", + "\n", + "for video_id in range(4):\n", + " fig = plt.figure(figsize=(10 * 2, 2))\n", + " for frame_id in range(1, 16):\n", + " ax = fig.add_axes([frame_id * 1 / 16., 0, (frame_id + 1) * 1 / 16., 1],\n", + " xmargin=0, ymargin=0)\n", + " ax.imshow(generated_videos[video_id, frame_id])\n", + " ax.axis('off')" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [ + "Q4DN769E2O_R" + ], + "name": "tweening_conv3d.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb b/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb new file mode 100644 index 00000000000..879bdbd0edb --- /dev/null +++ b/site/en/hub/tutorials/wav2vec2_saved_model_finetuning.ipynb @@ -0,0 +1,984 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "yCs7P9JTMlzV" + }, + "source": [ + "##### Copyright 2021 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Jqn-HYw-Mkea" + }, + "outputs": [], + "source": [ + "#@title Copyright 2021 The TensorFlow Hub Authors. 
All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "stRetE8gMlmZ" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ndG8MjmJeicp" + }, + "source": [ + "# Fine-tuning Wav2Vec2 with an LM head\n", + "\n", + "In this notebook, we will load the pre-trained wav2vec2 model from [TFHub](https://tfhub.dev) and will fine-tune it on [LibriSpeech dataset](https://huggingface.co/datasets/librispeech_asr) by appending Language Modeling head (LM) over the top of our pre-trained model. The underlying task is to build a model for **Automatic Speech Recognition** i.e. given some speech, the model should be able to transcribe it into text." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rWk8nL6Ui-_0" + }, + "source": [ + "## Setting Up\n", + "\n", + "Before running this notebook, please ensure that you are on GPU runtime (`Runtime` > `Change runtime type` > `GPU`). The following cell will install [`gsoc-wav2vec2`](https://github.com/vasudevgupta7/gsoc-wav2vec2) package & its dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "seqTlMyeZvM4" + }, + "outputs": [], + "source": [ + "!pip3 install -q git+https://github.com/vasudevgupta7/gsoc-wav2vec2@main\n", + "!sudo apt-get install -y libsndfile1-dev\n", + "!pip3 install -q SoundFile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wvuJL8-f0zn5" + }, + "source": [ + "## Model setup using `TFHub`\n", + "\n", + "We will start by importing some libraries/modules." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "M3_fgx4eZvM7" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "from wav2vec2 import Wav2Vec2Config\n", + "\n", + "config = Wav2Vec2Config()\n", + "\n", + "print(\"TF version:\", tf.__version__)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "y0rVUxyWsS5f" + }, + "source": [ + "First, we will download our model from TFHub & will wrap our model signature with [`hub.KerasLayer`](https://www.tensorflow.org/hub/api_docs/python/hub/KerasLayer) to be able to use this model like any other Keras layer. Fortunately, `hub.KerasLayer` can do both in just 1 line.\n", + "\n", + "**Note:** When loading model with `hub.KerasLayer`, model becomes a bit opaque but sometimes we need finer controls over the model, then we can load the model with `tf.keras.models.load_model(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NO6QRC7KZvM9" + }, + "outputs": [], + "source": [ + "pretrained_layer = hub.KerasLayer(\"https://tfhub.dev/vasudevgupta7/wav2vec2/1/", trainable=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pCputyVBv2e9" + }, + "source": [ + "You can refer to this [script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/export2hub.py) in case you are interested in the model exporting script. Object `pretrained_layer` is the freezed version of [`Wav2Vec2Model`](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/wav2vec2/modeling.py). These pre-trained weights were converted from HuggingFace PyTorch [pre-trained weights](https://huggingface.co/facebook/wav2vec2-base) using [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/convert_torch_to_tf.py).\n", + "\n", + "Originally, wav2vec2 was pre-trained with a masked language modelling approach with the objective to identify the true quantized latent speech representation for a masked time step. 
You can read more about the training objective in the paper- [wav2vec 2.0: A Framework for Self-Supervised Learning of Speech Representations](https://arxiv.org/abs/2006.11477)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SseDnCr7hyhC" + }, + "source": [ + "Now, we will be defining a few constants and hyper-parameters which will be useful in the next few cells. `AUDIO_MAXLEN` is intentionally set to `246000` as the model signature only accepts static sequence length of `246000`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eiILuMBERxlO" + }, + "outputs": [], + "source": [ + "AUDIO_MAXLEN = 246000\n", + "LABEL_MAXLEN = 256\n", + "BATCH_SIZE = 2" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1V4gTgGLgXvO" + }, + "source": [ + "In the following cell, we will wrap `pretrained_layer` & a dense layer (LM head) with the [Keras's Functional API](https://www.tensorflow.org/guide/keras/functional)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a3CUN1KEB10Q" + }, + "outputs": [], + "source": [ + "inputs = tf.keras.Input(shape=(AUDIO_MAXLEN,))\n", + "hidden_states = pretrained_layer(inputs)\n", + "outputs = tf.keras.layers.Dense(config.vocab_size)(hidden_states)\n", + "\n", + "model = tf.keras.Model(inputs=inputs, outputs=outputs)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5zDXuoMXhDMo" + }, + "source": [ + "The dense layer (defined above) is having an output dimension of `vocab_size` as we want to predict probabilities of each token in the vocabulary at each time step." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "oPp18ZHRtnq-" + }, + "source": [ + "## Setting up training state" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ATQy1ZK3vFr7" + }, + "source": [ + "In TensorFlow, model weights are built only when `model.call` or `model.build` is called for the first time, so the following cell will build the model weights for us. Further, we will be running `model.summary()` to check the total number of trainable parameters." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ZgL5wyaXZvM-" + }, + "outputs": [], + "source": [ + "model(tf.random.uniform(shape=(BATCH_SIZE, AUDIO_MAXLEN)))\n", + "model.summary()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EQxxA4Fevp7m" + }, + "source": [ + "Now, we need to define the `loss_fn` and optimizer to be able to train the model. The following cell will do that for us. We will be using the `Adam` optimizer for simplicity. `CTCLoss` is a common loss type that is used for tasks (like `ASR`) where input sub-parts can't be easily aligned with output sub-parts. You can read more about CTC-loss from this amazing [blog post](https://distill.pub/2017/ctc/).\n", + "\n", + "\n", + "`CTCLoss` (from [`gsoc-wav2vec2`](https://github.com/vasudevgupta7/gsoc-wav2vec2) package) accepts 3 arguments: `config`, `model_input_shape` & `division_factor`. If `division_factor=1`, then loss will simply get summed, so pass `division_factor` accordingly to get mean over batch." 
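For intuition about what a CTC loss consumes (illustrative only; the notebook itself uses `CTCLoss` from the `gsoc-wav2vec2` package, instantiated in the next cell), TensorFlow's built-in `tf.nn.ctc_loss` takes the dense labels, the per-time-step logits, and the true lengths of both:

```python
# Illustrative sketch only -- not the loss object used in this notebook.
# labels: (batch, max_label_len) int32, logits: (batch, time, vocab) float32.
# The blank_index value here is an assumption for illustration.
def ctc_loss_sketch(labels, logits, label_length, logit_length):
    per_example = tf.nn.ctc_loss(
        labels=labels,
        logits=logits,
        label_length=label_length,
        logit_length=logit_length,
        logits_time_major=False,
        blank_index=0)
    return tf.reduce_mean(per_example)  # mean over the batch
```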
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "glDepVEHZvM_" + }, + "outputs": [], + "source": [ + "from wav2vec2 import CTCLoss\n", + "\n", + "LEARNING_RATE = 5e-5\n", + "\n", + "loss_fn = CTCLoss(config, (BATCH_SIZE, AUDIO_MAXLEN), division_factor=BATCH_SIZE)\n", + "optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1mvTuOXpwsQe" + }, + "source": [ + "## Loading & Pre-processing data\n", + "\n", + "Let's now download the LibriSpeech dataset from the [official website](http://www.openslr.org/12) and set it up." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "I4kIEC77cBCM" + }, + "outputs": [], + "source": [ + "!wget https://www.openslr.org/resources/12/dev-clean.tar.gz -P ./data/train/\n", + "!tar -xf ./data/train/dev-clean.tar.gz -C ./data/train/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "LsQpmpn6jrMI" + }, + "source": [ + "**Note:** We are using `dev-clean` configuration as this notebook is just for demonstration purposes, so we need a small amount of data. Complete training data can be easily downloaded from [LibriSpeech website](http://www.openslr.org/12)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ynxAjtGHGFpM" + }, + "outputs": [], + "source": [ + "ls ./data/train/" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yBMiORo0xJD0" + }, + "source": [ + "Our dataset lies in the LibriSpeech directory. Let's explore these files." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jkIu_Wt4ZvNA" + }, + "outputs": [], + "source": [ + "data_dir = \"./data/train/LibriSpeech/dev-clean/2428/83705/\"\n", + "all_files = os.listdir(data_dir)\n", + "\n", + "flac_files = [f for f in all_files if f.endswith(\".flac\")]\n", + "txt_files = [f for f in all_files if f.endswith(\".txt\")]\n", + "\n", + "print(\"Transcription files:\", txt_files, \"\\nSound files:\", flac_files)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XEObi_Apk3ZD" + }, + "source": [ + "Alright, so each sub-directory has many `.flac` files and a `.txt` file. The `.txt` file contains text transcriptions for all the speech samples (i.e. `.flac` files) present in that sub-directory." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WYW6WKJflO2e" + }, + "source": [ + "We can load this text data as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cEBKxQblHPwq" + }, + "outputs": [], + "source": [ + "def read_txt_file(f):\n", + " with open(f, \"r\") as f:\n", + " samples = f.read().split(\"\\n\")\n", + " samples = {s.split()[0]: \" \".join(s.split()[1:]) for s in samples if len(s.split()) > 2}\n", + " return samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ldkf_ceb0_YW" + }, + "source": [ + "Similarly, we will define a function for loading a speech sample from a `.flac` file.\n", + "\n", + "`REQUIRED_SAMPLE_RATE` is set to `16000` as wav2vec2 was pre-trained with `16K` frequency and it's recommended to fine-tune it without any major change in data distribution due to frequency." 
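If some of your own audio files are not already at 16 kHz, one option is to resample them before feeding the model. This is a sketch under the assumption that SciPy is available in the environment; the notebook's loader below instead simply rejects files at other rates:

```python
# Sketch: bring audio to the 16 kHz rate the model was pre-trained on.
# Assumes scipy is installed; the notebook's read_flac_file() raises an
# error for non-16 kHz files rather than resampling.
from scipy.signal import resample_poly

def resample_to_16k(audio, sample_rate, target_rate=16000):
    if sample_rate == target_rate:
        return audio
    # Polyphase resampling by the rational factor target_rate / sample_rate.
    return resample_poly(audio, target_rate, sample_rate)
```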
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "YOJ3OzPsTyXv" + }, + "outputs": [], + "source": [ + "import soundfile as sf\n", + "\n", + "REQUIRED_SAMPLE_RATE = 16000\n", + "\n", + "def read_flac_file(file_path):\n", + " with open(file_path, \"rb\") as f:\n", + " audio, sample_rate = sf.read(f)\n", + " if sample_rate != REQUIRED_SAMPLE_RATE:\n", + " raise ValueError(\n", + " f\"sample rate (={sample_rate}) of your files must be {REQUIRED_SAMPLE_RATE}\"\n", + " )\n", + " file_id = os.path.split(file_path)[-1][:-len(\".flac\")]\n", + " return {file_id: audio}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2sxDN8P4nWkW" + }, + "source": [ + "Now, we will pick some random samples & will try to visualize them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HI5J-2Dfm_wT" + }, + "outputs": [], + "source": [ + "from IPython.display import Audio\n", + "import random\n", + "\n", + "file_id = random.choice([f[:-len(\".flac\")] for f in flac_files])\n", + "flac_file_path, txt_file_path = os.path.join(data_dir, f\"{file_id}.flac\"), os.path.join(data_dir, \"2428-83705.trans.txt\")\n", + "\n", + "print(\"Text Transcription:\", read_txt_file(txt_file_path)[file_id], \"\\nAudio:\")\n", + "Audio(filename=flac_file_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M8jJ7Ed81p_A" + }, + "source": [ + "Now, we will combine all the speech & text samples and will define the function (in next cell) for that purpose." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MI-5YCzaTsei" + }, + "outputs": [], + "source": [ + "def fetch_sound_text_mapping(data_dir):\n", + " all_files = os.listdir(data_dir)\n", + "\n", + " flac_files = [os.path.join(data_dir, f) for f in all_files if f.endswith(\".flac\")]\n", + " txt_files = [os.path.join(data_dir, f) for f in all_files if f.endswith(\".txt\")]\n", + "\n", + " txt_samples = {}\n", + " for f in txt_files:\n", + " txt_samples.update(read_txt_file(f))\n", + "\n", + " speech_samples = {}\n", + " for f in flac_files:\n", + " speech_samples.update(read_flac_file(f))\n", + "\n", + " assert len(txt_samples) == len(speech_samples)\n", + "\n", + " samples = [(speech_samples[file_id], txt_samples[file_id]) for file_id in speech_samples.keys() if len(speech_samples[file_id]) < AUDIO_MAXLEN]\n", + " return samples" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mx95Lxvu0nT4" + }, + "source": [ + "It's time to have a look at a few samples ..." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_Ls7X_jqIz4R" + }, + "outputs": [], + "source": [ + "samples = fetch_sound_text_mapping(data_dir)\n", + "samples[:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TUjhSWfsnlCL" + }, + "source": [ + "Note: We are loading this data into memory as we working with a small amount of dataset in this notebook. But for training on the complete dataset (~300 GBs), you will have to load data lazily. You can refer to [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/data_utils.py) to know more on that." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xg8Zia1kzw0J" + }, + "source": [ + "Let's pre-process the data now !!!\n", + "\n", + "We will first define the tokenizer & processor using `gsoc-wav2vec2` package. Then, we will do very simple pre-processing. 
`processor` will normalize the raw speech with respect to the frames axis, and `tokenizer` will convert our model outputs into a string (using the defined vocabulary) and take care of removing special tokens (depending on your tokenizer configuration)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gaat_hMLNVHF" + }, + "outputs": [], + "source": [ + "from wav2vec2 import Wav2Vec2Processor\n", + "tokenizer = Wav2Vec2Processor(is_tokenizer=True)\n", + "processor = Wav2Vec2Processor(is_tokenizer=False)\n", + "\n", + "def preprocess_text(text):\n", + " label = tokenizer(text)\n", + " return tf.constant(label, dtype=tf.int32)\n", + "\n", + "def preprocess_speech(audio):\n", + " audio = tf.constant(audio, dtype=tf.float32)\n", + " return processor(tf.transpose(audio))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GyKl8QP-zRFC" + }, + "source": [ + "Now, we will define a Python generator that calls the preprocessing functions we defined in the cells above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PoQrRalwMpQ6" + }, + "outputs": [], + "source": [ + "def inputs_generator():\n", + " for speech, text in samples:\n", + " yield preprocess_speech(speech), preprocess_text(text)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7Vlm3ySFULsG" + }, + "source": [ + "## Setting up `tf.data.Dataset`\n", + "\n", + "The following cell will set up the `tf.data.Dataset` object using its `.from_generator(...)` method. We will be using the generator object we defined in the cell above.\n", + "\n", + "**Note:** For distributed training (especially on TPUs), `.from_generator(...)` doesn't currently work, and it is recommended to train on data stored in the `.tfrecord` format (ideally, the TFRecords should be stored inside a GCS bucket so that the TPUs can be used to their fullest extent).\n", + "\n", + "You can refer to [this script](https://github.com/vasudevgupta7/gsoc-wav2vec2/blob/main/src/make_tfrecords.py) for more details on how to convert LibriSpeech data into TFRecords." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LbQ_dMwGO62h" + }, + "outputs": [], + "source": [ + "output_signature = (\n", + " tf.TensorSpec(shape=(None), dtype=tf.float32),\n", + " tf.TensorSpec(shape=(None), dtype=tf.int32),\n", + ")\n", + "\n", + "dataset = tf.data.Dataset.from_generator(inputs_generator, output_signature=output_signature)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HXBbNsRyPyw3" + }, + "outputs": [], + "source": [ + "BUFFER_SIZE = len(flac_files)\n", + "SEED = 42\n", + "\n", + "dataset = dataset.shuffle(BUFFER_SIZE, seed=SEED)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9DAUmns3pXfr" + }, + "source": [ + "We will split the dataset into batches in the following cell. All the sequences in a batch should be padded to a constant length, so we will use the `.padded_batch(...)` method for that purpose."
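If `.padded_batch(...)` is new to you, the toy sketch below (made-up values, independent of the LibriSpeech data) shows how shorter sequences are right-padded with the supplied padding values; the next cell then applies the same idea to our real dataset:

```python
import tensorflow as tf

# Toy illustration of `.padded_batch(...)`; all values are made up.
toy = tf.data.Dataset.from_generator(
    lambda: iter([([1.0, 2.0], [7]), ([3.0], [8, 9])]),
    output_signature=(
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.int32),
    ),
)
for speech_batch, label_batch in toy.padded_batch(
    2, padded_shapes=([4], [3]), padding_values=(0.0, 0)
):
    print(speech_batch.numpy())  # [[1. 2. 0. 0.] [3. 0. 0. 0.]]
    print(label_batch.numpy())   # [[7 0 0] [8 9 0]]
```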
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Okhko1IWRida" + }, + "outputs": [], + "source": [ + "dataset = dataset.padded_batch(BATCH_SIZE, padded_shapes=(AUDIO_MAXLEN, LABEL_MAXLEN), padding_values=(0.0, 0))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A45CjQG5qSbV" + }, + "source": [ + "Accelerators (like GPUs/TPUs) are very fast and often data-loading (& pre-processing) becomes the bottleneck during training as the data-loading part happens on CPUs. This can increase the training time significantly especially when there is a lot of online pre-processing involved or data is streamed online from GCS buckets. To handle those issues, `tf.data.Dataset` offers the `.prefetch(...)` method. This method helps in preparing the next few batches in parallel (on CPUs) while the model is making predictions (on GPUs/TPUs) on the current batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f-bKu2YjRior" + }, + "outputs": [], + "source": [ + "dataset = dataset.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Lqk2cs6LxVIh" + }, + "source": [ + "Since this notebook is made for demonstration purposes, we will be taking first `num_train_batches` and will perform training over only that. You are encouraged to train on the whole dataset though. Similarly, we will evaluate only `num_val_batches`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z6GO5oYUxXtz" + }, + "outputs": [], + "source": [ + "num_train_batches = 10\n", + "num_val_batches = 4\n", + "\n", + "train_dataset = dataset.take(num_train_batches)\n", + "val_dataset = dataset.skip(num_train_batches).take(num_val_batches)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CzAOI78tky08" + }, + "source": [ + "## Model training\n", + "\n", + "For training our model, we will be directly calling `.fit(...)` method after compiling our model with `.compile(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vuBY2sZElgwg" + }, + "outputs": [], + "source": [ + "model.compile(optimizer, loss=loss_fn)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qswxafSl0HjO" + }, + "source": [ + "The above cell will set up our training state. Now we can initiate training with the `.fit(...)` method." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vtuSfnj1l-I_" + }, + "outputs": [], + "source": [ + "history = model.fit(train_dataset, validation_data=val_dataset, epochs=3)\n", + "history.history" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ySvp8r2E1q_V" + }, + "source": [ + "Let's save our model with `.save(...)` method to be able to perform inference later. You can also export this SavedModel to TFHub by following [TFHub documentation](https://www.tensorflow.org/hub/publish)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C0KEYcwydwjF" + }, + "outputs": [], + "source": [ + "save_dir = \"finetuned-wav2vec2\"\n", + "model.save(save_dir, include_optimizer=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MkOpp9rZ211t" + }, + "source": [ + "Note: We are setting `include_optimizer=False` as we want to use this model for inference only." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SJfPlTgezD0i" + }, + "source": [ + "## Evaluation\n", + "\n", + "Now we will be computing Word Error Rate over the validation dataset\n", + "\n", + "**Word error rate** (WER) is a common metric for measuring the performance of an automatic speech recognition system. The WER is derived from the Levenshtein distance, working at the word level. Word error rate can then be computed as: WER = (S + D + I) / N = (S + D + I) / (S + D + C) where S is the number of substitutions, D is the number of deletions, I is the number of insertions, C is the number of correct words, N is the number of words in the reference (N=S+D+C). This value indicates the percentage of words that were incorrectly predicted. \n", + "\n", + "You can refer to [this paper](https://www.isca-speech.org/archive_v0/interspeech_2004/i04_2765.html) to learn more about WER." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Io_91Y7-r3xu" + }, + "source": [ + "We will use `load_metric(...)` function from [HuggingFace datasets](https://huggingface.co/docs/datasets/) library. Let's first install the `datasets` library using `pip` and then define the `metric` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GW9F_oVDU1TZ" + }, + "outputs": [], + "source": [ + "!pip3 install -q datasets\n", + "\n", + "from datasets import load_metric\n", + "metric = load_metric(\"wer\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ssWXWc7CZvNB" + }, + "outputs": [], + "source": [ + "@tf.function(jit_compile=True)\n", + "def eval_fwd(batch):\n", + " logits = model(batch, training=False)\n", + " return tf.argmax(logits, axis=-1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NFh1myg1x4ua" + }, + "source": [ + "It's time to run the evaluation on validation data now." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EQTFVjZghckJ" + }, + "outputs": [], + "source": [ + "from tqdm.auto import tqdm\n", + "\n", + "for speech, labels in tqdm(val_dataset, total=num_val_batches):\n", + " predictions = eval_fwd(speech)\n", + " predictions = [tokenizer.decode(pred) for pred in predictions.numpy().tolist()]\n", + " references = [tokenizer.decode(label, group_tokens=False) for label in labels.numpy().tolist()]\n", + " metric.add_batch(references=references, predictions=predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WWCc8qBesv3e" + }, + "source": [ + "We are using the `tokenizer.decode(...)` method for decoding our predictions and labels back into the text and will add them to the metric for `WER` computation later." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XI_URj8Wtb2g" + }, + "source": [ + "Now, let's calculate the metric value in following cell:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "a83wekLgWMod" + }, + "outputs": [], + "source": [ + "metric.compute()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "c_cD1OgVEjl4" + }, + "source": [ + "**Note:** Here metric value doesn't make any sense as the model is trained on very small data and ASR-like tasks often require a large amount of data to learn a mapping from speech to text. You should probably train on large data to get some good results. This notebook gives you a template to fine-tune a pre-trained speech model." 
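If you want to sanity-check the WER formula itself, here is a tiny hand-checkable example with made-up strings (unrelated to LibriSpeech): the reference "the cat sat on the mat" has N = 6 words, and the prediction "the cat sit on mat" contains 1 substitution and 1 deletion, so WER = (1 + 1 + 0) / 6 ≈ 0.33.

```python
# Hand-checkable WER example; the strings are illustrative only.
wer_check = load_metric("wer")
print(wer_check.compute(
    predictions=["the cat sit on mat"],
    references=["the cat sat on the mat"],
))  # ~0.333
```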
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G14o706kdTE1" + }, + "source": [ + "## Inference\n", + "\n", + "Now that we are satisfied with the training process & have saved the model in `save_dir`, we will see how this model can be used for inference.\n", + "\n", + "First, we will load our model using `tf.keras.models.load_model(...)`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wrTrExiUdaED" + }, + "outputs": [], + "source": [ + "finetuned_model = tf.keras.models.load_model(save_dir)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "luodSroz20SR" + }, + "source": [ + "Let's download some speech samples for performing inference. You can replace the following sample with your speech sample also." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HUE0shded6Ej" + }, + "outputs": [], + "source": [ + "!wget https://github.com/vasudevgupta7/gsoc-wav2vec2/raw/main/data/SA2.wav" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ycBjU_U53FjL" + }, + "source": [ + "Now, we will read the speech sample using `soundfile.read(...)` and pad it to `AUDIO_MAXLEN` to satisfy the model signature. Then we will normalize that speech sample using the `Wav2Vec2Processor` instance & will feed it into the model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "z7CARje4d5_H" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "speech, _ = sf.read(\"SA2.wav\")\n", + "speech = np.pad(speech, (0, AUDIO_MAXLEN - len(speech)))\n", + "speech = tf.expand_dims(processor(tf.constant(speech)), 0)\n", + "\n", + "outputs = finetuned_model(speech)\n", + "outputs" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lUSttSPa30qP" + }, + "source": [ + "Let's decode numbers back into text sequence using the `Wav2Vec2tokenizer` instance, we defined above." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RYdJqxQ4llgI" + }, + "outputs": [], + "source": [ + "predictions = tf.argmax(outputs, axis=-1)\n", + "predictions = [tokenizer.decode(pred) for pred in predictions.numpy().tolist()]\n", + "predictions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7DXC757bztJc" + }, + "source": [ + "This prediction is quite random as the model was never trained on large data in this notebook (as this notebook is not meant for doing complete training). You will get good predictions if you train this model on complete LibriSpeech dataset.\n", + "\n", + "Finally, we have reached an end to this notebook. But it's not an end of learning TensorFlow for speech-related tasks, this [repository](https://github.com/tulasiram58827/TTS_TFLite) contains some more amazing tutorials. In case you encountered any bug in this notebook, please create an issue [here](https://github.com/vasudevgupta7/gsoc-wav2vec2/issues)." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [ + "rWk8nL6Ui-_0", + "wvuJL8-f0zn5", + "oPp18ZHRtnq-", + "1mvTuOXpwsQe", + "7Vlm3ySFULsG", + "CzAOI78tky08", + "SJfPlTgezD0i", + "G14o706kdTE1" + ], + "name": "wav2vec2_saved_model_finetuning.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/wiki40b_lm.ipynb b/site/en/hub/tutorials/wiki40b_lm.ipynb new file mode 100644 index 00000000000..ad94ce0aab8 --- /dev/null +++ b/site/en/hub/tutorials/wiki40b_lm.ipynb @@ -0,0 +1,451 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Oxb_tjw13y4G" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EAkh2aBJLg6q" + }, + "outputs": [], + "source": [ + "# Copyright 2019 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "owAopeOtirc9" + }, + "source": [ + "# Wiki40B Language Models\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "T-nCyGRri-KO" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub models\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8eY9jkGpjf3d" + }, + "source": [ + "Generate Wikipedia-like text using the **Wiki40B language models** from [TensorFlow Hub](https://tfhub.dev)!\n", + "\n", + "This notebook illustrates how to:\n", + "* Load the 41 monolingual and 2 multilingual language models that are part of the [Wiki40b-LM collection](https://tfhub.dev/google/collections/wiki40b-lm/1) on TF-Hub\n", + "* Use the models to obtain perplexity, per layer activations, and word embeddings for a given piece of text\n", + "* Generate text token-by-token from a piece of seed text\n", + "\n", + "The language models are trained on the newly published, cleaned-up [Wiki40B dataset](https://www.tensorflow.org/datasets/catalog/wiki40b) available on TensorFlow Datasets. The training setup is based on the paper [“Wiki-40B: Multilingual Language Model Dataset”](https://research.google/pubs/pub49029/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wK2YnrEhLjDf" + }, + "source": [ + "## Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "sv2CmI7BdaML" + }, + "outputs": [], + "source": [ + "#@title Installing Dependencies\n", + "!pip install --quiet \"tensorflow-text==2.11.*\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "8uSkaQ-Vdon2" + }, + "outputs": [], + "source": [ + "#@title Imports\n", + "import numpy as np\n", + "import tensorflow.compat.v1 as tf\n", + "import tensorflow_hub as hub\n", + "import tensorflow_text as tf_text\n", + "\n", + "tf.disable_eager_execution()\n", + "tf.logging.set_verbosity(tf.logging.WARN)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d2MvP-cyL-BN" + }, + "source": [ + "## Choose Language\n", + "\n", + "Let's choose **which language model** to load from TF-Hub and the **length of text** to be generated. \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "33zYlSXwMA_o" + }, + "outputs": [], + "source": [ + "#@title { run: \"auto\" }\n", + "language = \"en\" #@param [\"en\", \"ar\", \"zh-cn\", \"zh-tw\", \"nl\", \"fr\", \"de\", \"it\", \"ja\", \"ko\", \"pl\", \"pt\", \"ru\", \"es\", \"th\", \"tr\", \"bg\", \"ca\", \"cs\", \"da\", \"el\", \"et\", \"fa\", \"fi\", \"he\", \"hi\", \"hr\", \"hu\", \"id\", \"lt\", \"lv\", \"ms\", \"no\", \"ro\", \"sk\", \"sl\", \"sr\", \"sv\", \"tl\", \"uk\", \"vi\", \"multilingual-64k\", \"multilingual-128k\"]\n", + "hub_module = \"https://tfhub.dev/google/wiki40b-lm-%7B%7D/1/".format(language)\n", + "max_gen_len = 20 #@param\n", + "\n", + "print(\"Using the {} model to generate sequences of max length {}.\".format(hub_module, max_gen_len))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dgw2qW4xZbMj" + }, + "source": [ + "## Build the Model\n", + "\n", + "Okay, now that we've configured which pre-trained model to use, let's configure it to generate text up to `max_gen_len`. We will need to load the language model from TF-Hub, feed in a piece of starter text, and then iteratively feed in tokens as they are generated." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "pUypKuc3Mlpa" + }, + "outputs": [], + "source": [ + "#@title Load the language model pieces\n", + "g = tf.Graph()\n", + "n_layer = 12\n", + "model_dim = 768\n", + "\n", + "with g.as_default():\n", + " text = tf.placeholder(dtype=tf.string, shape=(1,))\n", + "\n", + " # Load the pretrained model from TF-Hub\n", + " module = hub.Module(hub_module)\n", + "\n", + " # Get the word embeddings, activations at each layer, negative log likelihood\n", + " # of the text, and calculate the perplexity.\n", + " embeddings = module(dict(text=text), signature=\"word_embeddings\", as_dict=True)[\"word_embeddings\"]\n", + " activations = module(dict(text=text), signature=\"activations\", as_dict=True)[\"activations\"]\n", + " neg_log_likelihood = module(dict(text=text), signature=\"neg_log_likelihood\", as_dict=True)[\"neg_log_likelihood\"]\n", + " ppl = tf.exp(tf.reduce_mean(neg_log_likelihood, axis=1))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "ZOS2Z2n0MsuC" + }, + "outputs": [], + "source": [ + "#@title Construct the per-token generation graph\n", + "def feedforward_step(module, inputs, mems):\n", + " \"\"\"Generate one step.\"\"\"\n", + " # Set up the input dict for one step of generation\n", + " inputs = tf.dtypes.cast(inputs, tf.int64)\n", + " generation_input_dict = dict(input_tokens=inputs)\n", + " mems_dict = {\"mem_{}\".format(i): mems[i] for i in range(n_layer)}\n", + " generation_input_dict.update(mems_dict)\n", + "\n", + " # Generate the tokens from the language model\n", + " generation_outputs = module(generation_input_dict, signature=\"prediction\", as_dict=True)\n", + "\n", + " # Get the probabilities and the inputs for the next steps\n", + " probs = generation_outputs[\"probs\"]\n", + " new_mems = [generation_outputs[\"new_mem_{}\".format(i)] for i in range(n_layer)]\n", + "\n", + " return probs, new_mems" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "S9ss6amQMyVY" + }, + "outputs": [], + "source": [ + "#@title Build the statically unrolled graph for `max_gen_len` tokens\n", + "with g.as_default():\n", + " # Tokenization with the sentencepiece model.\n", + " token_ids = module(dict(text=text), signature=\"tokenization\", as_dict=True)[\"token_ids\"]\n", + " inputs_np = token_ids\n", + " # Generate text by statically unrolling the computational graph\n", + " mems_np = [np.zeros([1, 0, model_dim], dtype=np.float32) for _ in range(n_layer)]\n", + "\n", + " # Generate up to `max_gen_len` tokens\n", + " sampled_ids = []\n", + " for step in range(max_gen_len):\n", + " probs, mems_np = feedforward_step(module, inputs_np, mems_np)\n", + " sampled_id = tf.random.categorical(tf.math.log(probs[0]), num_samples=1, dtype=tf.int32)\n", + " sampled_id = tf.squeeze(sampled_id)\n", + " sampled_ids.append(sampled_id)\n", + " inputs_np = tf.reshape(sampled_id, [1, 1])\n", + "\n", + " # Transform the ids into text\n", + " sampled_ids = tf.expand_dims(sampled_ids, axis=0)\n", + " generated_text = module(dict(token_ids=sampled_ids), signature=\"detokenization\", as_dict=True)[\"text\"]\n", + "\n", + " init_op = tf.group([tf.global_variables_initializer(), tf.tables_initializer()])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "K5SYcRrxM7vS" + }, + "source": [ + "## Generate some text\n", + "\n", + "Let's generate some text! 
We'll set a text `seed` to prompt the language model.\n", + "\n", + "You can use one of the **predefined** seeds or _optionally_ **enter your own**. This text will be used as seed for the language model to help prompt the language model for what to generate next.\n", + "\n", + "You can use the following special tokens precede special parts of the generated article. Use **`_START_ARTICLE_`** to indicate the beginning of the article, **`_START_SECTION_`** to indicate the beginning of a section, and **`_START_PARAGRAPH_`** to generate text in the article\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "GmZxv7bzMIcL" + }, + "outputs": [], + "source": [ + "#@title Predefined Seeds\n", + "lang_to_seed = {\"en\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\",\n", + " \"ar\": \"\\n_START_ARTICLE_\\nأوليفيا كوك\\n_START_SECTION_\\nنشأتها والتعلي \\n_START_PARAGRAPH_\\nولدت أوليفيا كوك في أولدهام في مانشستر الكبرى لأسرة تتكون من أب يعمل كظابط شرطة، وأمها تعمل كممثلة مبيعات. عندما كانت صغيرة بدأت تأخذ دروساً في الباليه الجمباز. وفي المدرسة شاركت في المسرحيات المدرسية، إضافةً إلى عملها في مسرح سندريلا . وفي سن الرابعة عشر عاماً، حصلت على وكيلة لها في مانشستر وهي وقعت عقداً مع وكالة الفنانين المبدعين في مانشستر،\",\n", + " \"zh-cn\": \"\\n_START_ARTICLE_\\n上尾事件\\n_START_SECTION_\\n日本国铁劳资关系恶化\\n_START_PARAGRAPH_\\n由于日本国铁财政恶化,管理层开始重整人手安排,令工会及员工感到受威胁。但日本国铁作为公营企业,其雇员均受公营企业等劳资关系法规管——该法第17条规定公营企业员工不得发动任何罢工行为。为了规避该法例\",\n", + " \"zh-tw\": \"\\n_START_ARTICLE_\\n乌森\\n_START_PARAGRAPH_\\n烏森(法語:Houssen,發音:[usən];德語:Hausen;阿爾薩斯語:Hüse)是法國上萊茵省的一個市鎮,位於該省北部,屬於科爾馬-里博維萊區(Colmar-Ribeauvillé)第二科爾馬縣(Colmar-2)。該市鎮總面積6.7平方公里,2009年時的人口為\",\n", + " \"nl\": \"\\n_START_ARTICLE_\\n1001 vrouwen uit de Nederlandse geschiedenis\\n_START_SECTION_\\nSelectie van vrouwen\\n_START_PARAGRAPH_\\nDe 'oudste' biografie in het boek is gewijd aan de beschermheilige\",\n", + " \"fr\": \"\\n_START_ARTICLE_\\nꝹ\\n_START_SECTION_\\nUtilisation\\n_START_PARAGRAPH_\\nLe d insulaire est utilisé comme lettre additionnelle dans l’édition de 1941 du recueil de chroniques galloises Brut y Tywysogion\",\n", + " \"de\": \"\\n_START_ARTICLE_\\nÜnal Demirkıran\\n_START_SECTION_\\nLaufbahn\\n_START_PARAGRAPH_\\nDemirkıran debütierte als junges Talent am 25. September 1999 im Auswärtsspiel des SSV Ulm 1846 bei Werder Bremen (2:2) in der Bundesliga, als er kurz\",\n", + " \"it\": \"\\n_START_ARTICLE_\\n28th Street (linea IRT Lexington Avenue)\\n_START_SECTION_\\nStoria\\n_START_PARAGRAPH_\\nLa stazione, i cui lavori di costruzione ebbero inizio nel 1900, venne aperta il 27 ottobre 1904, come\",\n", + " \"ja\": \"\\n_START_ARTICLE_\\nしのぶ・まさみshow'05 恋してラララ\\n_START_SECTION_\\n概要\\n_START_PARAGRAPH_\\n『上海ルーキーSHOW』の打ち切り後に放送された年末特番で、同番組MCの大竹しのぶと久本雅美が恋愛にまつわるテーマでトークや音楽企画を展開していた。基本は女\",\n", + " \"ko\": \"\\n_START_ARTICLE_\\n녹턴, Op. 9 (쇼팽)\\n_START_SECTION_\\n녹턴 3번 나장조\\n_START_PARAGRAPH_\\n쇼팽의 녹턴 3번은 세도막 형식인 (A-B-A)형식을 취하고 있다. 첫 부분은 알레그레토(Allegretto)의 빠르기가 지시되어 있으며 물 흐르듯이 부드럽게 전개되나\",\n", + " \"pl\": \"\\n_START_ARTICLE_\\nAK-176\\n_START_SECTION_\\nHistoria\\n_START_PARAGRAPH_\\nPod koniec lat 60 XX w. 
w ZSRR dostrzeżono potrzebę posiadania lekkiej armaty uniwersalnej średniego kalibru o stosunkowo dużej mocy ogniowej, która\",\n", + " \"pt\": \"\\n_START_ARTICLE_\\nÁcido ribonucleico\\n_START_SECTION_\\nIntermediário da transferência de informação\\n_START_PARAGRAPH_\\nEm 1957 Elliot Volkin e Lawrence Astrachan fizeram uma observação significativa. Eles descobriram que uma das mais marcantes mudanças\",\n", + " \"ru\": \"\\n_START_ARTICLE_\\nАрнольд, Ремо\\n_START_SECTION_\\nКлубная карьера\\n_START_PARAGRAPH_\\nАрнольд перешёл в академию «Люцерна» в 12 лет. С 2014 года выступал за вторую команду, где провёл пятнадцать встреч. С сезона 2015/2016 находится в составе основной команды. 27 сентября 2015 года дебютировал\",\n", + " \"es\": \"\\n_START_ARTICLE_\\n(200012) 2007 LK20\\n_START_SECTION_\\nDesignación y nombre\\n_START_PARAGRAPH_\\nDesignado provisionalmente como 2007 LK20.\\n_START_SECTION_\\nCaracterísticas orbitales\\n_START_PARAGRAPH_\\n2007 LK20\",\n", + " \"th\": \"\\n_START_ARTICLE_\\nการนัดหยุดเรียนเพื่อภูมิอากาศ\\n_START_SECTION_\\nเกรียตา ทืนแบร์ย\\n_START_PARAGRAPH_\\nวันที่ 20 สิงหาคม 2561 เกรียตา ทืนแบร์ย นักกิจกรรมภูมิอากาศชาวสวีเดน ซึ่งขณะนั้นศึกษาอยู่ในชั้นเกรด 9 (เทียบเท่ามัธยมศึกษาปีที่ 3) ตัดสินใจไม่เข้าเรียนจนกระทั่งการเลือกตั้งทั่วไปในประเทศสวีเดนปี\",\n", + " \"tr\": \"\\n_START_ARTICLE_\\nİsrail'in Muhafazakar Dostları\\n_START_SECTION_\\nFaaliyetleri\\n_START_PARAGRAPH_\\nGrubun 2005 stratejisi ile aşağıdaki faaliyet alanları tespit edilmiştir:_NEWLINE_İsrail'i destekleme\",\n", + " \"bg\": \"\\n_START_ARTICLE_\\nАвтомобил с повишена проходимост\\n_START_SECTION_\\nОсобености на конструкцията\\n_START_PARAGRAPH_\\nВ исторически план леки автомобили с висока проходимост се произвеждат и имат военно\",\n", + " \"ca\": \"\\n_START_ARTICLE_\\nAuchy-la-Montagne\\n_START_SECTION_\\nPoblació\\n_START_PARAGRAPH_\\nEl 2007 la població de fet d'Auchy-la-Montagne era de 469 persones. Hi havia 160 famílies de les quals 28\",\n", + " \"cs\": \"\\n_START_ARTICLE_\\nŘemeslo\\n_START_PARAGRAPH_\\nŘemeslo je určitý druh manuální dovednosti, provozovaný za účelem obživy, resp. vytváření zisku. Pro řemeslné práce je charakteristický vysoký podíl ruční práce, spojený s používáním specializovaných nástrojů a pomůcek. Řemeslné práce\",\n", + " \"da\": \"\\n_START_ARTICLE_\\nÖrenäs slot\\n_START_PARAGRAPH_\\nÖrenäs slot (svensk: Örenäs slott) er et slot nær Glumslöv i Landskrona stad tæt på Øresunds-kysten i Skåne i Sverige._NEWLINE_Örenäs ligger\",\n", + " \"el\": \"\\n_START_ARTICLE_\\nΆλβαρο Ρεκόμπα\\n_START_SECTION_\\nΒιογραφικά στοιχεία\\n_START_PARAGRAPH_\\nΟ Άλβαρο Ρεκόμπα γεννήθηκε στις 17 Μαρτίου 1976 στο Μοντεβίδεο της Ουρουγουάης από\",\n", + " \"et\": \"\\n_START_ARTICLE_\\nAus deutscher Geistesarbeit\\n_START_PARAGRAPH_\\nAus deutscher Geistesarbeit (alapealkiri Wochenblatt für wissenschaftliche und kulturelle Fragen der Gegenwart) oli ajakiri, mis 1924–1934 ilmus Tallinnas. Ajakirja andis 1932–1934\",\n", + " \"fa\": \"\\n_START_ARTICLE_\\nتفسیر بغوی\\n_START_PARAGRAPH_\\nایرانی حسین بن مسعود بغوی است. این کتاب خلاصه ای از تفسیر الکشف و البیان عن تفسیر القرآن ابواسحاق احمد ثعلبی می‌باشد. این کتاب در ۴ جلد موجود می‌باش\",\n", + " \"fi\": \"\\n_START_ARTICLE_\\nBovesin verilöyly\\n_START_SECTION_\\nVerilöyly\\n_START_PARAGRAPH_\\n19. syyskuuta 1943 partisaaniryhmä saapui Bovesiin tarkoituksenaan ostaa leipää kylästä. 
Kylässä sattui olemaan kaksi SS-miestä, jotka\",\n", + " \"he\": \"\\n_START_ARTICLE_\\nאוגדה 85\\n_START_SECTION_\\nהיסטוריה\\n_START_PARAGRAPH_\\nהאוגדה הוקמה בהתחלה כמשלט העמקים בשנות השבעים. בשנות השמונים הפכה להיות אוגדה מרחבית עם שתי\",\n", + " \"hi\": \"\\n_START_ARTICLE_\\nऑडी\\n_START_SECTION_\\nऑडी इंडिया\\n_START_PARAGRAPH_\\nऑडी इंडिया की स्थापना मार्च 2007 में फोक्सवैगन ग्रुप सेल्स इंडिया के एक विभाजन के रूप में की गई थी। दुनिया भर में 110\",\n", + " \"hr\": \"\\n_START_ARTICLE_\\nČimariko (jezična porodica)\\n_START_PARAGRAPH_\\nChimarikan.-porodica sjevernoameričkih indijanskih jezika koja prema Powersu obuhvaća jezike Indijanaca Chimariko (Chemaŕeko) sa rijeke Trinity i Chimalakwe\",\n", + " \"hu\": \"\\n_START_ARTICLE_\\nÁllami Politikai Igazgatóság\\n_START_PARAGRAPH_\\nAz Állami Politikai Igazgatóság (rövidítve: GPU, oroszul: Государственное политическое управление), majd később Egyesített Állami Politikai Igazgatóság Szovjet-Oroszország\",\n", + " \"id\": \"\\n_START_ARTICLE_\\n(257195) 2008 QY41\\n_START_SECTION_\\nPembentukan\\n_START_PARAGRAPH_\\nSeperti asteroid secara keseluruhan, asteroid ini terbentuk dari nebula matahari primordial sebagai pecahan planetisimal, sesuatu di\",\n", + " \"lt\": \"\\n_START_ARTICLE_\\nŠavijos–Uardigo regionas\\n_START_SECTION_\\nGeografija\\n_START_PARAGRAPH_\\nŠavijos-Uardigo regionas yra Atlanto vandenynu pakrantės lygumoje\",\n", + " \"lv\": \"\\n_START_ARTICLE_\\nApatīts\\n_START_SECTION_\\nĪpašības\\n_START_PARAGRAPH_\\nApatīta kopējā ķīmiskā formula ir Ca₁₀(PO₄)₆(OH,F,Cl)₂, ir trīs atšķirīgi apatīta veidi: apatīts: Ca₁₀(PO₄)₆(OH)₂, fluorapatīts Ca₁₀(PO₄)₆(F)₂ un hlorapatīts: Ca₁₀(PO₄)₆(Cl)₂. Pēc sastāva\",\n", + " \"ms\": \"\\n_START_ARTICLE_\\nEdward C. Prescott\\n_START_PARAGRAPH_\\nEdward Christian Prescott (lahir 26 Disember 1940) ialah seorang ahli ekonomi Amerika. Beliau menerima Hadiah Peringatan Nobel dalam Sains Ekonomi pada tahun 2004, berkongsi\",\n", + " \"no\": \"\\n_START_ARTICLE_\\nAl-Minya\\n_START_SECTION_\\nEtymologi\\n_START_PARAGRAPH_\\nDet er sprikende forklaringer på bynavnet. Det kan komme fra gammelegyptisk Men'at Khufu, i betydning byen hvor Khufu ble ammet, noe som knytter byen til farao Khufu (Keops), som\",\n", + " \"ro\": \"\\n_START_ARTICLE_\\nDealurile Cernăuțiului\\n_START_PARAGRAPH_\\nDealurile Cernăuțiului sunt un lanț deluros striat, care se întinde în partea centrală a interfluviului dintre Prut și Siret, în cadrul regiunii Cernăuți din\",\n", + " \"sk\": \"\\n_START_ARTICLE_\\n10. peruť RAAF\\n_START_PARAGRAPH_\\n10. peruť RAAF je námorná hliadkovacia peruť kráľovských austrálskych vzdušných síl (Royal Australian Air Force – RAAF) založená na základni Edinburgh v Južnej Austrálii ako súčasť 92\",\n", + " \"sl\": \"\\n_START_ARTICLE_\\n105 Artemida\\n_START_SECTION_\\nOdkritje\\n_START_PARAGRAPH_\\nAsteroid je 16. septembra 1868 odkril James Craig Watson (1838 – 1880). Poimenovan je po Artemidi, boginji Lune iz grške\",\n", + " \"sr\": \"\\n_START_ARTICLE_\\nЉанос Морелос 1. Сексион (Истапангахоја)\\n_START_SECTION_\\nСтановништво\\n_START_PARAGRAPH_\\nПрема подацима из 2010. године у насељу је живело 212\",\n", + " \"sv\": \"\\n_START_ARTICLE_\\nÖstra Torps landskommun\\n_START_SECTION_\\nAdministrativ historik\\n_START_PARAGRAPH_\\nKommunen bildades i Östra Torps socken i Vemmenhögs härad i Skåne när 1862 års kommunalförordningar trädde i kraft. 
_NEWLINE_Vid kommunreformen\",\n", + " \"tl\": \"\\n_START_ARTICLE_\\nBésame Mucho\\n_START_PARAGRAPH_\\nAng Bésame Mucho ay isang awit na nasa Kastila. Isinulat ito ng Mehikanang si Consuelo Velázquez noong 1940, bago sumapit ang kanyang ika-16 na\",\n", + " \"uk\": \"\\n_START_ARTICLE_\\nІслам та інші релігії\\n_START_PARAGRAPH_\\nПротягом багатовікової ісламської історії мусульманські правителі, ісламські вчені і звичайні мусульмани вступали у різні відносини з представниками інших релігій. Стиль цих\",\n", + " \"vi\": \"\\n_START_ARTICLE_\\nĐường tỉnh 316\\n_START_PARAGRAPH_\\nĐường tỉnh 316 hay tỉnh lộ 316, viết tắt ĐT316 hay TL316, là đường tỉnh ở các huyện Thanh Sơn, Thanh Thủy, Tam Nông tỉnh Phú Thọ ._NEWLINE_ĐT316 bắt đầu từ xã Tinh Nhuệ\",\n", + " \"multilingual-64k\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\",\n", + " \"multilingual-128k\": \"\\n_START_ARTICLE_\\n1882 Prince Edward Island general election\\n_START_PARAGRAPH_\\nThe 1882 Prince Edward Island election was held on May 8, 1882 to elect members of the House of Assembly of the province of Prince Edward Island, Canada.\"}\n", + "\n", + "seed = lang_to_seed[language]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "mZDGsSyUM_Mg" + }, + "outputs": [], + "source": [ + "#@title Enter your own seed (Optional).\n", + "user_seed = \"\" #@param { type: \"string\" }\n", + "if user_seed.strip():\n", + " seed = user_seed.strip()\n", + "\n", + "# The seed must start with \"_START_ARTICLE_\" or the generated text will be gibberish\n", + "START_ARTICLE = \"_START_ARTICLE_\"\n", + "if START_ARTICLE not in seed:\n", + " seed = \"\\n{}\\n{}\".format(START_ARTICLE, seed)\n", + "\n", + "print(\"Generating text from seed:\\n{}\".format(seed))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "5dMuShi3XuLd" + }, + "outputs": [], + "source": [ + "#@title Initialize session.\n", + "with tf.Session(graph=g).as_default() as session:\n", + " session.run(init_op)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "aS53xjmbbw0Z" + }, + "outputs": [], + "source": [ + "#@title Generate text\n", + "\n", + "with session.as_default():\n", + " results = session.run([embeddings, neg_log_likelihood, ppl, activations, token_ids, generated_text], feed_dict={text: [seed]})\n", + " embeddings_result, neg_log_likelihood_result, ppl_result, activations_result, token_ids_result, generated_text_result = results\n", + " generated_text_output = generated_text_result[0].decode('utf-8')\n", + "\n", + "print(generated_text_output)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tjQf3N1wdND0" + }, + "source": [ + "We can also look at the other outputs of the model - the perplexity, the token ids, the intermediate activations, and the embeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pGfw3CQWNC_n" + }, + "outputs": [], + "source": [ + "ppl_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "FLlgJObFNEmj" + }, + "outputs": [], + "source": [ + "token_ids_result" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5SaH36M-NGXc" + }, + "outputs": [], + 
"source": [ + "activations_result.shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k9Eb_DPfQdUu" + }, + "outputs": [], + "source": [ + "embeddings_result" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "wiki40b_lm.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/hub/tutorials/yamnet.ipynb b/site/en/hub/tutorials/yamnet.ipynb new file mode 100644 index 00000000000..e6c9fbca5a1 --- /dev/null +++ b/site/en/hub/tutorials/yamnet.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "laa9tRjJ59bl" + }, + "source": [ + "##### Copyright 2020 The TensorFlow Hub Authors.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\");" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "T4ZHtBpK6Dom" + }, + "outputs": [], + "source": [ + "#@title Copyright 2020 The TensorFlow Hub Authors. All Rights Reserved.\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License.\n", + "# ==============================================================================" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hk5u_9KN1m-t" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View on GitHub\n", + " \n", + " Download notebook\n", + " \n", + " See TF Hub model\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "x2ep-q7k_5R-" + }, + "source": [ + "# Sound classification with YAMNet\n", + "\n", + "YAMNet is a deep net that predicts 521 audio event [classes](https://github.com/tensorflow/models/blob/master/research/audioset/yamnet/yamnet_class_map.csv) from the [AudioSet-YouTube corpus](http://g.co/audioset) it was trained on. It employs the\n", + "[Mobilenet_v1](https://arxiv.org/pdf/1704.04861.pdf) depthwise-separable\n", + "convolution architecture." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Bteu7pfkpt_f" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_hub as hub\n", + "import numpy as np\n", + "import csv\n", + "\n", + "import matplotlib.pyplot as plt\n", + "from IPython.display import Audio\n", + "from scipy.io import wavfile" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YSVs3zRrrYmY" + }, + "source": [ + "Load the Model from TensorFlow Hub.\n", + "\n", + "Note: to read the documentation just follow the model's [url](https://tfhub.dev/google/yamnet/1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VX8Vzs6EpwMo" + }, + "outputs": [], + "source": [ + "# Load the model.\n", + "model = hub.load('https://tfhub.dev/google/yamnet/1')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lxWx6tOdtdBP" + }, + "source": [ + "The labels file will be loaded from the models assets and is present at `model.class_map_path()`.\n", + "You will load it on the `class_names` variable." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "EHSToAW--o4U" + }, + "outputs": [], + "source": [ + "# Find the name of the class with the top score when mean-aggregated across frames.\n", + "def class_names_from_csv(class_map_csv_text):\n", + " \"\"\"Returns list of class names corresponding to score vector.\"\"\"\n", + " class_names = []\n", + " with tf.io.gfile.GFile(class_map_csv_text) as csvfile:\n", + " reader = csv.DictReader(csvfile)\n", + " for row in reader:\n", + " class_names.append(row['display_name'])\n", + "\n", + " return class_names\n", + "\n", + "class_map_path = model.class_map_path().numpy()\n", + "class_names = class_names_from_csv(class_map_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mSFjRwkZ59lU" + }, + "source": [ + "Add a method to verify and convert a loaded audio is on the proper sample_rate (16K), otherwise it would affect the model's results." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LizGwWjc5w6A" + }, + "outputs": [], + "source": [ + "def ensure_sample_rate(original_sample_rate, waveform,\n", + " desired_sample_rate=16000):\n", + " \"\"\"Resample waveform if required.\"\"\"\n", + " if original_sample_rate != desired_sample_rate:\n", + " desired_length = int(round(float(len(waveform)) /\n", + " original_sample_rate * desired_sample_rate))\n", + " waveform = scipy.signal.resample(waveform, desired_length)\n", + " return desired_sample_rate, waveform" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "AZEgCobA9bWl" + }, + "source": [ + "## Downloading and preparing the sound file\n", + "\n", + "Here you will download a wav file and listen to it.\n", + "If you have a file already available, just upload it to colab and use it instead.\n", + "\n", + "Note: The expected audio file should be a mono wav file at 16kHz sample rate." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WzZHvyTtsJrc" + }, + "outputs": [], + "source": [ + "!curl -O https://storage.googleapis.com/audioset/speech_whistling2.wav" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "D8LKmqvGzZzr" + }, + "outputs": [], + "source": [ + "!curl -O https://storage.googleapis.com/audioset/miaow_16k.wav" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Wo9KJb-5zuz1" + }, + "outputs": [], + "source": [ + "# wav_file_name = 'speech_whistling2.wav'\n", + "wav_file_name = 'miaow_16k.wav'\n", + "sample_rate, wav_data = wavfile.read(wav_file_name, 'rb')\n", + "sample_rate, wav_data = ensure_sample_rate(sample_rate, wav_data)\n", + "\n", + "# Show some basic information about the audio.\n", + "duration = len(wav_data)/sample_rate\n", + "print(f'Sample rate: {sample_rate} Hz')\n", + "print(f'Total duration: {duration:.2f}s')\n", + "print(f'Size of the input: {len(wav_data)}')\n", + "\n", + "# Listening to the wav file.\n", + "Audio(wav_data, rate=sample_rate)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "P9I290COsMBm" + }, + "source": [ + "The `wav_data` needs to be normalized to values in `[-1.0, 1.0]` (as stated in the model's [documentation](https://tfhub.dev/google/yamnet/1))." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bKr78aCBsQo3" + }, + "outputs": [], + "source": [ + "waveform = wav_data / tf.int16.max" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "e_Xwd4GPuMsB" + }, + "source": [ + "## Executing the Model\n", + "\n", + "Now the easy part: using the data already prepared, you just call the model and get the: scores, embedding and the spectrogram.\n", + "\n", + "The score is the main result you will use.\n", + "The spectrogram you will use to do some visualizations later." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BJGP6r-At_Jc" + }, + "outputs": [], + "source": [ + "# Run the model, check the output.\n", + "scores, embeddings, spectrogram = model(waveform)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vmo7griQprDk" + }, + "outputs": [], + "source": [ + "scores_np = scores.numpy()\n", + "spectrogram_np = spectrogram.numpy()\n", + "infered_class = class_names[scores_np.mean(axis=0).argmax()]\n", + "print(f'The main sound is: {infered_class}')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uj2xLf-P_ndS" + }, + "source": [ + "## Visualization\n", + "\n", + "YAMNet also returns some additional information that we can use for visualization.\n", + "Let's take a look on the Waveform, spectrogram and the top classes inferred." 
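Two small asides before the plots. First, a caveat based on our reading of the code above (not a note from the original model documentation): `ensure_sample_rate` calls `scipy.signal.resample`, but only `scipy.io.wavfile` is imported, so the resampling branch would fail with a `NameError` if it ever ran; the demo file is already at 16 kHz, but if you load your own audio at a different rate, import `scipy.signal` first. Second, an optional sketch that prints the five highest-scoring classes instead of only the single top class:

```python
# Needed only if your own file is not already at 16 kHz, so that
# scipy.signal.resample inside ensure_sample_rate() can be resolved.
import scipy.signal

# Optional: list the five highest-scoring classes (mean score across frames).
mean_scores_np = scores_np.mean(axis=0)
for i in np.argsort(mean_scores_np)[::-1][:5]:
    print(f'{class_names[i]}: {mean_scores_np[i]:.3f}')
```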
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_QSTkmv7wr2M" + }, + "outputs": [], + "source": [ + "plt.figure(figsize=(10, 6))\n", + "\n", + "# Plot the waveform.\n", + "plt.subplot(3, 1, 1)\n", + "plt.plot(waveform)\n", + "plt.xlim([0, len(waveform)])\n", + "\n", + "# Plot the log-mel spectrogram (returned by the model).\n", + "plt.subplot(3, 1, 2)\n", + "plt.imshow(spectrogram_np.T, aspect='auto', interpolation='nearest', origin='lower')\n", + "\n", + "# Plot and label the model output scores for the top-scoring classes.\n", + "mean_scores = np.mean(scores, axis=0)\n", + "top_n = 10\n", + "top_class_indices = np.argsort(mean_scores)[::-1][:top_n]\n", + "plt.subplot(3, 1, 3)\n", + "plt.imshow(scores_np[:, top_class_indices].T, aspect='auto', interpolation='nearest', cmap='gray_r')\n", + "\n", + "# patch_padding = (PATCH_WINDOW_SECONDS / 2) / PATCH_HOP_SECONDS\n", + "# values from the model documentation\n", + "patch_padding = (0.025 / 2) / 0.01\n", + "plt.xlim([-patch_padding-0.5, scores.shape[0] + patch_padding-0.5])\n", + "# Label the top_N classes.\n", + "yticks = range(0, top_n, 1)\n", + "plt.yticks(yticks, [class_names[top_class_indices[x]] for x in yticks])\n", + "_ = plt.ylim(-0.5 + np.array([top_n, 0]))" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "yamnet.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/install/_index.yaml b/site/en/install/_index.yaml index 906537fc062..71bc660f81d 100644 --- a/site/en/install/_index.yaml +++ b/site/en/install/_index.yaml @@ -20,7 +20,7 @@ landing_page: @@ -112,7 +112,7 @@ ImportError: cannot import name 'descriptor' @@ -227,7 +227,7 @@ ImportError: cannot import name 'descriptor' diff --git a/site/en/install/gpu.md b/site/en/install/gpu.md deleted file mode 100644 index 76008867312..00000000000 --- a/site/en/install/gpu.md +++ /dev/null @@ -1,197 +0,0 @@ -# GPU support - -Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. - -TensorFlow GPU support requires an assortment of drivers and libraries. To -simplify installation and avoid library conflicts, we recommend using a -[TensorFlow Docker image with GPU support](./docker.md) (Linux only). This setup -only requires the [NVIDIA® GPU drivers](https://www.nvidia.com/drivers){:.external}. - -These install instructions are for the latest release of TensorFlow. See the -[tested build configurations](./source.md#gpu) for CUDA® and cuDNN versions to -use with older TensorFlow releases. - -## Pip package - -See the [pip install guide](./pip) for available packages, systems requirements, -and instructions. The TensorFlow `pip` package includes GPU support for -CUDA®-enabled cards: - -
    -pip install tensorflow
    -
    - -This guide covers GPU support and installation steps for the latest *stable* -TensorFlow release. - -### Older versions of TensorFlow - -For releases 1.15 and older, CPU and GPU packages are separate: - -
    -pip install tensorflow==1.15      # CPU
    -pip install tensorflow-gpu==1.15  # GPU
    -
    - -## Hardware requirements - -The following GPU-enabled devices are supported: - -* NVIDIA® GPU card with CUDA® architectures 3.5, 5.0, 6.0, 7.0, 7.5, 8.0 and - higher than 8.0. See the list of - CUDA®-enabled - GPU cards. -* For GPUs with unsupported CUDA® architectures, or to avoid JIT compilation - from PTX, or to use different versions of the NVIDIA® libraries, see the - [Linux build from source](./source.md) guide. -* Packages do not contain PTX code except for the latest supported CUDA® - architecture; therefore, TensorFlow fails to load on older GPUs when - `CUDA_FORCE_PTX_JIT=1` is set. (See - Application - Compatibility for details.) - -Note: The error message "Status: device kernel image is invalid" indicates that -the TensorFlow package does not contain PTX for your architecture. You can -enable compute capabilities by [building TensorFlow from source](./source.md). - -## Software requirements - -The following NVIDIA® software must be installed on your system: - -* [NVIDIA® GPU drivers](https://www.nvidia.com/drivers){:.external} —CUDA® - 11.2 requires 450.80.02 or higher. -* [CUDA® Toolkit](https://developer.nvidia.com/cuda-toolkit-archive){:.external} - —TensorFlow supports CUDA® 11.2 (TensorFlow >= 2.5.0) -* [CUPTI](http://docs.nvidia.com/cuda/cupti/){:.external} ships with the CUDA® - Toolkit. -* [cuDNN SDK 8.1.0](https://developer.nvidia.com/cudnn){:.external} - [cuDNN versions](https://developer.nvidia.com/rdp/cudnn-archive){:.external}). -* *(Optional)* - [TensorRT 7](https://docs.nvidia.com/deeplearning/tensorrt/archives/index.html#trt_7){:.external} - to improve latency and throughput for inference on some models. - -## Linux setup - -The `apt` instructions below are the easiest way to install the required NVIDIA -software on Ubuntu. However, if [building TensorFlow from source](./source.md), -manually install the software requirements listed above, and consider using a -`-devel` [TensorFlow Docker image](./docker.md) as a base. - -Install [CUPTI](http://docs.nvidia.com/cuda/cupti/){:.external} which ships with -the CUDA® Toolkit. Append its installation directory to the `$LD_LIBRARY_PATH` -environmental variable: - -
    -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/extras/CUPTI/lib64
    -
    - -### Install CUDA with apt - -This section shows how to install CUDA® 11 (TensorFlow >= 2.4.0) on Ubuntu -16.04 and 18.04. These instructions may work for other Debian-based distros. - -Caution: [Secure Boot](https://wiki.ubuntu.com/UEFI/SecureBoot){:.external} -complicates installation of the NVIDIA driver and is beyond the scope of these instructions. - - -#### Ubuntu 18.04 (CUDA 11.0) - -
    -# Add NVIDIA package repositories
    -wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
    -sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
    -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub
    -sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
    -sudo apt-get update
    -
    -wget http://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
    -
    -sudo apt install ./nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb
    -sudo apt-get update
    -
    -wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
    -sudo apt install ./libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
    -sudo apt-get update
    -
    -# Install development and runtime libraries (~4GB)
    -sudo apt-get install --no-install-recommends \
    -    cuda-11-0 \
    -    libcudnn8=8.0.4.30-1+cuda11.0  \
    -    libcudnn8-dev=8.0.4.30-1+cuda11.0
    -
    -# Reboot. Check that GPUs are visible using the command: nvidia-smi
    -
    -# Install TensorRT. Requires that libcudnn8 is installed above.
    -sudo apt-get install -y --no-install-recommends libnvinfer7=7.1.3-1+cuda11.0 \
    -    libnvinfer-dev=7.1.3-1+cuda11.0 \
    -    libnvinfer-plugin7=7.1.3-1+cuda11.0
    -
    -
    - -#### Ubuntu 16.04 (CUDA 11.0) - -
    -# Add NVIDIA package repositories
    -# Add HTTPS support for apt-key
    -sudo apt-get install gnupg-curl
    -wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/cuda-ubuntu1604.pin
    -sudo mv cuda-ubuntu1604.pin /etc/apt/preferences.d/cuda-repository-pin-600
    -sudo apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/7fa2af80.pub
    -sudo add-apt-repository "deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/x86_64/ /"
    -sudo apt-get update
    -wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb
    -sudo apt install ./nvidia-machine-learning-repo-ubuntu1604_1.0.0-1_amd64.deb
    -sudo apt-get update
    -wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1604/x86_64/libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
    -sudo apt install ./libnvinfer7_7.1.3-1+cuda11.0_amd64.deb
    -sudo apt-get update
    -
    -# Install development and runtime libraries (~4GB)
    -sudo apt-get install --no-install-recommends \
    -    cuda-11-0 \
    -    libcudnn8=8.0.4.30-1+cuda11.0  \
    -    libcudnn8-dev=8.0.4.30-1+cuda11.0
    -
    -
    -# Reboot. Check that GPUs are visible using the command: nvidia-smi
    -
    -# Install TensorRT. Requires that libcudnn7 is installed above.
    -sudo apt-get install -y --no-install-recommends \
    -    libnvinfer7=7.1.3-1+cuda11.0 \
    -    libnvinfer-dev=7.1.3-1+cuda11.0 \
    -    libnvinfer-plugin7=7.1.3-1+cuda11.0 \
    -    libnvinfer-plugin-dev=7.1.3-1+cuda11.0
    -
    -
    - - -## Windows setup - -See the [hardware requirements](#hardware_requirements) and -[software requirements](#software_requirements) listed above. Read the -[CUDA® install guide for Windows](https://docs.nvidia.com/cuda/cuda-installation-guide-microsoft-windows/){:.external}. - -Make sure the installed NVIDIA software packages match the versions listed above. In -particular, TensorFlow will not load without the `cuDNN64_8.dll` file. To use a -different version, see the [Windows build from source](./source_windows.md) guide. - -Add the CUDA®, CUPTI, and cuDNN installation directories to the `%PATH%` -environmental variable. For example, if the CUDA® Toolkit is installed to -`C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0` and cuDNN to -`C:\tools\cuda`, update your `%PATH%` to match: - -
    -SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\bin;%PATH%
    -SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\extras\CUPTI\lib64;%PATH%
    -SET PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.0\include;%PATH%
    -SET PATH=C:\tools\cuda\bin;%PATH%
    -
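As a rough sanity check that the `cuDNN64_8.dll` file mentioned above is reachable through `%PATH%`, you can scan the PATH entries from Python. This is only an illustrative sketch, not part of the original instructions:

```python
import os

# Look for the cuDNN DLL on every %PATH% entry set by the commands above.
dll = "cudnn64_8.dll"
hits = [os.path.join(d, dll)
        for d in os.environ.get("PATH", "").split(os.pathsep)
        if d and os.path.isfile(os.path.join(d, dll))]
print(hits[0] if hits else f"{dll} not found on %PATH%")
```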
    - -## WSL2 setup - -Experimental support for WSL2 on Windows 10 19044 or higher with GPU access is now available. This corresponds to the most recent update of Windows 10 (aka version 21H2/November 2021 Update). You can get the latest update from here: [Download Windows 10](https://www.microsoft.com/en-us/software-download/windows10). - -For instructions, please see [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html) for CUDA in WSL. - - - diff --git a/site/en/install/gpu_plugins.md b/site/en/install/gpu_plugins.md index 358db01b312..39e3cf09b29 100644 --- a/site/en/install/gpu_plugins.md +++ b/site/en/install/gpu_plugins.md @@ -1,12 +1,12 @@ # GPU device plugins -Note: This page is for non-NVIDIA® GPU devices. For NVIDIA® GPU support, click -[here](./gpu.md). +Note: This page is for non-NVIDIA® GPU devices. For NVIDIA® GPU support, go to +the [Install TensorFlow with pip](./pip.md) guide. TensorFlow's -pluggable -device architecture adds new device support as separate plug-in packages -that are installed alongside the official TensorFlow package. +[pluggable device](https://github.com/tensorflow/community/blob/master/rfcs/20200624-pluggable-device-for-tensorflow.md) +architecture adds new device support as separate plug-in packages that are +installed alongside the official TensorFlow package. The mechanism requires no device-specific changes in the TensorFlow code. It relies on C APIs to communicate with the TensorFlow binary in a stable manner. @@ -57,6 +57,24 @@ run() # PluggableDevices also work with tf.function and graph mode. Metal `PluggableDevice` for macOS GPUs: -* [Getting started guide](https://developer.apple.com/metal/tensorflow-plugin/){:.external}. +* Works with TF 2.5 or later. +* [Getting started guide](https://developer.apple.com/metal/tensorflow-plugin/). * For questions and feedback, please visit the - [Apple Developer Forum](https://developer.apple.com/forums/tags/tensorflow-metal){:.external}. + [Apple Developer Forum](https://developer.apple.com/forums/tags/tensorflow-metal). + +DirectML `PluggableDevice` for Windows and WSL (preview): + +* Works with `tensorflow-cpu` package, version 2.10 or later. +* [PyPI wheel](https://pypi.org/project/tensorflow-directml-plugin/). +* [GitHub repo](https://github.com/microsoft/tensorflow-directml-plugin). +* For questions, feedback or to raise issues, please visit the + [Issues page of `tensorflow-directml-plugin` on GitHub](https://github.com/microsoft/tensorflow-directml-plugin/issues). + +Intel® Extension for TensorFlow `PluggableDevice` for Linux and WSL: + +* Works with TF 2.10 or later. +* [Getting started guide](https://intel.github.io/intel-extension-for-tensorflow/latest/get_started.html) +* [PyPI wheel](https://pypi.org/project/intel-extension-for-tensorflow/). +* [GitHub repo](https://github.com/intel/intel-extension-for-tensorflow). +* For questions, feedback, or to raise issues, please visit the + [Issues page of `intel-extension-for-tensorflow` on GitHub](https://github.com/intel/intel-extension-for-tensorflow/issues). diff --git a/site/en/install/lang_c.ipynb b/site/en/install/lang_c.ipynb index cdc62873df3..788a5e6c891 100644 --- a/site/en/install/lang_c.ipynb +++ b/site/en/install/lang_c.ipynb @@ -48,16 +48,16 @@ "source": [ "
      -
    • Python 3.7–3.10
    • +
    • Python 3.9–3.12
    • Ubuntu 16.04 or later
    • Windows 7 or later (with C++ redistributable)
    @@ -41,7 +41,6 @@ landing_page:

    Install TensorFlow with Python's pip package manager.

    Official packages available for Ubuntu, Windows, and macOS.

    -

    See the GPU guide for CUDA®-enabled cards.

    buttons: - label: Read the pip install guide @@ -52,8 +51,10 @@ landing_page:
             # Requires the latest pip
             pip install --upgrade pip
    - # Current stable release for CPU and GPU + # Current stable release for CPU pip install tensorflow
    + # Current stable release for GPU (Linux / WSL2) + pip install tensorflow[and-cuda]
    # Or try the preview build (unstable) pip install tf-nightly
    @@ -67,8 +68,7 @@ landing_page: The TensorFlow Docker images are already configured to run TensorFlow. A Docker container runs in a - virtual environment and is the easiest way to set up GPU - support. + virtual environment and is the easiest way to set up GPU support.

             docker pull tensorflow/tensorflow:latest  # Download latest stable image
    diff --git a/site/en/install/_toc.yaml b/site/en/install/_toc.yaml index c8f60bde852..26cdb270bb8 100644 --- a/site/en/install/_toc.yaml +++ b/site/en/install/_toc.yaml @@ -7,8 +7,6 @@ toc: - title: Docker path: /install/docker - heading: Additional setup -- title: GPU support - path: /install/gpu - title: GPU device plugins path: /install/gpu_plugins - title: Problems diff --git a/site/en/install/docker.md b/site/en/install/docker.md index 30942924688..376ca0820a7 100644 --- a/site/en/install/docker.md +++ b/site/en/install/docker.md @@ -1,45 +1,43 @@ # Docker -[Docker](https://docs.docker.com/install/){:.external} uses *containers* to +[Docker](https://docs.docker.com/install/) uses *containers* to create virtual environments that isolate a TensorFlow installation from the rest of the system. TensorFlow programs are run *within* this virtual environment that can share resources with its host machine (access directories, use the GPU, connect to the Internet, etc.). The -[TensorFlow Docker images](https://hub.docker.com/r/tensorflow/tensorflow/){:.external} +[TensorFlow Docker images](https://hub.docker.com/r/tensorflow/tensorflow/) are tested for each release. -Docker is the easiest way to enable TensorFlow [GPU support](./gpu.md) on Linux since only the -[NVIDIA® GPU driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external} +Docker is the easiest way to enable TensorFlow [GPU support](./pip.md) on Linux since only the +[NVIDIA® GPU driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver) is required on the *host* machine (the *NVIDIA® CUDA® Toolkit* does not need to be installed). ## TensorFlow Docker requirements -1. [Install Docker](https://docs.docker.com/install/){:.external} on +1. [Install Docker](https://docs.docker.com/install/) on your local *host* machine. -2. For GPU support on Linux, [install NVIDIA Docker support](https://github.com/NVIDIA/nvidia-docker){:.external}. +2. For GPU support on Linux, [install NVIDIA Docker support](https://github.com/NVIDIA/nvidia-container-toolkit). * Take note of your Docker version with `docker -v`. Versions __earlier than__ 19.03 require nvidia-docker2 and the `--runtime=nvidia` flag. On versions __including and after__ 19.03, you will use the `nvidia-container-toolkit` package and the `--gpus all` flag. Both options are documented on the page linked above. Note: To run the `docker` command without `sudo`, create the `docker` group and add your user. For details, see the -[post-installation steps for Linux](https://docs.docker.com/install/linux/linux-postinstall/){:.external}. +[post-installation steps for Linux](https://docs.docker.com/install/linux/linux-postinstall/). ## Download a TensorFlow Docker image The official TensorFlow Docker images are located in the -[tensorflow/tensorflow](https://hub.docker.com/r/tensorflow/tensorflow/){:.external} -Docker Hub repository. Image releases [are tagged](https://hub.docker.com/r/tensorflow/tensorflow/tags/){:.external} +[tensorflow/tensorflow](https://hub.docker.com/r/tensorflow/tensorflow/) +Docker Hub repository. Image releases [are tagged](https://hub.docker.com/r/tensorflow/tensorflow/tags/) using the following format: | Tag | Description | |-------------|----------------------------------------------------------------------------------------------------------------------| | `latest` | The latest release of TensorFlow CPU binary image. Default. 
| | `nightly` | Nightly builds of the TensorFlow image. (Unstable.) | -| *`version`* | Specify the *version* of the TensorFlow binary image, for example\: *2.1.0* | -| `devel` | Nightly builds of a TensorFlow `master` development environment. Includes TensorFlow source code. | -| `custom-op` | Special experimental image for developing TF custom ops. More info [here](https://github.com/tensorflow/custom-op). | +| *`version`* | Specify the *version* of the TensorFlow binary image, for example\: *2.8.3* | Each base *tag* has variants that add or change functionality: @@ -66,7 +64,7 @@ To start a TensorFlow-configured container, use the following command form: docker run [-it] [--rm] [-p hostPort:containerPort] tensorflow/tensorflow[:tag] [command]
    -For details, see the [docker run reference](https://docs.docker.com/engine/reference/run/){:.external}. +For details, see the [docker run reference](https://docs.docker.com/engine/reference/run/). ### Examples using CPU-only images @@ -100,7 +98,7 @@ docker run -it --rm -v $PWD:/tmp -w /tmp tensorflow/tensorflow python ./script.p Permission issues can arise when files created within a container are exposed to the host. It's usually best to edit files on the host system. -Start a [Jupyter Notebook](https://jupyter.org/){:.external} server using +Start a [Jupyter Notebook](https://jupyter.org/) server using TensorFlow's nightly build:
    @@ -114,13 +112,13 @@ Follow the instructions and open the URL in your host web browser:
     ## GPU support
     
     Docker is the easiest way to run TensorFlow on a GPU since the *host* machine
    -only requires the [NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external}
    +only requires the [NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver)
     (the *NVIDIA® CUDA® Toolkit* is not required).
     
    -Install the [Nvidia Container Toolkit](https://github.com/NVIDIA/nvidia-docker/blob/master/README.md#quickstart){:.external} 
+Install the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-docker/blob/master/README.md#quickstart)
     to add NVIDIA® GPU support to Docker. `nvidia-container-runtime` is only
     available for Linux. See the `nvidia-container-runtime` 
    -[platform support FAQ](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#platform-support){:.external}
    +[platform support FAQ](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#platform-support)
     for details.
     
     Check if a GPU is available:
    @@ -132,7 +130,7 @@ lspci | grep -i nvidia
     Verify your `nvidia-docker` installation:
     
     
    -docker run --gpus all --rm nvidia/cuda nvidia-smi
    +docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi
     
    Note: `nvidia-docker` v2 uses `--runtime=nvidia` instead of `--gpus all`. `nvidia-docker` v1 uses the `nvidia-docker` alias, diff --git a/site/en/install/errors.md b/site/en/install/errors.md index 158b165f492..938ba8b454f 100644 --- a/site/en/install/errors.md +++ b/site/en/install/errors.md @@ -1,8 +1,8 @@ # Build and install error messages -TensorFlow uses [GitHub issues](https://github.com/tensorflow/tensorflow/issues){:.external}, -[Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow){:.external} and -[TensorFlow Forum](https://discuss.tensorflow.org/c/general-discussion/6){:.external} +TensorFlow uses [GitHub issues](https://github.com/tensorflow/tensorflow/issues), +[Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow) and +[TensorFlow Forum](https://discuss.tensorflow.org/c/general-discussion/6) to track, document, and discuss build and installation problems. The following list links error messages to a solution or discussion. If you find @@ -61,7 +61,7 @@ CodedInputStream::SetTotalBytesLimit() in google/protobuf/io/coded_stream.h.
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    42006320
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    35190574
    33623453
    IOError: [Errno 2] No such file or directory:
    -  '/tmp/pip-o6Tpui-build/setup.py'
    + '/tmp/pip-o6Tpui-build/setup.py'
    35190574
    \n", " \n", " \n", " \n", " \n", "
    \n", - " View on TensorFlow.org\n", + " View on TensorFlow.org\n", " \n", - " Run in Google Colab\n", + " Run in Google Colab\n", " \n", - " View source on GitHub\n", + " View source on GitHub\n", " \n", - " Download notebook\n", + " Download notebook\n", "
    " ] @@ -81,15 +81,15 @@ "id": "Vk--31hqIwSV" }, "source": [ - "## Nightly Libtensorflow C packages\n", + "## Nightly libtensorflow C packages\n", "\n", - "Libtensorflow packages are built nightly and uploaded to GCS for all supported\n", + "libtensorflow packages are built nightly and uploaded to GCS for all supported\n", "platforms. They are uploaded to the\n", "[libtensorflow-nightly GCS bucket](https://storage.googleapis.com/libtensorflow-nightly)\n", "and are indexed by operating system and date built. For MacOS and Linux shared\n", - "objects, we have a\n", + "objects, there is a\n", "[script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/ci_build/builds/libtensorflow_nightly_symlink.sh)\n", - "that renames the .so files versioned to the current date copied into the\n", + "that renames the `.so` files versioned to the current date copied into the\n", "directory with the artifacts." ] }, @@ -99,8 +99,6 @@ "id": "qowtdsijFMYZ" }, "source": [ - "\n", - "\n", "## Supported Platforms\n", "\n", "TensorFlow for C is supported on the following systems:\n", @@ -125,35 +123,52 @@ "id": "y50y01XUFVb2" }, "source": [ - "\n", - "### Download & extract\n", + "### Download and extract\n", "\n", "\n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - "
    TensorFlow C libraryURL
    Linux
    Linux\n", + " \n", + "
    Linux CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-linux-x86_64-2.7.0.tar.gzhttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-cpu-linux-x86_64.tar.gz
    Linux GPU supporthttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-linux-x86_64-2.7.0.tar.gzhttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-gpu-linux-x86_64.tar.gz
    macOS
    macOS\n", + " \n", + "
    macOS CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-darwin-x86_64-2.7.0.tar.gzhttps://storage.googleapis.com/tensorflow/versions/2.16.2/libtensorflow-cpu-darwin-x86_64.tar.gz
    Windows
    macOS ARM64 CPU onlyhttps://storage.googleapis.com/tensorflow/versions/2.18.0/libtensorflow-cpu-darwin-arm64.tar.gz
    Windows\n", + " \n", + "
    Windows CPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-cpu-windows-x86_64-2.7.0.ziphttps://storage.googleapis.com/tensorflow/versions/2.18.1/libtensorflow-cpu-windows-x86_64.zip
    Windows GPU onlyhttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-windows-x86_64-2.7.0.ziphttps://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-gpu-windows-x86_64-2.10.0.zip
    " + "

    " ] }, { @@ -177,8 +192,8 @@ "outputs": [], "source": [ "%%bash\n", - "FILENAME=libtensorflow-cpu-linux-x86_64-2.7.0.tar.gz\n", - "wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/libtensorflow/${FILENAME}\n", + "FILENAME=libtensorflow-cpu-linux-x86_64.tar.gz\n", + "wget -q --no-check-certificate https://storage.googleapis.com/tensorflow/versions/2.18.1/${FILENAME}\n", "sudo tar -C /usr/local -xzf ${FILENAME}" ] }, @@ -188,7 +203,6 @@ "id": "fcBJDdojJDyk" }, "source": [ - "\n", "### Linker\n", "\n", "On Linux/macOS, if you extract the TensorFlow C library to a system directory,\n", @@ -266,12 +280,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "attributes": { - "classes": [ - "c" - ], - "id": "" - }, "id": "b5851f1b" }, "outputs": [], @@ -351,22 +359,17 @@ "id": "ea5fd208" }, "source": [ - "\n", - "\n", - "\n", "## Build from source\n", "\n", "TensorFlow is open source. Read\n", - "[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/lib_package/README.md){:.external}\n", + "[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/lib_package/README.md)\n", "to build TensorFlow's C library from source code." ] } ], "metadata": { "colab": { - "collapsed_sections": [], - "name": "Copy of lang_c.ipynb", - "private_outputs": true, + "name": "lang_c.ipynb", "provenance": [], "toc_visible": true }, diff --git a/site/en/install/lang_java_legacy.md b/site/en/install/lang_java_legacy.md index af177dc0950..37341c36659 100644 --- a/site/en/install/lang_java_legacy.md +++ b/site/en/install/lang_java_legacy.md @@ -1,7 +1,7 @@ # Install TensorFlow for Java Warning: TensorFlow for Java is deprecated and will be removed in a future -version of TensorFlow once the replacement is stable. +version of TensorFlow once [the replacement](https://www.tensorflow.org/jvm) is stable. TensorFlow provides a [Java API](https://www.tensorflow.org/api_docs/java/reference/org/tensorflow/package-summary)— @@ -27,7 +27,7 @@ To use TensorFlow on Android see [TensorFlow Lite](https://tensorflow.org/lite) ## TensorFlow with Apache Maven -To use TensorFlow with [Apache Maven](https://maven.apache.org){:.external}, +To use TensorFlow with [Apache Maven](https://maven.apache.org), add the dependency to the project's `pom.xml` file: ```xml @@ -40,7 +40,7 @@ add the dependency to the project's `pom.xml` file: ### GPU support -If your system has [GPU support](./gpu.md), add the following TensorFlow +If your system has [GPU support](./pip.md), add the following TensorFlow dependencies to the project's `pom.xml` file: ```xml @@ -167,11 +167,11 @@ system and processor support: Note: On Windows, the native library (`tensorflow_jni.dll`) requires `msvcp140.dll` at runtime. See the [Windows build from source](./source_windows.md) guide to install the -[Visual C++ 2019 Redistributable](https://visualstudio.microsoft.com/vs/){:.external}. +[Visual C++ 2019 Redistributable](https://visualstudio.microsoft.com/vs/). ### Compile -Using the `HelloTensorFlow.java` file from the [previous example](#example), +Using the `HelloTensorFlow.java` file from the [previous example](#example-program), compile a program that uses TensorFlow. Make sure the `libtensorflow.jar` is accessible to your `classpath`: @@ -203,5 +203,5 @@ Success: TensorFlow for Java is configured. ## Build from source TensorFlow is open source. 
Read -[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/java/README.md){:.external} +[the instructions](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/java/README.md) to build TensorFlow's Java and native libraries from source code. diff --git a/site/en/install/pip.html b/site/en/install/pip.html deleted file mode 100644 index 9a7265725a9..00000000000 --- a/site/en/install/pip.html +++ /dev/null @@ -1,409 +0,0 @@ - - - Install TensorFlow with pip - - - - - - -
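Tying back to the libtensorflow C library covered above: after extracting the archive (for example to `/usr/local`), a minimal smoke test from Python is to call the C API's `TF_Version()` through ctypes. This is a sketch and assumes the extracted shared object is discoverable by the loader:

```python
import ctypes

# Load the extracted C library (e.g. /usr/local/lib/libtensorflow.so on Linux)
# and print the version string reported by the TF_Version() C API call.
lib = ctypes.CDLL("libtensorflow.so")
lib.TF_Version.restype = ctypes.c_char_p
print("libtensorflow reports:", lib.TF_Version().decode())
```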

    TensorFlow 2 packages are available

    -
      -
    • tensorflow —Latest stable release with CPU and GPU support (Ubuntu and Windows)
    • -
    • tf-nightly —Preview build (unstable). Ubuntu and Windows include GPU support.
    • -
    - - -

    Older versions of TensorFlow

    - -

    For TensorFlow 1.x, CPU and GPU packages are separate:

    - -
      -
    • tensorflow==1.15 —Release for CPU-only
    • -
    • tensorflow-gpu==1.15 —Release with GPU support (Ubuntu and Windows)
    • -
    - - -

    System requirements

    -
      -
    • Python 3.7–3.10 -
        -
      • Python 3.10 support requires TensorFlow 2.8 or later.
      • -
      • Python 3.9 support requires TensorFlow 2.5 or later.
      • -
      • Python 3.8 support requires TensorFlow 2.2 or later.
      • -
      -
    • -
    • pip 19.0 or later (requires manylinux2010 support)
    • -
    • Ubuntu 16.04 or later (64-bit)
    • -
    • macOS 10.12.6 (Sierra) or later (64-bit) (no GPU support) -
        -
      • macOS requires pip 20.3 or later
      • -
      -
    • -
    • Windows 7 or later (64-bit) - -
    • -
    • GPU support requires a CUDA®-enabled card (Ubuntu and Windows)
    • -
    - - - -

    Hardware requirements

    -
      -
    • Starting with TensorFlow 1.6, binaries use AVX instructions which may not run on older CPUs.
    • -
    • Read the GPU support guide to set up a CUDA®-enabled GPU card on Ubuntu or Windows.
    • -
    - - -

    1. Install the Python development environment on your system

    - -

    - Check if your Python environment is already configured: -

    - - - -
    -python3 --version
    -pip3 --version
    -
    - -

    - If these packages are already installed, skip to the next step.
    - Otherwise, install Python, the - pip package manager, - and venv: -

    - -
    -
    -

    Ubuntu

    -
    -sudo apt update
    -sudo apt install python3-dev python3-pip python3-venv
    -
    -
    - -
    -

    macOS

    - -

    Install using the Homebrew package manager:

    -
    -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
    -export PATH="/usr/local/opt/python/libexec/bin:$PATH"
    -# if you are on macOS 10.12 (Sierra) use `export PATH="/usr/local/bin:/usr/local/sbin:$PATH"`
    -brew update
    -brew install python  # Python 3
    -
    -
    - -
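To confirm that the Homebrew-installed interpreter is the one your shell will actually pick up, a small optional check from Python (purely illustrative, not part of the original steps):

```python
import shutil
import sys

# Paths resolved from the current PATH after the brew install above.
print("python3:", shutil.which("python3"))
print("pip3:", shutil.which("pip3"))
print("running interpreter:", sys.executable)
```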
    -

    Windows

    -

    - Install the Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017, - and 2019. Starting with the TensorFlow 2.1.0 version, the msvcp140_1.dll - file is required from this package (which may not be provided from older redistributable packages). - The redistributable comes with Visual Studio 2019 but can be installed separately: -

    -
      -
    1. Go to the Microsoft Visual C++ downloads,
    2. -
    3. Scroll down the page to the Visual Studio 2015, 2017 and 2019 section.
    4. -
    5. Download and install the Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019 for your platform.
    6. -
    -

    Make sure long paths are enabled on Windows.

    -

    Install the 64-bit Python 3 release for Windows (select pip as an optional feature).

    -
    - -
    -

    Other

    -
    -curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
    -python get-pip.py
    -
    -
    -
    - - - - -

    2. Create a virtual environment (recommended)

    - -

    - Python virtual environments are used to isolate package installation from the system. -

    - -
    -
    -

    Ubuntu

    -

    - Create a new virtual environment by choosing a Python interpreter and making a - ./venv directory to hold it: -

    -
    python3 -m venv --system-site-packages ./venv
    -

    - Activate the virtual environment using a shell-specific command: -

    -
    source ./venv/bin/activate  # sh, bash, or zsh
    -
    . ./venv/bin/activate.fish  # fish
    -
    source ./venv/bin/activate.csh  # csh or tcsh
    - -

    - When the virtual environment is active, your shell prompt is prefixed with (venv). -

    -
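Besides the `(venv)` prompt prefix, you can confirm from Python itself that the interpreter belongs to the virtual environment. A minimal check, added here for convenience:

```python
import sys

# True when this interpreter was launched from an activated venv;
# sys.prefix then points at the ./venv directory created above.
print(sys.prefix != sys.base_prefix, sys.prefix)
```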

    - Install packages within a virtual environment without affecting the host system - setup. Start by upgrading pip: -

    -
    -pip install --upgrade pip
    -
    -pip list  # show packages installed within the virtual environment
    -
    -

    - And to exit the virtual environment later: -

    -
    deactivate  # don't exit until you're done using TensorFlow
    -
    - -
    -

    macOS

    - -

    - Create a new virtual environment by choosing a Python interpreter and making a - ./venv directory to hold it: -

    -
    python3 -m venv --system-site-packages ./venv
    -

    - Activate the virtual environment using a shell-specific command: -

    -
    source ./venv/bin/activate  # sh, bash, or zsh
    -
    . ./venv/bin/activate.fish  # fish
    -
    source ./venv/bin/activate.csh  # csh or tcsh
    - -

    - When the virtual environment is active, your shell prompt is prefixed with (venv). -

    -

    - Install packages within a virtual environment without affecting the host system - setup. Start by upgrading pip: -

    -
    -pip install --upgrade pip
    -
    -pip list  # show packages installed within the virtual environment
    -
    -

    - And to exit the virtual environment later: -

    -
    deactivate  # don't exit until you're done using TensorFlow
    -
    - - -
    -

    Windows

    -

    - Create a new virtual environment by choosing a Python interpreter and making a - .\venv directory to hold it: -

    -
    python -m venv --system-site-packages .\venv
    -

    - Activate the virtual environment: -

    -
    .\venv\Scripts\activate
    -

    - Install packages within a virtual environment without affecting the host system - setup. Start by upgrading pip: -

    -
    -pip install --upgrade pip
    -
    -pip list  # show packages installed within the virtual environment
    -
    -

    - And to exit the virtual environment later: -

    -
    deactivate  # don't exit until you're done using TensorFlow
    -
    - - -
    -

    Conda

    -

    -While the TensorFlow provided pip package is recommended, a -community-supported Anaconda package -is available. To install, read the Anaconda TensorFlow guide. -

    -
    -
    - - -

    3. Install the TensorFlow pip package

    - -

    - Choose one of the following TensorFlow packages to install from PyPI: -

    - -
      -
    • tensorflow —Latest stable release with CPU and GPU support (Ubuntu and Windows).
    • -
    • tf-nightly —Preview build (unstable). Ubuntu and Windows include GPU support.
    • -
    • tensorflow==1.15 —The final version of TensorFlow 1.x.
    • -
    - - - -
    -
    -

    Virtual environment install

    -
    pip install --upgrade tensorflow
    -

    Verify the install:

    -
    python -c "import tensorflow as tf;print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
    -
    - -
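If the one-line check above succeeds, a slightly longer script can also report the installed version and any visible GPUs. A sketch, assuming the `tensorflow` package installed in this step:

```python
import tensorflow as tf

# Version, a simple op on random data, and the list of GPUs TensorFlow can see.
print("TensorFlow:", tf.__version__)
print(tf.reduce_sum(tf.random.normal([1000, 1000])))
print("GPUs:", tf.config.list_physical_devices("GPU"))
```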
    -

    System install

    -
    pip3 install --user --upgrade tensorflow  # install in $HOME
    -

    Verify the install:

    -
    python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))"
    -
    -
    - - - -

    Package location

    - -

    - A few installation mechanisms require the URL of the TensorFlow Python package. - The value you specify depends on your Python version. -

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    VersionURL
    Linux
    Python 3.7 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.8.0-cp37-cp37m-manylinux2010_x86_64.whl
    Python 3.7 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.8.0-cp37-cp37m-manylinux2010_x86_64.whl
    Python 3.8 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.8.0-cp38-cp38-manylinux2010_x86_64.whl
    Python 3.8 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.8.0-cp38-cp38-manylinux2010_x86_64.whl
    Python 3.9 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.8.0-cp39-cp39-manylinux2010_x86_64.whl
    Python 3.9 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.8.0-cp39-cp39-manylinux2010_x86_64.whl
    Python 3.10 GPU supporthttps://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-2.8.0-cp310-cp310-manylinux2010_x86_64.whl
    Python 3.10 CPU-onlyhttps://storage.googleapis.com/tensorflow/linux/cpu/tensorflow_cpu-2.8.0-cp310-cp310-manylinux2010_x86_64.whl
    macOS (CPU-only)
    Python 3.7https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.8.0-cp37-cp37m-macosx_10_11_x86_64.whl
    Python 3.8https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.8.0-cp38-cp38-macosx_10_11_x86_64.whl
    Python 3.9https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.8.0-cp39-cp39-macosx_10_11_x86_64.whl
    Python 3.10https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-2.8.0-cp310-cp310-macosx_10_11_x86_64.whl
    Windows
    Python 3.7 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.8.0-cp37-cp37m-win_amd64.whl
    Python 3.7 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.8.0-cp37-cp37m-win_amd64.whl
    Python 3.8 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.8.0-cp38-cp38-win_amd64.whl
    Python 3.8 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.8.0-cp38-cp38-win_amd64.whl
    Python 3.9 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.8.0-cp39-cp39-win_amd64.whl
    Python 3.9 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.8.0-cp39-cp39-win_amd64.whl
    Python 3.10 GPU supporthttps://storage.googleapis.com/tensorflow/windows/gpu/tensorflow_gpu-2.8.0-cp310-cp310-win_amd64.whl
    Python 3.10 CPU-onlyhttps://storage.googleapis.com/tensorflow/windows/cpu/tensorflow_cpu-2.8.0-cp310-cp310-win_amd64.whl
    - - - diff --git a/site/en/install/pip.md b/site/en/install/pip.md new file mode 100644 index 00000000000..a9e4bf4bf74 --- /dev/null +++ b/site/en/install/pip.md @@ -0,0 +1,658 @@ + +# Install TensorFlow with pip + + +This guide is for the latest stable version of TensorFlow. For the +preview build *(nightly)*, use the pip package named +`tf-nightly`. Refer to [these tables](./source#tested_build_configurations) for +older TensorFlow version requirements. For the CPU-only build, use the pip +package named `tensorflow-cpu`. + +Here are the quick versions of the install commands. Scroll down for the +step-by-step instructions. + +* {Linux} + + Note: Starting with TensorFlow `2.10`, Linux CPU-builds for Aarch64/ARM64 + processors are built, maintained, tested and released by a third party: + [AWS](https://aws.amazon.com/). + Installing the [`tensorflow`](https://pypi.org/project/tensorflow/) + package on an ARM machine installs AWS's + [`tensorflow-cpu-aws`](https://pypi.org/project/tensorflow-cpu-aws/) package. + They are provided as-is. Tensorflow will use reasonable efforts to maintain + the availability and integrity of this pip package. There may be delays if + the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this collaboration. + + ```bash + python3 -m pip install 'tensorflow[and-cuda]' + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {MacOS} + + ```bash + # There is currently no official GPU support for MacOS. + python3 -m pip install tensorflow + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +* {Windows Native} + + Caution: TensorFlow `2.10` was the **last** TensorFlow release that + supported GPU on native-Windows. + Starting with TensorFlow `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-wsl2), + or install `tensorflow` or `tensorflow-cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-) + + ```bash + conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + # Anything above 2.10 is not supported on the GPU on Windows Native + python -m pip install "tensorflow<2.11" + # Verify the installation: + python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {Windows WSL2} + + Note: TensorFlow with GPU access is supported for WSL2 on Windows 10 19044 or + higher. This corresponds to Windows 10 version 21H2, the November 2021 + update. You can get the latest update from here: + [Download Windows 10](https://www.microsoft.com/software-download/windows10). + For instructions, see + [Install WSL2](https://docs.microsoft.com/windows/wsl/install) + and + [NVIDIA’s setup docs](https://docs.nvidia.com/cuda/wsl-user-guide/index.html) + for CUDA in WSL. + + ```bash + python3 -m pip install tensorflow[and-cuda] + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + +* {CPU} + + Note: Starting with TensorFlow `2.10`, Windows CPU-builds for x86/x64 + processors are built, maintained, tested and released by a third party: + [Intel](https://www.intel.com/). 
+ Installing the Windows-native [`tensorflow`](https://pypi.org/project/tensorflow/) + or [`tensorflow-cpu`](https://pypi.org/project/tensorflow-cpu/) + package installs Intel's + [`tensorflow-intel`](https://pypi.org/project/tensorflow-intel/) + package. These packages are provided as-is. Tensorflow will use reasonable + efforts to maintain the availability and integrity of this pip package. + There may be delays if the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this + collaboration. + + ```bash + python3 -m pip install tensorflow + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +* {Nightly} + + ```bash + python3 -m pip install tf-nightly + # Verify the installation: + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + +## Hardware requirements + +Note: TensorFlow binaries use +[AVX instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions#CPUs_with_AVX) +which may not run on older CPUs. + +The following GPU-enabled devices are supported: + +* NVIDIA® GPU card with CUDA® architectures 3.5, 5.0, 6.0, 7.0, 7.5, 8.0 and + higher. See the list of + [CUDA®-enabled GPU cards](https://developer.nvidia.com/cuda-gpus). +* For GPUs with unsupported CUDA® architectures, or to avoid JIT compilation + from PTX, or to use different versions of the NVIDIA® libraries, see the + [Linux build from source](./source.md) guide. +* Packages do not contain PTX code except for the latest supported CUDA® + architecture; therefore, TensorFlow fails to load on older GPUs when + `CUDA_FORCE_PTX_JIT=1` is set. (See + [Application Compatibility](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#application-compatibility) + for details.) + +Note: The error message "Status: device kernel image is invalid" indicates that +the TensorFlow package does not contain PTX for your architecture. You can +enable compute capabilities by [building TensorFlow from source](./source.md). + +## System requirements + +* Ubuntu 16.04 or higher (64-bit) +* macOS 12.0 (Monterey) or higher (64-bit) *(no GPU support)* +* Windows Native - Windows 7 or higher (64-bit) *(no GPU support after TF 2.10)* +* Windows WSL2 - Windows 10 19044 or higher (64-bit) + +Note: GPU support is available for Ubuntu and Windows with CUDA®-enabled cards. + +## Software requirements + +* Python 3.9–3.12 +* pip version 19.0 or higher for Linux (requires `manylinux2014` support) and + Windows. pip version 20.3 or higher for macOS. +* Windows Native Requires + [Microsoft Visual C++ Redistributable for Visual Studio 2015, 2017 and 2019](https://learn.microsoft.com/en-us/cpp/windows/latest-supported-vc-redist) + + +The following NVIDIA® software are only required for GPU support. + +* [NVIDIA® GPU drivers](https://www.nvidia.com/drivers) + * >= 525.60.13 for Linux + * >= 528.33 for WSL on Windows +* [CUDA® Toolkit 12.3](https://developer.nvidia.com/cuda-toolkit-archive). +* [cuDNN SDK 8.9.7](https://developer.nvidia.com/cudnn). +* *(Optional)* + [TensorRT](https://docs.nvidia.com/deeplearning/tensorrt/archives/index.html#trt_7) + to improve latency and throughput for inference. + +## Step-by-step instructions + +* {Linux} + + ### 1. System requirements + + * Ubuntu 16.04 or higher (64-bit) + + TensorFlow only officially supports Ubuntu. 
However, the following + instructions may also work for other Linux distros. + + Note: Starting with TensorFlow `2.10`, Linux CPU-builds for Aarch64/ARM64 + processors are built, maintained, tested and released by a third party: + [AWS](https://aws.amazon.com/). + Installing the [`tensorflow`](https://pypi.org/project/tensorflow/) + package on an ARM machine installs AWS's + [`tensorflow-cpu-aws`](https://pypi.org/project/tensorflow-cpu-aws/) package. + They are provided as-is. Tensorflow will use reasonable efforts to maintain + the availability and integrity of this pip package. There may be delays if + the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this collaboration. + + ### 2. GPU setup + + You can skip this section if you only run TensorFlow on the CPU. + + Install the + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. You can use the following command to verify it is + installed. + + ```bash + nvidia-smi + ``` + + ### 3. Create a virtual environment with [venv](https://docs.python.org/3/library/venv.html){:.external} + + The venv module is part of Python’s standard library and is the officially recommended way to create virtual environments. + + Navigate to your desired virtual environments directory and create a new venv environment named `tf` with the following command. + + ```bash + python3 -m venv tf + ``` + + You can activate it with the following command. + + ```bash + source tf/bin/activate + ``` + + Make sure that the virtual environment is activated for the rest of the installation. + + ### 4. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + # For GPU users + pip install tensorflow[and-cuda] + # For CPU users + pip install tensorflow + ``` + + **Note:** Do not install TensorFlow with `conda`. It may not have the latest stable version. `pip` is recommended since TensorFlow is only officially released to PyPI. + + ### 6. Verify the installation + + Verify the CPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. **If not continue to the next step**. + + ### 6. [GPU only] Virtual environment configuration + + If the GPU test in the last section was unsuccessful, the most likely cause is that components aren't being detected, + and/or conflict with the existing system CUDA installation. So you need to add some symbolic links to fix this. + + * Create symbolic links to NVIDIA shared libraries: + + ```bash + pushd $(dirname $(python -c 'print(__import__("tensorflow").__file__)')) + ln -svf ../nvidia/*/lib/*.so* . 
+ popd + ``` + + * Create a symbolic link to ptxas: + + ```bash + ln -sf $(find $(dirname $(dirname $(python -c "import nvidia.cuda_nvcc; + print(nvidia.cuda_nvcc.__file__)"))/*/bin/) -name ptxas -print -quit) $VIRTUAL_ENV/bin/ptxas + ``` + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + + + +* {MacOS} + + ### 1. System requirements + + * macOS 10.12.6 (Sierra) or higher (64-bit) + + Note: While TensorFlow supports Apple Silicon (M1), packages that include + custom C++ extensions for TensorFlow also need to be compiled for Apple M1. + Some packages, like + [tensorflow_decision_forests](https://www.tensorflow.org/decision_forests) + publish M1-compatible versions, but many packages don't. To use those + libraries, you will have to use TensorFlow with x86 emulation and Rosetta. + + Currently there is no official GPU support for running TensorFlow on + MacOS. The following instructions are for running on CPU. + + ### 2. Check Python version + + Check if your Python environment is already configured: + + Note: Requires Python 3.9–3.11, and pip >= 20.3 for MacOS. + + ```bash + python3 --version + python3 -m pip --version + ``` + + ### 3. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + pip install tensorflow + ``` + + ### 4. Verify the installation + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + +* {Windows Native} + + Caution: TensorFlow `2.10` was the **last** TensorFlow release that + supported GPU on native-Windows. + Starting with TensorFlow `2.11`, you will need to install + [TensorFlow in WSL2](https://tensorflow.org/install/pip#windows-[wsl2]), + or install `tensorflow-cpu` and, optionally, try the + [TensorFlow-DirectML-Plugin](https://github.com/microsoft/tensorflow-directml-plugin#tensorflow-directml-plugin-) + + ## 1. System requirements + + * Windows 7 or higher (64-bit) + + Note: Starting with TensorFlow `2.10`, Windows CPU-builds for x86/x64 + processors are built, maintained, tested and released by a third party: + [Intel](https://www.intel.com/). + Installing the windows-native [`tensorflow`](https://pypi.org/project/tensorflow/) + or [`tensorflow-cpu`](https://pypi.org/project/tensorflow-cpu/) + package installs Intel's + [`tensorflow-intel`](https://pypi.org/project/tensorflow-intel/) + package. These packages are provided as-is. Tensorflow will use reasonable + efforts to maintain the availability and integrity of this pip package. + There may be delays if the third party fails to release the pip package. See + [this blog post](https://blog.tensorflow.org/2022/09/announcing-tensorflow-official-build-collaborators.html) + for more information about this + collaboration. + + ### 2. Install Microsoft Visual C++ Redistributable + + Install the *Microsoft Visual C++ Redistributable for Visual Studio 2015, + 2017, and 2019*. Starting with the TensorFlow 2.1.0 version, the + `msvcp140_1.dll` file is required from this package (which may not be + provided from older redistributable packages). The redistributable comes + with *Visual Studio 2019* but can be installed separately: + + 1. 
Go to the + [Microsoft Visual C++ downloads](https://support.microsoft.com/help/2977003/the-latest-supported-visual-c-downloads). + 2. Scroll down the page to the *Visual Studio 2015, 2017 and 2019* section. + 3. Download and install the *Microsoft Visual C++ Redistributable for + Visual Studio 2015, 2017 and 2019* for your platform. + + Make sure + [long paths are enabled](https://superuser.com/questions/1119883/windows-10-enable-ntfs-long-paths-policy-option-missing) + on Windows. + + ### 3. Install Miniconda + + [Miniconda](https://docs.conda.io/en/latest/miniconda.html) + is the recommended approach for installing TensorFlow with GPU support. + It creates a separate environment to avoid changing any installed + software in your system. This is also the easiest way to install the + required software especially for the GPU setup. + + Download the + [Miniconda Windows Installer](https://repo.anaconda.com/miniconda/Miniconda3-latest-Windows-x86_64.exe). + Double-click the downloaded file and follow the instructions on the screen. + + ### 4. Create a conda environment + + Create a new conda environment named `tf` with the following command. + + ```bash + conda create --name tf python=3.9 + ``` + + You can deactivate and activate it with the following commands. + + ```bash + conda deactivate + conda activate tf + ``` + + Make sure it is activated for the rest of the installation. + + ### 5. GPU setup + + You can skip this section if you only run TensorFlow on CPU. + + First install + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. + + Then install the CUDA, cuDNN with conda. + + ```bash + conda install -c conda-forge cudatoolkit=11.2 cudnn=8.1.0 + ``` + + ### 6. Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + Note: Do not install TensorFlow with conda. It may not have the latest stable + version. pip is recommended since TensorFlow is only officially released to + PyPI. + + ```bash + # Anything above 2.10 is not supported on the GPU on Windows Native + pip install "tensorflow<2.11" + ``` + + ### 7. Verify the installation + + Verify the CPU setup: + + ```bash + python -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. + +* {Windows WSL2} + + ### 1. System requirements + + * Windows 10 19044 or higher (64-bit). This corresponds to Windows 10 + version 21H2, the November 2021 update. + + See the following documents to: + + * [Download the latest Windows 10 update](https://www.microsoft.com/software-download/windows10). + * [Install WSL2](https://docs.microsoft.com/windows/wsl/install) + * [Setup NVIDIA® GPU support in WSL2](https://docs.nvidia.com/cuda/wsl-user-guide/index.html) + + ### 2. GPU setup + + You can skip this section if you only run TensorFlow on the CPU. + + Install the + [NVIDIA GPU driver](https://www.nvidia.com/Download/index.aspx) + if you have not. You can use the following command to verify it is + installed. + + ```bash + nvidia-smi + ``` + + ### 3. 
Install TensorFlow + + TensorFlow requires a recent version of pip, so upgrade your pip + installation to be sure you're running the latest version. + + ```bash + pip install --upgrade pip + ``` + + Then, install TensorFlow with pip. + + ```bash + # For GPU users + pip install tensorflow[and-cuda] + # For CPU users + pip install tensorflow + ``` + + ### 4. Verify the installation + + Verify the CPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.reduce_sum(tf.random.normal([1000, 1000])))" + ``` + + If a tensor is returned, you've installed TensorFlow successfully. + + Verify the GPU setup: + + ```bash + python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" + ``` + + If a list of GPU devices is returned, you've installed TensorFlow + successfully. + + +## Package location + +A few installation mechanisms require the URL of the TensorFlow Python package. +The value you specify depends on your Python version. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    VersionURL
    Linux x86
    Python 3.9 GPU supporthttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.9 CPU-onlyhttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.10 GPU supporthttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.10 CPU-onlyhttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.11 GPU supporthttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.11 CPU-onlyhttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.12 GPU supporthttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.12 CPU-onlyhttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.13 GPU supporthttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Python 3.13 CPU-onlyhttps://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
    Linux Arm64 (CPU-only)
    Python 3.9https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
    Python 3.10https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
    Python 3.11https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
    Python 3.12https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
    Python 3.13https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
    macOS x86 (CPU-only)
    Caution: TensorFlow 2.16 was the last TensorFlow release that supported macOS x86
    Python 3.9https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp39-cp39-macosx_10_15_x86_64.whl
    Python 3.10https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp310-cp310-macosx_10_15_x86_64.whl
    Python 3.11https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp311-cp311-macosx_10_15_x86_64.whl
    Python 3.12https://storage.googleapis.com/tensorflow/versions/2.16.2/tensorflow-2.16.2-cp312-cp312-macosx_10_15_x86_64.whl
    macOS Arm64 (CPU-only)
    Python 3.9https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp39-cp39-macosx_12_0_arm64.whl
    Python 3.10https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp310-cp310-macosx_12_0_arm64.whl
    Python 3.11https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp311-cp311-macosx_12_0_arm64.whl
    Python 3.12https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp312-cp312-macosx_12_0_arm64.whl
    Python 3.13https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow-2.20.0-cp313-cp313-macosx_12_0_arm64.whl
    Windows (CPU-only)
    Python 3.9https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp39-cp39-win_amd64.whl
    Python 3.10https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp310-cp310-win_amd64.whl
    Python 3.11https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp311-cp311-win_amd64.whl
    Python 3.12https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp312-cp312-win_amd64.whl
    Python 3.13https://storage.googleapis.com/tensorflow/versions/2.20.0/tensorflow_cpu-2.20.0-cp313-cp313-win_amd64.whl
    diff --git a/site/en/install/source.md b/site/en/install/source.md index 67db7c7ca96..dc847f017e9 100644 --- a/site/en/install/source.md +++ b/site/en/install/source.md @@ -4,8 +4,8 @@ Build a TensorFlow *pip* package from source and install it on Ubuntu Linux and macOS. While the instructions might work for other systems, it is only tested and supported for Ubuntu and macOS. -Note: Well-tested, pre-built -[TensorFlow packages](./pip.html) for Linux and macOS systems are already provided. +Note: Well-tested, pre-built [TensorFlow packages](./pip.md) for Linux and macOS +systems are already provided. ## Setup for Linux and macOS @@ -25,9 +25,6 @@ Install the following build tools to configure your development environment.

    Requires Xcode 9.2 or later.

    Install using the Homebrew package manager:

    -/usr/bin/ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)"
    -export PATH="/usr/local/opt/python/libexec/bin:$PATH"
    -# if you are on macOS 10.12 (Sierra) use `export PATH="/usr/local/bin:/usr/local/sbin:$PATH"`
     brew install python
     
    @@ -37,13 +34,12 @@ Install the TensorFlow *pip* package dependencies (if using a virtual environment, omit the `--user` argument):
    -pip install -U --user pip numpy wheel packaging
    -pip install -U --user keras_preprocessing --no-deps
    +pip install -U --user pip
     
    Note: A `pip` version >19.0 is required to install the TensorFlow 2 `.whl` package. Additional required dependencies are listed in the -setup.py +setup.py.tpl file under `REQUIRED_PACKAGES`. ### Install Bazel @@ -54,32 +50,83 @@ Bazel and automatically downloads the correct Bazel version for TensorFlow. For ease of use, add Bazelisk as the `bazel` executable in your `PATH`. If Bazelisk is not available, you can manually -[install Bazel](https://docs.bazel.build/versions/master/install.html). Make -sure to install a supported Bazel version: any version between -`_TF_MIN_BAZEL_VERSION` and `_TF_MAX_BAZEL_VERSION` as specified in -`tensorflow/configure.py`. +[install Bazel](https://bazel.build/install). Make +sure to install the correct Bazel version from TensorFlow's +[.bazelversion](https://github.com/tensorflow/tensorflow/blob/master/.bazelversion) +file. + +### Install Clang (recommended, Linux only) + +Clang is a C/C++/Objective-C compiler that is compiled in C++ based on LLVM. It +is the default compiler to build TensorFlow starting with TensorFlow 2.13. The +current supported version is LLVM/Clang 17. + +[LLVM Debian/Ubuntu nightly packages](https://apt.llvm.org) provide an automatic +installation script and packages for manual installation on Linux. Make sure you +run the following command if you manually add llvm apt repository to your +package sources: + +
    +sudo apt-get update && sudo apt-get install -y llvm-17 clang-17
    +
    + +Now that `/usr/lib/llvm-17/bin/clang` is the actual path to clang in this case. + +Alternatively, you can download and unpack the pre-built +[Clang + LLVM 17](https://github.com/llvm/llvm-project/releases/tag/llvmorg-17.0.2). + +Below is an example of steps you can take to set up the downloaded Clang + LLVM +17 binaries on Debian/Ubuntu operating systems: + +1. Change to the desired destination directory: `cd ` + +1. Load and extract an archive file...(suitable to your architecture): +
    +    wget https://github.com/llvm/llvm-project/releases/download/llvmorg-17.0.2/clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.xz
    +    
    +    tar -xvf clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04.tar.xz
    +    
    +    
    + +1. Copy the extracted contents (directories and files) to `/usr` (you may need + sudo permissions, and the correct directory may vary by distribution). This + effectively installs Clang and LLVM, and adds it to the path. You should not + have to replace anything, unless you have a previous installation, in which + case you should replace the files: +
    +    cp -r clang+llvm-17.0.2-x86_64-linux-gnu-ubuntu-22.04/* /usr
    +    
    + +1. Check the obtained Clang + LLVM 17 binaries version: +
    +    clang --version
    +    
    + +1. Now that `/usr/bin/clang` is the actual path to your new clang. You can run + the `./configure` script or manually set environment variables `CC` and + `BAZEL_COMPILER` to this path. ### Install GPU support (optional, Linux only) There is *no* GPU support for macOS. -Read the [GPU support](./gpu.md) guide to install the drivers and additional +Read the [GPU support](./pip.md) guide to install the drivers and additional software required to run TensorFlow on a GPU. Note: It is easier to set up one of TensorFlow's GPU-enabled [Docker images](#docker_linux_builds). ### Download the TensorFlow source code -Use [Git](https://git-scm.com/){:.external} to clone the -[TensorFlow repository](https://github.com/tensorflow/tensorflow){:.external}: +Use [Git](https://git-scm.com/) to clone the +[TensorFlow repository](https://github.com/tensorflow/tensorflow):
     git clone https://github.com/tensorflow/tensorflow.git
     cd tensorflow
     
    -The repo defaults to the `master` development branch. You can also checkout a -[release branch](https://github.com/tensorflow/tensorflow/releases){:.external} +The repo defaults to the `master` development branch. You can also check out a +[release branch](https://github.com/tensorflow/tensorflow/releases) to build:
    @@ -89,16 +136,21 @@ git checkout branch_name  # r2.2, r2.3, etc.
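If you installed Clang manually as described above, one option before configuring the build (next section) is to export the two environment variables mentioned in the Clang steps. A minimal sketch, assuming the binaries were copied to `/usr`:

```bash
# Point the build at the manually installed Clang before running ./configure.
export CC=/usr/bin/clang
export BAZEL_COMPILER=/usr/bin/clang
clang --version   # sanity check before configuring
```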
     
     ## Configure the build
     
    -Configure your system build by running the `./configure` at the root of your
    -TensorFlow source tree. This script prompts you for the location of TensorFlow
    -dependencies and asks for additional build configuration options (compiler
    -flags, for example).
    +TensorFlow builds are configured by the `.bazelrc` file in the repository's
    +root directory. The `./configure` or `./configure.py` scripts can be used to
    +adjust common settings.
    +
    +Please run the `./configure` script from the repository's root directory. This
+script prompts you for the location of TensorFlow dependencies and asks for
    +additional build configuration options (compiler flags, for example). Refer to
    +the _Sample session_ section for details.
     
     
     ./configure
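The prompts can also be pre-answered through environment variables so the script asks fewer questions. `PYTHON_BIN_PATH` and `PYTHON_LIB_PATH` also appear later in this document's Windows section; treating `TF_NEED_CUDA`/`TF_NEED_ROCM` as accepted here is an assumption about the configure script, so verify against your checkout. A sketch:

```bash
# Hedged sketch: pre-seed some configure answers via environment variables.
# (TF_NEED_CUDA / TF_NEED_ROCM are assumptions; PYTHON_* paths mirror the
#  variables set in the Windows section of this document.)
export PYTHON_BIN_PATH=/usr/bin/python3
export PYTHON_LIB_PATH=/usr/lib/python3/dist-packages
export TF_NEED_CUDA=0
export TF_NEED_ROCM=0
./configure
```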
     
-If using a virtual environment, `python configure.py` prioritizes paths +There is also a Python version of this script, `./configure.py`. If using a +virtual environment, `python configure.py` prioritizes paths within the environment, whereas `./configure` prioritizes paths outside the environment. In both cases you can change the default. @@ -111,65 +163,47 @@ session may differ):

    View sample configuration session

     ./configure
    -You have bazel 3.0.0 installed.
    -Please specify the location of python. [Default is /usr/bin/python3]: 
    +You have bazel 6.1.0 installed.
    +Please specify the location of python. [Default is /Library/Frameworks/Python.framework/Versions/3.9/bin/python3]: 
     
     
     Found possible Python library paths:
    -  /usr/lib/python3/dist-packages
    -  /usr/local/lib/python3.6/dist-packages
    -Please input the desired Python library path to use.  Default is [/usr/lib/python3/dist-packages]
    -
    -Do you wish to build TensorFlow with OpenCL SYCL support? [y/N]: 
    -No OpenCL SYCL support will be enabled for TensorFlow.
    +  /Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages
    +Please input the desired Python library path to use.  Default is [/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages]
     
    -Do you wish to build TensorFlow with ROCm support? [y/N]: 
    +Do you wish to build TensorFlow with ROCm support? [y/N]:
     No ROCm support will be enabled for TensorFlow.
     
    -Do you wish to build TensorFlow with CUDA support? [y/N]: Y
    -CUDA support will be enabled for TensorFlow.
    -
    -Do you wish to build TensorFlow with TensorRT support? [y/N]: 
    -No TensorRT support will be enabled for TensorFlow.
    -
    -Found CUDA 10.1 in:
    -    /usr/local/cuda-10.1/targets/x86_64-linux/lib
    -    /usr/local/cuda-10.1/targets/x86_64-linux/include
    -Found cuDNN 7 in:
    -    /usr/lib/x86_64-linux-gnu
    -    /usr/include
    -
    -
    -Please specify a list of comma-separated CUDA compute capabilities you want to build with.
    -You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus. Each capability can be specified as "x.y" or "compute_xy" to include both virtual and binary GPU code, or as "sm_xy" to only include the binary code.
    -Please note that each additional compute capability significantly increases your build time and binary size, and that TensorFlow only supports compute capabilities >= 3.5 [Default is: 3.5,7.0]: 6.1
    -
    +Do you wish to build TensorFlow with CUDA support? [y/N]:
    +No CUDA support will be enabled for TensorFlow.
     
    -Do you want to use clang as CUDA compiler? [y/N]: 
    -nvcc will be used as CUDA compiler.
    +Do you want to use Clang to build TensorFlow? [Y/n]:
    +Clang will be used to compile TensorFlow.
     
    -Please specify which gcc should be used by nvcc as the host compiler. [Default is /usr/bin/gcc]: 
    +Please specify the path to clang executable. [Default is /usr/lib/llvm-16/bin/clang]:
     
    +You have Clang 16.0.4 installed.
     
    -Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is -march=native -Wno-sign-compare]: 
    +Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is -Wno-sign-compare]:
     
     
    -Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]: 
    +Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]: n
     Not configuring the WORKSPACE for Android builds.
     
    +Do you wish to build TensorFlow with iOS support? [y/N]: n
    +No iOS support will be enabled for TensorFlow.
    +
     Preconfigured Bazel build configs. You can use any of the below by adding "--config=<>" to your build command. See .bazelrc for more details.
     	--config=mkl         	# Build with MKL support.
    +	--config=mkl_aarch64 	# Build with oneDNN and Compute Library for the Arm Architecture (ACL).
     	--config=monolithic  	# Config for mostly static monolithic build.
    -	--config=ngraph      	# Build with Intel nGraph support.
     	--config=numa        	# Build with NUMA support.
     	--config=dynamic_kernels	# (Experimental) Build kernels into separate shared objects.
    -	--config=v2          	# Build TensorFlow 2.x instead of 1.x.
    +	--config=v1          	# Build with TensorFlow 1 API instead of TF 2 API.
     Preconfigured Bazel build configs to DISABLE default on features:
    -	--config=noaws       	# Disable AWS S3 filesystem support.
     	--config=nogcp       	# Disable GCP support.
    -	--config=nohdfs      	# Disable HDFS support.
     	--config=nonccl      	# Disable NVIDIA NCCL support.
    -Configuration finished
    +
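As noted above, the Python variant of the script prefers paths inside an active virtual environment. A minimal sketch of that workflow (the extra pip packages mirror the dependency list shown in the Windows section of this document):

```bash
# Create and activate a virtual environment, then configure from within it.
python3 -m venv ~/tf-build-venv
source ~/tf-build-venv/bin/activate
pip install -U pip numpy wheel packaging
python configure.py   # picks up the virtualenv's Python and site-packages
```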
     
@@ -177,7 +211,14 @@ Configuration finished #### GPU support -For [GPU support](./gpu.md), set `cuda=Y` during configuration and specify the +##### From v2.18.0 +For [GPU support](./pip.md), set `cuda=Y` during configuration and specify the +versions of CUDA and cuDNN if required. Bazel will download the CUDA and cuDNN +packages automatically, or it can be pointed at CUDA/cuDNN/NCCL redistributions on the local file +system if required. + +##### Before v2.18.0 +For [GPU support](./pip.md), set `cuda=Y` during configuration and specify the versions of CUDA and cuDNN. If your system has multiple versions of CUDA or cuDNN installed, explicitly set the version instead of relying on the default. `./configure` creates symbolic links to your system's CUDA libraries—so if you @@ -188,8 +229,8 @@ building. For compilation optimization flags, the default (`-march=native`) optimizes the generated code for your machine's CPU type. However, if building TensorFlow for -a different CPU type, consider a more specific optimization flag. See the -[GCC manual](https://gcc.gnu.org/onlinedocs/gcc-4.5.3/gcc/i386-and-x86_002d64-Options.html){:.external} +a different CPU type, consider a more specific optimization flag. Check the +[GCC manual](https://gcc.gnu.org/onlinedocs/gcc-4.5.3/gcc/i386-and-x86_002d64-Options.html) for examples. #### Preconfigured configurations @@ -201,81 +242,55 @@ There are some preconfigured build configs available that can be added to the [CONTRIBUTING.md](https://github.com/tensorflow/tensorflow/blob/master/CONTRIBUTING.md) for details. * `--config=mkl` —Support for the - [Intel® MKL-DNN](https://github.com/intel/mkl-dnn){:.external}. + [Intel® MKL-DNN](https://github.com/intel/mkl-dnn). * `--config=monolithic` —Configuration for a mostly static, monolithic build. -* `--config=v1` —Build TensorFlow 1.x instead of 2.x. - -Note: Starting with TensorFlow 1.6, binaries use AVX instructions which may not -run on older CPUs. - - -## Build the pip package -### TensorFlow 2.x -[Install Bazel](https://docs.bazel.build/versions/master/install.html) and use -`bazel build` to create the TensorFlow 2.x package with *CPU-only* support: -
    -bazel build [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
    - -Note: GPU support can be enabled with `cuda=Y` during the `./configure` stage. - -### GPU support - -To build a TensorFlow package builder with GPU support: - -
    -bazel build --config=cuda [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
    - -### TensorFlow 1.x - -To build an older TensorFlow 1.x package, use the `--config=v1` option: - -
    -bazel build --config=v1 [--config=option] //tensorflow/tools/pip_package:build_pip_package
    -
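Related to the optimization-flag discussion above: if the wheel will run on machines other than the build host, a more portable `-march` value can be passed instead of the default `-march=native`. The value below is only an illustration (check the GCC/Clang manual referenced above for what your toolchain accepts); the wheel target and `--repo_env` settings mirror the build commands shown later in this section:

```bash
# Hedged sketch: build an optimized CPU wheel for a generic x86-64 level
# instead of the build host's native CPU (the -march value is an example only).
bazel build //tensorflow/tools/pip_package:wheel \
  --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu \
  --config=opt --copt=-march=x86-64-v2
```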
+#### Bazel build options -### Bazel build options - -See the Bazel [command-line reference](https://docs.bazel.build/versions/master/command-line-reference.html) +Refer to the Bazel +[command-line reference](https://bazel.build/reference/command-line-reference) for -[build options](https://docs.bazel.build/versions/master/command-line-reference.html#build-options). +[build options](https://bazel.build/reference/command-line-reference#build-options). Building TensorFlow from source can use a lot of RAM. If your system is memory-constrained, limit Bazel's RAM usage with: `--local_ram_resources=2048`. -The [official TensorFlow packages](./pip.html) are built with a GCC 7.3 -toolchain that complies with the manylinux2010 package standard. - -For GCC 5 and later, compatibility with the older ABI can be built using: -`--cxxopt="-D_GLIBCXX_USE_CXX11_ABI=0"`. ABI compatibility ensures that custom -ops built against the official TensorFlow package continue to work with the -GCC 5 built package. +The [official TensorFlow packages](./pip.md) are built with a Clang toolchain +that complies with the manylinux2014 package standard. ### Build the package -The `bazel build` command creates an executable named `build_pip_package`—this -is the program that builds the `pip` package. Run the executable as shown -below to build a `.whl` package in the `/tmp/tensorflow_pkg` directory. +To build the pip package, you need to specify the `--repo_env=WHEEL_NAME` flag. +Depending on the provided name, the package will be created, e.g.: -To build from a release branch: +To build the TensorFlow CPU package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu
    +
+To build the TensorFlow GPU package:
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow --config=cuda --config=cuda_wheel
     
-To build from master, use `--nightly_flag` to get the right dependencies: +To build the TensorFlow TPU package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_tpu --config=tpu
    +
+To build a nightly package, set `tf_nightly` instead of `tensorflow`, e.g. +to build the CPU nightly package:
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package --nightly_flag /tmp/tensorflow_pkg
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tf_nightly_cpu
     
-Although it is possible to build both CUDA and non-CUDA configurations under the -same source tree, it's recommended to run `bazel clean` when switching between -these two configurations in the same source tree. +As a result, the generated wheel will be located in +
    +bazel-bin/tensorflow/tools/pip_package/wheel_house/
    +
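On memory-constrained machines, the `--local_ram_resources` limit mentioned under the Bazel build options above can be combined with any of the wheel builds in this section; a minimal sketch:

```bash
# Cap Bazel's RAM usage while building the CPU wheel (value in MB).
bazel build //tensorflow/tools/pip_package:wheel \
  --repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu \
  --local_ram_resources=2048
```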
    ### Install the package @@ -283,7 +298,7 @@ The filename of the generated `.whl` file depends on the TensorFlow version and your platform. Use `pip install` to install the package, for example:
    -pip install /tmp/tensorflow_pkg/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
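Because the exact wheel filename varies, it can help to list the output directory first and then import the package from outside the source tree, mirroring the check used in the Docker sections below:

```bash
# List the generated wheel(s), install one, then verify the import away from the repo.
ls bazel-bin/tensorflow/tools/pip_package/wheel_house/
pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-*.whl
cd /tmp && python3 -c "import tensorflow as tf; print(tf.__version__)"
```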
     
    Success: TensorFlow is now installed. @@ -293,17 +308,17 @@ Success: TensorFlow is now installed. TensorFlow's Docker development images are an easy way to set up an environment to build Linux packages from source. These images already contain the source -code and dependencies required to build TensorFlow. See the TensorFlow -[Docker guide](./docker.md) for installation and the -[list of available image tags](https://hub.docker.com/r/tensorflow/tensorflow/tags/){:.external}. +code and dependencies required to build TensorFlow. Go to the TensorFlow +[Docker guide](./docker.md) for installation instructions and the +[list of available image tags](https://hub.docker.com/r/tensorflow/tensorflow/tags/). ### CPU-only The following example uses the `:devel` image to build a CPU-only package from -the latest TensorFlow source code. See the [Docker guide](./docker.md) for +the latest TensorFlow source code. Check the [Docker guide](./docker.md) for available TensorFlow `-devel` tags. -Download the latest development image and start a Docker container that we'll +Download the latest development image and start a Docker container that you'll use to build the *pip* package:
    @@ -331,20 +346,20 @@ docker run -it -w /tensorflow -v /path/to/tensorflow:/tensorflow -v $
     With the source tree set up, build the TensorFlow package within the container's
     virtual environment:
     
    -1.  Configure the build—this prompts the user to answer build configuration
    -    questions.
    -2.  Build the tool used to create the *pip* package.
    -3.  Run the tool to create the *pip* package.
    -4.  Adjust the ownership permissions of the file for outside the container.
    +1.  Optional: Configure the build—this prompts the user to answer build
    +    configuration questions.
    +2.  Build the *pip* package.
    +3.  Adjust the ownership permissions of the file for outside the container.
     
     
    -./configure  # answer prompts or use defaults
    -
    -bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
    -
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /mnt  # create package
    -
    -chown $HOST_PERMS /mnt/tensorflow-version-tags.whl
    +./configure  # if necessary
    +
    +
    +bazel build //tensorflow/tools/pip_package:wheel \
    +--repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow_cpu --config=opt
    +
+
    +chown $HOST_PERMS bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
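For reference, `$HOST_PERMS` in the `chown` step carries your host UID/GID into the container; the full `docker run` invocation lies outside this hunk, so the line below is a hedged assumption about how it is typically passed in, not a quote from this diff:

```bash
# Hypothetical example: pass host ownership info when starting the devel container.
docker run -it -w /tensorflow -v "${PWD}:/tensorflow" \
  -e HOST_PERMS="$(id -u):$(id -g)" \
  tensorflow/tensorflow:devel bash
```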
     
    Install and verify the package within the container: @@ -352,7 +367,7 @@ Install and verify the package within the container:
     pip uninstall tensorflow  # remove current version
     
    -pip install /mnt/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     cd /tmp  # don't import from source directory
     python -c "import tensorflow as tf; print(tf.__version__)"
     
@@ -365,12 +380,15 @@ On your host machine, the TensorFlow *pip* package is in the current directory ### GPU support +Note: Starting from TensorFlow v2.18.0, the wheels can be built from +source on a machine without GPUs and without the NVIDIA driver installed. + Docker is the easiest way to build GPU support for TensorFlow since the *host* machine only requires the -[NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver){:.external} -(the *NVIDIA® CUDA® Toolkit* doesn't have to be installed). See the -[GPU support guide](./gpu.md) and the TensorFlow [Docker guide](./docker.md) to -set up [nvidia-docker](https://github.com/NVIDIA/nvidia-docker){:.external} +[NVIDIA® driver](https://github.com/NVIDIA/nvidia-docker/wiki/Frequently-Asked-Questions#how-do-i-install-the-nvidia-driver) +(the *NVIDIA® CUDA® Toolkit* doesn't have to be installed). Refer to the +[GPU support guide](./pip.md) and the TensorFlow [Docker guide](./docker.md) to +set up [nvidia-docker](https://github.com/NVIDIA/nvidia-docker) (Linux only). The following example downloads the TensorFlow `:devel-gpu` image and uses
    -./configure  # answer prompts or use defaults
    -
    -bazel build --config=opt --config=cuda //tensorflow/tools/pip_package:build_pip_package
    +./configure  # if necessary
     
    -./bazel-bin/tensorflow/tools/pip_package/build_pip_package /mnt  # create package
    +
    +bazel build //tensorflow/tools/pip_package:wheel \
    +--repo_env=USE_PYWRAP_RULES=1 --repo_env=WHEEL_NAME=tensorflow --config=cuda \
    +--config=cuda_wheel --config=opt
    +
     
    -chown $HOST_PERMS /mnt/tensorflow-version-tags.whl
    +chown $HOST_PERMS bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Install and verify the package within the container and check for a GPU: @@ -402,7 +422,7 @@ Install and verify the package within the container and check for a GPU:
     pip uninstall tensorflow  # remove current version
     
    -pip install /mnt/tensorflow-version-tags.whl
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     cd /tmp  # don't import from source directory
     python -c "import tensorflow as tf; print(\"Num GPUs Available: \", len(tf.config.list_physical_devices('GPU')))"
     
    @@ -419,6 +439,18 @@ Success: TensorFlow is now installed. + + + + + + + + + + + + @@ -450,6 +482,18 @@ Success: TensorFlow is now installed.
| Version | Python version | Compiler | Build tools |
|---------|----------------|----------|-------------|
| tensorflow-2.20.0 | 3.9-3.13 | Clang 18.1.8 | Bazel 7.4.1 |
| tensorflow-2.19.0 | 3.9-3.12 | Clang 18.1.8 | Bazel 6.5.0 |
| tensorflow-2.18.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.17.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.16.1 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang 16.0.0 | Bazel 5.3.0 |
| tensorflow-2.12.0 | 3.8-3.11 | GCC 9.3.1 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | GCC 7.3.1 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | GCC 7.3.1 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 |
    + + + + + + + + + + + + @@ -483,6 +527,14 @@ Success: TensorFlow is now installed.
| Version | Python version | Compiler | Build tools | cuDNN | CUDA |
|---------|----------------|----------|-------------|-------|------|
| tensorflow-2.20.0 | 3.9-3.13 | Clang 18.1.8 | Bazel 7.4.1 | 9.3 | 12.5 |
| tensorflow-2.19.0 | 3.9-3.12 | Clang 18.1.8 | Bazel 6.5.0 | 9.3 | 12.5 |
| tensorflow-2.18.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 9.3 | 12.5 |
| tensorflow-2.17.0 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 8.9 | 12.3 |
| tensorflow-2.16.1 | 3.9-3.12 | Clang 17.0.6 | Bazel 6.5.0 | 8.9 | 12.3 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 | 8.9 | 12.2 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang 16.0.0 | Bazel 6.1.0 | 8.7 | 11.8 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang 16.0.0 | Bazel 5.3.0 | 8.6 | 11.8 |
| tensorflow-2.12.0 | 3.8-3.11 | GCC 9.3.1 | Bazel 5.3.0 | 8.6 | 11.8 |
| tensorflow-2.11.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.3.0 | 8.1 | 11.2 |
| tensorflow-2.10.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.1.1 | 8.1 | 11.2 |
| tensorflow-2.9.0 | 3.7-3.10 | GCC 9.3.1 | Bazel 5.0.0 | 8.1 | 11.2 |
| tensorflow-2.8.0 | 3.7-3.10 | GCC 7.3.1 | Bazel 4.2.1 | 8.1 | 11.2 |
| tensorflow-2.7.0 | 3.7-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
| tensorflow-2.6.0 | 3.6-3.9 | GCC 7.3.1 | Bazel 3.7.2 | 8.1 | 11.2 |
    + + + + + + + + diff --git a/site/en/install/source_windows.md b/site/en/install/source_windows.md index bdded635a33..efc0f7a9286 100644 --- a/site/en/install/source_windows.md +++ b/site/en/install/source_windows.md @@ -1,9 +1,9 @@ # Build from source on Windows -Build a TensorFlow *pip* package from source and install it on Windows. +Build a TensorFlow *pip* package from the source and install it on Windows. Note: We already provide well-tested, pre-built -[TensorFlow packages](./pip.html) for Windows systems. +[TensorFlow packages](./pip.md) for Windows systems. ## Setup for Windows @@ -13,13 +13,14 @@ environment. ### Install Python and the TensorFlow package dependencies Install a -[Python 3.7+ 64-bit release for Windows](https://www.python.org/downloads/windows/){:.external}. +[Python 3.9+ 64-bit release for Windows](https://www.python.org/downloads/windows/). Select *pip* as an optional feature and add it to your `%PATH%` environmental variable. Install the TensorFlow *pip* package dependencies:
    +pip3 install -U pip
     pip3 install -U six numpy wheel packaging
     pip3 install -U keras_preprocessing --no-deps
     
    @@ -41,38 +42,53 @@ Add the location of the Bazel executable to your `%PATH%` environment variable. ### Install MSYS2 -[Install MSYS2](https://www.msys2.org/){:.external} for the bin tools needed to +[Install MSYS2](https://www.msys2.org/) for the bin tools needed to build TensorFlow. If MSYS2 is installed to `C:\msys64`, add `C:\msys64\usr\bin` to your `%PATH%` environment variable. Then, using `cmd.exe`, run:
    +pacman -Syu (requires a console restart)
     pacman -S git patch unzip
    +pacman -S git patch unzip rsync
     
-### Install Visual C++ Build Tools 2019 +Note: Clang will be the preferred compiler to build TensorFlow CPU wheels on the Windows platform starting with TF 2.16.1. The currently supported version is LLVM/Clang 17.0.6. -Install the *Visual C++ build tools 2019*. This comes with *Visual Studio 2019* +Note: To build with Clang on Windows, you need to install both LLVM and the Visual C++ Build Tools. Although Windows uses clang-cl.exe as the compiler, the Visual C++ Build Tools are still needed to link against the Visual C++ libraries. + +### Install Visual C++ Build Tools 2022 + +Install the *Visual C++ build tools 2022*. This comes with *Visual Studio Community 2022* but can be installed separately: 1. Go to the - [Visual Studio downloads](https://visualstudio.microsoft.com/downloads/){:.external}, -2. Select *Redistributables and Build Tools*, + [Visual Studio downloads](https://visualstudio.microsoft.com/downloads/), +2. Select *Tools for Visual Studio or Other Tools, Framework and Redistributables*, 3. Download and install: - - *Microsoft Visual C++ 2019 Redistributable* - - *Microsoft Build Tools 2019* + - *Build Tools for Visual Studio 2022* + - *Microsoft Visual C++ Redistributables for Visual Studio 2022* + +Note: TensorFlow is tested against *Visual Studio Community 2022*. + +### Install LLVM + +1. Go to the + [LLVM downloads](https://github.com/llvm/llvm-project/releases/), +2. Download and install a Windows-compatible LLVM into C:/Program Files/LLVM, e.g., LLVM-17.0.6-win64.exe -Note: TensorFlow is tested against the *Visual Studio 2019*. ### Install GPU support (optional) See the Windows [GPU support](./gpu.md) guide to install the drivers and additional software required to run TensorFlow on a GPU. +Note: GPU support on native Windows is only available for 2.10 or earlier versions. Starting in TF 2.11, the CUDA build is not supported for Windows. To use TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with the TensorFlow-DirectML-Plugin. + ### Download the TensorFlow source code -Use [Git](https://git-scm.com/){:.external} to clone the -[TensorFlow repository](https://github.com/tensorflow/tensorflow){:.external} +Use [Git](https://git-scm.com/) to clone the +[TensorFlow repository](https://github.com/tensorflow/tensorflow) (`git` is installed with MSYS2):
    @@ -80,8 +96,8 @@ Use [Git](https://git-scm.com/){:.external} to clone the
     cd tensorflow
     
    -The repo defaults to the `master` development branch. You can also checkout a -[release branch](https://github.com/tensorflow/tensorflow/releases){:.external} +The repo defaults to the `master` development branch. You can also check out a +[release branch](https://github.com/tensorflow/tensorflow/releases) to build:
    @@ -91,11 +107,38 @@ git checkout branch_name  # r1.9, r1.10, etc.
     Key Point: If you're having build problems on the latest development branch, try
     a release branch that is known to work.
     
+## Optional: Environment Variable Setup
    +Run the following commands before running the build command to avoid issues with package creation:
+(If the environment variables below were already set when you installed the packages, you can ignore this step.) Run `set` to check that all the paths were set correctly, or run `echo %VARIABLE_NAME%`, e.g., `echo %BAZEL_VC%`, to check the path set for a specific environment variable
    +
+ Python path setup issues: [tensorflow:issue#59943](https://github.com/tensorflow/tensorflow/issues/59943), [tensorflow:issue#9436](https://github.com/tensorflow/tensorflow/issues/9436), [tensorflow:issue#60083](https://github.com/tensorflow/tensorflow/issues/60083)
    +
    +
    +set PATH=path/to/python;%PATH% # [e.g. (C:/Python311)]
    +set PATH=path/to/python/Scripts;%PATH% # [e.g. (C:/Python311/Scripts)] 
    +set PYTHON_BIN_PATH=path/to/python_virtualenv/Scripts/python.exe 
    +set PYTHON_LIB_PATH=path/to/python virtualenv/lib/site-packages 
    +set PYTHON_DIRECTORY=path/to/python_virtualenv/Scripts 
    +
    + +Bazel/MSVC/CLANG path set up issue [tensorflow:issue#54578](https://github.com/tensorflow/tensorflow/issues/54578) + +
    +set BAZEL_SH=C:/msys64/usr/bin/bash.exe 
    +set BAZEL_VS=C:/Program Files/Microsoft Visual Studio/2022/BuildTools 
    +set BAZEL_VC=C:/Program Files/Microsoft Visual Studio/2022/BuildTools/VC 
+set BAZEL_LLVM=C:/Program Files/LLVM (explicitly tell Bazel where LLVM is installed by setting BAZEL_LLVM, needed when using CLANG)
+set PATH=C:/Program Files/LLVM/bin;%PATH% (Optional, needed when using CLANG as the compiler)
    +
    + +## Optional: Configure the build -## Configure the build +TensorFlow builds are configured by the `.bazelrc` file in the repository's +root directory. The `./configure` or `./configure.py` scripts can be used to +adjust common settings. -Configure your system build by running the following at the root of your -TensorFlow source tree: +If you need to change the configuration, run the `./configure` script from +the repository's root directory.
     python ./configure.py
    @@ -110,94 +153,99 @@ differ):
     

    View sample configuration session

     python ./configure.py
    -Starting local Bazel server and connecting to it...
    -................
    -You have bazel 0.15.0 installed.
    -Please specify the location of python. [Default is C:\python36\python.exe]:
    +You have bazel 6.5.0 installed.
    +Please specify the location of python. [Default is C:\Python311\python.exe]:
     
     Found possible Python library paths:
    -  C:\python36\lib\site-packages
    -Please input the desired Python library path to use.  Default is [C:\python36\lib\site-packages]
    -
    -Do you wish to build TensorFlow with CUDA support? [y/N]: Y
    -CUDA support will be enabled for TensorFlow.
    +C:\Python311\lib\site-packages
    +Please input the desired Python library path to use.  Default is [C:\Python311\lib\site-packages]
     
    -Please specify the CUDA SDK version you want to use. [Leave empty to default to CUDA 9.0]:
    +Do you wish to build TensorFlow with ROCm support? [y/N]:
    +No ROCm support will be enabled for TensorFlow.
     
    -Please specify the location where CUDA 9.0 toolkit is installed. Refer to README.md for more details. [Default is C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0]:
    +WARNING: Cannot build with CUDA support on Windows.
    +Starting in TF 2.11, CUDA build is not supported for Windows. To use TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2.
     
    -Please specify the cuDNN version you want to use. [Leave empty to default to cuDNN 7.0]: 7.0
    +Do you want to use Clang to build TensorFlow? [Y/n]:
    +Add "--config=win_clang" to compile TensorFlow with CLANG.
     
    -Please specify the location where cuDNN 7 library is installed. Refer to README.md for more details. [Default is C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v9.0]: C:\tools\cuda
    +Please specify the path to clang executable. [Default is C:\Program Files\LLVM\bin\clang.EXE]:
     
    -Please specify a list of comma-separated Cuda compute capabilities you want to build with.
    -You can find the compute capability of your device at: https://developer.nvidia.com/cuda-gpus.
    -Please note that each additional compute capability significantly increases your build time and binary size. [Default is: 3.5,7.0]: 3.7
    +You have Clang 17.0.6 installed.
     
     Please specify optimization flags to use during compilation when bazel option "--config=opt" is specified [Default is /arch:AVX]:
     
     Would you like to override eigen strong inline for some C++ compilation to reduce the compilation time? [Y/n]:
     Eigen strong inline overridden.
     
    -Configuration finished
    +Would you like to interactively configure ./WORKSPACE for Android builds? [y/N]:
    +Not configuring the WORKSPACE for Android builds.
    +
    +Preconfigured Bazel build configs. You can use any of the below by adding "--config=<>" to your build command. See .bazelrc for more details.
    +        --config=mkl            # Build with MKL support.
    +        --config=mkl_aarch64    # Build with oneDNN and Compute Library for the Arm Architecture (ACL).
    +        --config=monolithic     # Config for mostly static monolithic build.
    +        --config=numa           # Build with NUMA support.
    +        --config=dynamic_kernels        # (Experimental) Build kernels into separate shared objects.
    +        --config=v1             # Build with TensorFlow 1 API instead of TF 2 API.
    +Preconfigured Bazel build configs to DISABLE default on features:
    +        --config=nogcp          # Disable GCP support.
    +        --config=nonccl         # Disable NVIDIA NCCL support.
     
    -### Configuration options - -For [GPU support](./gpu.md), specify the versions of CUDA and cuDNN. If your -system has multiple versions of CUDA or cuDNN installed, explicitly set the -version instead of relying on the default. `./configure.py` creates symbolic -links to your system's CUDA libraries—so if you update your CUDA library paths, -this configuration step must be run again before building. - -Note: Starting with TensorFlow 1.6, binaries use AVX instructions which may not -run on older CPUs. - -Warning: TF-TRT Windows support is provided experimentally. No guarantee is made -regarding functionality or engineering support. Use at your own risk. +## Build and install the pip package -## Build the pip package +The pip package is built in two steps. A `bazel build` command creates a +"package-builder" program. You then run the package-builder to create the +package. -### TensorFlow 2.x +### Build the package-builder tensorflow:master repo has been updated to build 2.x by default. [Install Bazel](https://docs.bazel.build/versions/master/install.html) and use -`bazel build ` to create the TensorFlow package. +`bazel build ` to create the TensorFlow package-builder.
    -bazel build //tensorflow/tools/pip_package:build_pip_package
    +bazel build //tensorflow/tools/pip_package:wheel
     
    +#### CPU-only -### TensorFlow 1.x - -To build the 1.x version of TensorFlow from master, use -`bazel build --config=v1` to create a TensorFlow 1.x package. +Use `bazel` to make the TensorFlow package builder with CPU-only support: +##### Build with MSVC
    -bazel build --config=v1 //tensorflow/tools/pip_package:build_pip_package
    +bazel build --config=opt --repo_env=TF_PYTHON_VERSION=3.11 //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
     
+#### CPU-only - -Use `bazel` to make the TensorFlow package builder with CPU-only support: +##### Build with CLANG +Use `--config=win_clang` to build TensorFlow with the CLANG compiler:
    -bazel build --config=opt //tensorflow/tools/pip_package:build_pip_package
    +bazel build --config=win_clang --repo_env=TF_PYTHON_VERSION=3.11 //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
     
#### GPU support +Note: GPU support on native Windows is only available for 2.10 or earlier versions. Starting in TF 2.11, the CUDA build is not supported for Windows. To use TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with the TensorFlow-DirectML-Plugin. + To make the TensorFlow package builder with GPU support:
     bazel build --config=opt --config=cuda --define=no_tensorflow_py_deps=true //tensorflow/tools/pip_package:build_pip_package
     
+Use the following commands to clean the Bazel cache and resolve errors due to invalid or outdated cached data; `bazel clean` with the `--expunge` flag removes files permanently + +
    +bazel clean 
    +bazel clean --expunge  
    +
    + #### Bazel build options -Use this option when building to avoid issue with package creation: +Use this option when building to avoid issues with package creation: [tensorflow:issue#22390](https://github.com/tensorflow/tensorflow/issues/22390)
    @@ -216,30 +264,37 @@ to suppress nvcc warning messages.
     
     ### Build the package
     
    -The `bazel build` command creates an executable named `build_pip_package`—this
    -is the program that builds the `pip` package. For example, the following builds
    -a `.whl` package in the `C:/tmp/tensorflow_pkg` directory:
+To build a pip package, you need to specify the `--repo_env=WHEEL_NAME` flag. 
    +Depending on the provided name, the package will be created. For example:
     
    -
    -bazel-bin\tensorflow\tools\pip_package\build_pip_package C:/tmp/tensorflow_pkg
    +To build tensorflow CPU package:
    +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tensorflow_cpu
    +
    + +To build nightly package, set `tf_nightly` instead of `tensorflow`, e.g. +to build CPU nightly package: +
    +bazel build //tensorflow/tools/pip_package:wheel --repo_env=WHEEL_NAME=tf_nightly_cpu
    +
    + +As a result, generated wheel will be located in +
    +bazel-bin/tensorflow/tools/pip_package/wheel_house/
     
    -Although it is possible to build both CUDA and non-CUDA configs under the -same source tree, we recommend running `bazel clean` when switching between -these two configurations in the same source tree. ### Install the package The filename of the generated `.whl` file depends on the TensorFlow version and -your platform. Use `pip3 install` to install the package, for example: +your platform. Use `pip install` to install the package, for example: -
    -pip3 install C:/tmp/tensorflow_pkg/tensorflow-version-cp36-cp36m-win_amd64.whl
    +
    +pip install bazel-bin/tensorflow/tools/pip_package/wheel_house/tensorflow-version-tags.whl
     
    Success: TensorFlow is now installed. - ## Build using the MSYS shell TensorFlow can also be built using the MSYS shell. Make the changes listed @@ -261,12 +316,12 @@ considered a Unix absolute path since it starts with a slash.) Add the Bazel and Python installation directories to your `$PATH` environmental variable. If Bazel is installed to `C:\tools\bazel.exe`, and Python to -`C:\Python36\python.exe`, set your `PATH` with: +`C:\Python\python.exe`, set your `PATH` with:
     # Use Unix-style with ':' as separator
     export PATH="/c/tools:$PATH"
    -export PATH="/c/Python36:$PATH"
    +export PATH="/c/path/to/Python:$PATH"
     
    For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`: @@ -277,6 +332,8 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`: export PATH="/c/tools/cuda/bin:$PATH"
+Note: Starting in TF 2.11, the CUDA build is not supported for Windows. To use TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with the TensorFlow-DirectML-Plugin. + + ## Tested build configurations @@ -284,6 +341,17 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`:
| Version | Python version | Compiler | Build tools |
|---------|----------------|----------|-------------|
| tensorflow-2.16.1 | 3.9-3.12 | Clang from Xcode 13.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | Clang from xcode 10.15 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | Clang from xcode 10.15 | Bazel 6.1.0 |
| tensorflow-2.13.0 | 3.8-3.11 | Clang from xcode 10.15 | Bazel 5.3.0 |
| tensorflow-2.12.0 | 3.8-3.11 | Clang from xcode 10.15 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | Clang from xcode 10.14 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | Clang from xcode 10.14 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | Clang from xcode 10.14 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | Clang from xcode 10.14 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | Clang from xcode 10.11 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | Clang from xcode 10.11 | Bazel 3.7.2 |
    + + + + + + + + + + + @@ -312,9 +380,12 @@ For GPU support, add the CUDA and cuDNN bin directories to your `$PATH`:
| Version | Python version | Compiler | Build tools |
|---------|----------------|----------|-------------|
| tensorflow-2.20.0 | 3.9-3.13 | CLANG 18.1.4 | Bazel 7.4.1 |
| tensorflow-2.19.0 | 3.9-3.12 | CLANG 18.1.4 | Bazel 6.5.0 |
| tensorflow-2.18.0 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.17.0 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.16.1 | 3.9-3.12 | CLANG 17.0.6 | Bazel 6.5.0 |
| tensorflow-2.15.0 | 3.9-3.11 | MSVC 2019 | Bazel 6.1.0 |
| tensorflow-2.14.0 | 3.9-3.11 | MSVC 2019 | Bazel 6.1.0 |
| tensorflow-2.12.0 | 3.8-3.11 | MSVC 2019 | Bazel 5.3.0 |
| tensorflow-2.11.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.3.0 |
| tensorflow-2.10.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.1.1 |
| tensorflow-2.9.0 | 3.7-3.10 | MSVC 2019 | Bazel 5.0.0 |
| tensorflow-2.8.0 | 3.7-3.10 | MSVC 2019 | Bazel 4.2.1 |
| tensorflow-2.7.0 | 3.7-3.9 | MSVC 2019 | Bazel 3.7.2 |
| tensorflow-2.6.0 | 3.6-3.9 | MSVC 2019 | Bazel 3.7.2 |
    ### GPU +Note: GPU support on native-Windows is only available for 2.10 or earlier versions, starting in TF 2.11, CUDA build is not supported for Windows. For using TensorFlow GPU on Windows, you will need to build/install TensorFlow in WSL2 or use tensorflow-cpu with TensorFlow-DirectML-Plugin + + diff --git a/site/en/r1/guide/autograph.ipynb b/site/en/r1/guide/autograph.ipynb index 5d8d7c97999..64d631a52b3 100644 --- a/site/en/r1/guide/autograph.ipynb +++ b/site/en/r1/guide/autograph.ipynb @@ -66,7 +66,7 @@ "source": [ "> Note: This is an archived TF1 notebook. These are configured\n", "to run in TF2's \n", - "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n", + "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n", "but will run in TF1 as well. To use TF1 in Colab, use the\n", "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n", "magic." @@ -78,7 +78,7 @@ "id": "CydFK2CL7ZHA" }, "source": [ - "[AutoGraph](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/) helps you write complicated graph code using normal Python. Behind the scenes, AutoGraph automatically transforms your code into the equivalent [TensorFlow graph code](https://www.tensorflow.org/r1/guide/graphs). AutoGraph already supports much of the Python language, and that coverage continues to grow. For a list of supported Python language features, see the [Autograph capabilities and limitations](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/limitations.md)." + "[AutoGraph](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/autograph/) helps you write complicated graph code using normal Python. Behind the scenes, AutoGraph automatically transforms your code into the equivalent [TensorFlow graph code](https://www.tensorflow.org/r1/guide/graphs). AutoGraph already supports much of the Python language, and that coverage continues to grow. For a list of supported Python language features, see the [Autograph capabilities and limitations](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/autograph/g3doc/reference/limitations.md)." ] }, { @@ -241,7 +241,7 @@ "id": "m-jWmsCmByyw" }, "source": [ - "AutoGraph supports common Python statements like `while`, `for`, `if`, `break`, and `return`, with support for nesting. Compare this function with the complicated graph verson displayed in the following code blocks:" + "AutoGraph supports common Python statements like `while`, `for`, `if`, `break`, and `return`, with support for nesting. Compare this function with the complicated graph version displayed in the following code blocks:" ] }, { diff --git a/site/en/r1/guide/checkpoints.md b/site/en/r1/guide/checkpoints.md index 682631449d5..41544f52b25 100644 --- a/site/en/r1/guide/checkpoints.md +++ b/site/en/r1/guide/checkpoints.md @@ -56,8 +56,8 @@ Suppose you call the Estimator's `train` method. 
For example: ```python classifier.train( - input_fn=lambda:train_input_fn(train_x, train_y, batch_size=100), - steps=200) + input_fn=lambda: train_input_fn(train_x, train_y, batch_size=100), + steps=200) ``` As suggested by the following diagrams, the first call to `train` diff --git a/site/en/r1/guide/custom_estimators.md b/site/en/r1/guide/custom_estimators.md index 87dce26a0dc..7bbf3573909 100644 --- a/site/en/r1/guide/custom_estimators.md +++ b/site/en/r1/guide/custom_estimators.md @@ -592,10 +592,10 @@ function for custom Estimators; everything else is the same. For more details, be sure to check out: * The - [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/master/official/r1/mnist), + [official TensorFlow implementation of MNIST](https://github.com/tensorflow/models/tree/r1.15/official/r1/mnist), which uses a custom estimator. * The TensorFlow - [official models repository](https://github.com/tensorflow/models/tree/master/official), + [official models repository](https://github.com/tensorflow/models/tree/r1.15/official), which contains more curated examples using custom estimators. * This [TensorBoard video](https://youtu.be/eBbEDRsCmv4), which introduces TensorBoard. diff --git a/site/en/r1/guide/datasets.md b/site/en/r1/guide/datasets.md index b1ed1b6e113..d7c38bf2f92 100644 --- a/site/en/r1/guide/datasets.md +++ b/site/en/r1/guide/datasets.md @@ -437,7 +437,7 @@ dataset = dataset.batch(32) iterator = dataset.make_initializable_iterator() # You can feed the initializer with the appropriate filenames for the current -# phase of execution, e.g. training vs. validation. +# phase of execution, e.g., training vs. validation. # Initialize `iterator` with training data. training_filenames = ["/var/data/file1.tfrecord", "/var/data/file2.tfrecord"] @@ -639,7 +639,7 @@ TODO(mrry): Add this section. The simplest form of batching stacks `n` consecutive elements of a dataset into a single element. The `Dataset.batch()` transformation does exactly this, with the same constraints as the `tf.stack()` operator, applied to each component -of the elements: i.e. for each component *i*, all elements must have a tensor +of the elements: i.e., for each component *i*, all elements must have a tensor of the exact same shape. ```python diff --git a/site/en/r1/guide/debugger.md b/site/en/r1/guide/debugger.md index 2b4b6497ec4..963765b97db 100644 --- a/site/en/r1/guide/debugger.md +++ b/site/en/r1/guide/debugger.md @@ -10,7 +10,7 @@ due to TensorFlow's computation-graph paradigm. This guide focuses on the command-line interface (CLI) of `tfdbg`. For guide on how to use the graphical user interface (GUI) of tfdbg, i.e., the **TensorBoard Debugger Plugin**, please visit -[its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). +[its README](https://github.com/tensorflow/tensorboard/blob/r1.15/tensorboard/plugins/debugger/README.md). Note: The TensorFlow debugger uses a [curses](https://en.wikipedia.org/wiki/Curses_\(programming_library\))-based text @@ -35,7 +35,7 @@ TensorFlow. Later sections of this document describe how to use **tfdbg** with higher-level APIs of TensorFlow, including `tf.estimator`, `tf.keras` / `keras` and `tf.contrib.slim`. 
To *observe* such an issue, run the following command without the debugger (the source code can be found -[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/v1/debug_mnist.py)): +[here](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py)):
     python -m tensorflow.python.debug.examples.v1.debug_mnist
    @@ -64,7 +64,7 @@ numeric problem first surfaced.
     To add support for tfdbg in our example, all that is needed is to add the
     following lines of code and wrap the Session object with a debugger wrapper.
     This code is already added in
    -[debug_mnist.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/debug/examples/v1/debug_mnist.py),
    +[debug_mnist.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py),
     so you can activate tfdbg CLI with the `--debug` flag at the command line.
     
     ```python
    @@ -370,7 +370,7 @@ traceback of the node's construction.
     
     From the traceback, you can see that the op is constructed at the following
     line:
    -[`debug_mnist.py`](https://www.tensorflow.org/code/tensorflow/python/debug/examples/v1/debug_mnist.py):
    +[`debug_mnist.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_mnist.py):
     
     ```python
     diff = y_ * tf.log(y)
    @@ -457,7 +457,7 @@ accuracy_score = classifier.evaluate(eval_input_fn,
     predict_results = classifier.predict(predict_input_fn, hooks=hooks)
     ```
     
    -[debug_tflearn_iris.py](https://www.tensorflow.org/code/tensorflow/python/debug/examples/v1/debug_tflearn_iris.py),
    +[debug_tflearn_iris.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/debug/examples/v1/debug_tflearn_iris.py),
     contains a full example of how to use the tfdbg with `Estimator`s. To run this
     example, do:
     
    @@ -501,7 +501,7 @@ TensorFlow backend. You just need to replace `tf.keras.backend` with
     ## Debugging tf-slim with TFDBG
     
     TFDBG supports debugging of training and evaluation with
    -[tf-slim](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/slim).
    +[tf-slim](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/slim).
     As detailed below, training and evaluation require slightly different debugging
     workflows.
     
    @@ -605,7 +605,7 @@ The `watch_fn` argument accepts a `Callable` that allows you to configure what
     If your model code is written in C++ or other languages, you can also
     modify the `debug_options` field of `RunOptions` to generate debug dumps that
     can be inspected offline. See
    -[the proto definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/debug.proto)
    +[the proto definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/debug.proto)
     for more details.
     
     ### Debugging Remotely-Running Estimators
    @@ -648,7 +648,7 @@ python -m tensorflow.python.debug.cli.offline_analyzer \
            model, check out
     
        1. The profiling mode of tfdbg: `tfdbg> run -p`.
    -   2. [tfprof](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/core/profiler)
    +   2. [tfprof](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/core/profiler)
           and other profiling tools for TensorFlow.
     
     **Q**: _How do I link tfdbg against my `Session` in Bazel? Why do I see an
    @@ -808,4 +808,4 @@ tensor dumps.
            and conditional breakpoints, and tying tensors to their
            graph-construction source code, all in the browser environment.
            To get started, please visit
    -       [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md).
    +       [its README](https://github.com/tensorflow/tensorboard/blob/r1.15/tensorboard/plugins/debugger/README.md).
    diff --git a/site/en/r1/guide/distribute_strategy.ipynb b/site/en/r1/guide/distribute_strategy.ipynb
    index f6d85912e16..4dd502d331b 100644
    --- a/site/en/r1/guide/distribute_strategy.ipynb
    +++ b/site/en/r1/guide/distribute_strategy.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -118,7 +118,7 @@
             "## Types of strategies\n",
             "`tf.distribute.Strategy` intends to cover a number of use cases along different axes. Some of these combinations are currently supported and others will be added in the future. Some of these axes are:\n",
             "\n",
    -        "* Syncronous vs asynchronous training: These are two common ways of distributing training with data parallelism. In sync training, all workers train over different slices of input data in sync, and aggregating gradients at each step. In async training, all workers are independently training over the input data and updating variables asynchronously. Typically sync training is supported via all-reduce and async through parameter server architecture.\n",
    +        "* Synchronous vs asynchronous training: These are two common ways of distributing training with data parallelism. In sync training, all workers train over different slices of input data in sync, and aggregating gradients at each step. In async training, all workers are independently training over the input data and updating variables asynchronously. Typically sync training is supported via all-reduce and async through parameter server architecture.\n",
             "* Hardware platform: Users may want to scale their training onto multiple GPUs on one machine, or multiple machines in a network (with 0 or more GPUs each), or on Cloud TPUs.\n",
             "\n",
             "In order to support these use cases, we have 4 strategies available. In the next section we will talk about which of these are supported in which scenarios in TF."
    @@ -223,7 +223,7 @@
             "id": "KY1nJHNkMl7b"
           },
           "source": [
    -        "This will create a `CentralStorageStrategy` instance which will use all visible GPUs and CPU. Update to variables on replicas will be aggragated before being applied to variables."
    +        "This will create a `CentralStorageStrategy` instance which will use all visible GPUs and CPU. Update to variables on replicas will be aggregated before being applied to variables."
           ]
         },
         {
    @@ -245,7 +245,7 @@
             "\n",
             "`tf.distribute.experimental.MultiWorkerMirroredStrategy` is very similar to `MirroredStrategy`. It implements synchronous distributed training across multiple workers, each with potentially multiple GPUs. Similar to `MirroredStrategy`, it creates copies of all variables in the model on each device across all workers.\n",
             "\n",
    -        "It uses [CollectiveOps](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/collective_ops.py) as the multi-worker all-reduce communication method used to keep variables in sync. A collective op is a single op in the TensorFlow graph which can automatically choose an all-reduce algorithm in the TensorFlow runtime according to hardware, network topology and tensor sizes.\n",
    +        "It uses [CollectiveOps](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/collective_ops.py) as the multi-worker all-reduce communication method used to keep variables in sync. A collective op is a single op in the TensorFlow graph which can automatically choose an all-reduce algorithm in the TensorFlow runtime according to hardware, network topology and tensor sizes.\n",
             "\n",
             "It also implements additional performance optimizations. For example, it includes a static optimization that converts multiple all-reductions on small tensors into fewer all-reductions on larger tensors. In addition, we are designing it to have a plugin architecture - so that in the future, users will be able to plugin algorithms that are better tuned for their hardware. Note that collective ops also implement other collective operations such as broadcast and all-gather.\n",
             "\n",
    @@ -371,7 +371,7 @@
             "id": "hQv1lm9UPDFy"
           },
           "source": [
    -        "So far we've talked about what are the different stategies available and how you can instantiate them. In the next few sections, we will talk about the different ways in which you can use them to distribute your training. We will show short code snippets in this guide and link off to full tutorials which you can run end to end."
    +        "So far we've talked about what are the different strategies available and how you can instantiate them. In the next few sections, we will talk about the different ways in which you can use them to distribute your training. We will show short code snippets in this guide and link off to full tutorials which you can run end to end."
           ]
         },
         {
    @@ -490,8 +490,8 @@
             "Here is a list of tutorials and examples that illustrate the above integration end to end with Keras:\n",
             "\n",
             "1. [Tutorial](../tutorials/distribute/keras.ipynb) to train MNIST with `MirroredStrategy`.\n",
    -        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/master/official/vision/image_classification/resnet_imagenet_main.py) training with ImageNet data using `MirroredStrategy`.\n",
    -        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/master/models/experimental/resnet50_keras/resnet50.py) trained with Imagenet data on Cloud TPus with `TPUStrategy`."
    +        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/r1.15/official/vision/image_classification/resnet_imagenet_main.py) training with ImageNet data using `MirroredStrategy`.\n",
    +        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/1.15/models/experimental/resnet50_keras/resnet50.py) trained with Imagenet data on Cloud TPus with `TPUStrategy`."
           ]
         },
         {
    @@ -595,9 +595,9 @@
             "### Examples and Tutorials\n",
             "Here are some examples that show end to end usage of various strategies with Estimator:\n",
             "\n",
    -        "1. [End to end example](https://github.com/tensorflow/ecosystem/tree/master/distribution_strategy) for multi worker training in tensorflow/ecosystem using Kuberentes templates. This example starts with a Keras model and converts it to an Estimator using the `tf.keras.estimator.model_to_estimator` API.\n",
    -        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/master/official/r1/resnet/imagenet_main.py) model, which can be trained using either `MirroredStrategy` or `MultiWorkerMirroredStrategy`.\n",
    -        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/master/models/experimental/distribution_strategy/resnet_estimator.py) example with TPUStrategy."
    +        "1. [End to end example](https://github.com/tensorflow/ecosystem/tree/r1.15/distribution_strategy) for multi worker training in tensorflow/ecosystem using Kuberentes templates. This example starts with a Keras model and converts it to an Estimator using the `tf.keras.estimator.model_to_estimator` API.\n",
    +        "2. Official [ResNet50](https://github.com/tensorflow/models/blob/r1.15/official/r1/resnet/imagenet_main.py) model, which can be trained using either `MirroredStrategy` or `MultiWorkerMirroredStrategy`.\n",
    +        "3. [ResNet50](https://github.com/tensorflow/tpu/blob/1.15/models/experimental/distribution_strategy/resnet_estimator.py) example with TPUStrategy."
           ]
         },
         {
    @@ -607,7 +607,7 @@
           },
           "source": [
             "## Using `tf.distribute.Strategy` with custom training loops\n",
    -        "As you've seen, using `tf.distrbute.Strategy` with high level APIs is only a couple lines of code change. With a little more effort, `tf.distrbute.Strategy` can also be used by other users who are not using these frameworks.\n",
    +        "As you've seen, using `tf.distribute.Strategy` with high level APIs is only a couple lines of code change. With a little more effort, `tf.distribute.Strategy` can also be used by other users who are not using these frameworks.\n",
             "\n",
             "TensorFlow is used for a wide variety of use cases and some users (such as researchers) require more flexibility and control over their training loops. This makes it hard for them to use the high level frameworks such as Estimator or Keras. For instance, someone using a GAN may want to take a different number of generator or discriminator steps each round. Similarly, the high level frameworks are not very suitable for Reinforcement Learning training. So these users will usually write their own training loops.\n",
             "\n",
    diff --git a/site/en/r1/guide/eager.ipynb b/site/en/r1/guide/eager.ipynb
    index 230974ab5a4..f76acb4b702 100644
    --- a/site/en/r1/guide/eager.ipynb
    +++ b/site/en/r1/guide/eager.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -95,7 +95,7 @@
             "\n",
             "Eager execution supports most TensorFlow operations and GPU acceleration. For a\n",
             "collection of examples running in eager execution, see:\n",
    -        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples).\n",
    +        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples).\n",
             "\n",
             "Note: Some models may experience increased overhead with eager execution\n",
             "enabled. Performance improvements are ongoing, but please\n",
    @@ -1160,7 +1160,7 @@
             "### Benchmarks\n",
             "\n",
             "For compute-heavy models, such as\n",
    -        "[ResNet50](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples/resnet50)\n",
    +        "[ResNet50](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples/resnet50)\n",
             "training on a GPU, eager execution performance is comparable to graph execution.\n",
             "But this gap grows larger for models with less computation and there is work to\n",
             "be done for optimizing hot code paths for models with lots of small operations."
    @@ -1225,7 +1225,7 @@
             "production deployment. Use `tf.train.Checkpoint` to save and restore model\n",
             "variables, this allows movement between eager and graph execution environments.\n",
             "See the examples in:\n",
    -        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/eager/python/examples).\n"
    +        "[tensorflow/contrib/eager/python/examples](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/contrib/eager/python/examples).\n"
           ]
         },
         {
    diff --git a/site/en/r1/guide/extend/architecture.md b/site/en/r1/guide/extend/architecture.md
    index 1f2ac53066f..0753824e15e 100644
    --- a/site/en/r1/guide/extend/architecture.md
    +++ b/site/en/r1/guide/extend/architecture.md
    @@ -34,7 +34,7 @@ This document focuses on the following layers:
     *  **Client**:
        *  Defines the computation as a dataflow graph.
        *  Initiates graph execution using a [**session**](
    -      https://www.tensorflow.org/code/tensorflow/python/client/session.py).
    +      https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/client/session.py).
     *  **Distributed Master**
        *  Prunes a specific subgraph from the graph, as defined by the arguments
           to Session.run().
    @@ -144,8 +144,8 @@ The distributed master then ships the graph pieces to the distributed tasks.
     
     ### Code
     
    -*  [MasterService API definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/master_service.proto)
    -*  [Master interface](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/master_interface.h)
    +*  [MasterService API definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/master_service.proto)
    +*  [Master interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/master_interface.h)
     
     ## Worker Service
     
    @@ -178,7 +178,7 @@ For transfers between tasks, TensorFlow uses multiple protocols, including:
     
     We also have preliminary support for NVIDIA's NCCL library for multi-GPU
     communication, see:
    -[`tf.contrib.nccl`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/nccl_ops.py).
    +[`tf.contrib.nccl`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/nccl_ops.py).
     
     Partitioned Graph
     
    @@ -186,9 +186,9 @@ communication, see:
     
     ### Code
     
    -*   [WorkerService API definition](https://www.tensorflow.org/code/tensorflow/core/protobuf/worker_service.proto)
    -*   [Worker interface](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/worker_interface.h)
    -*   [Remote rendezvous (for Send and Recv implementations)](https://www.tensorflow.org/code/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h)
    +*   [WorkerService API definition](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/worker_service.proto)
    +*   [Worker interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/worker_interface.h)
    +*   [Remote rendezvous (for Send and Recv implementations)](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/distributed_runtime/rpc/rpc_rendezvous_mgr.h)
     
     ## Kernel Implementations
     
    @@ -199,7 +199,7 @@ Many of the operation kernels are implemented using Eigen::Tensor, which uses
     C++ templates to generate efficient parallel code for multicore CPUs and GPUs;
     however, we liberally use libraries like cuDNN where a more efficient kernel
     implementation is possible. We have also implemented
    -[quantization](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/g3doc/performance/post_training_quantization.md), which enables
    +[quantization](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/lite/g3doc/performance/post_training_quantization.md), which enables
     faster inference in environments such as mobile devices and high-throughput
     datacenter applications, and use the
     [gemmlowp](https://github.com/google/gemmlowp) low-precision matrix library to
    @@ -215,4 +215,4 @@ experimental implementation of automatic kernel fusion.
     
     ### Code
     
    -*   [`OpKernel` interface](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)
    +*   [`OpKernel` interface](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_kernel.h)
    diff --git a/site/en/r1/guide/extend/bindings.md b/site/en/r1/guide/extend/bindings.md
    index 9c10e90840f..7daa2212106 100644
    --- a/site/en/r1/guide/extend/bindings.md
    +++ b/site/en/r1/guide/extend/bindings.md
    @@ -112,11 +112,11 @@ There are a few ways to get a list of the `OpDef`s for the registered ops:
         to interpret the `OpDef` messages.
     -   The C++ function `OpRegistry::Global()->GetRegisteredOps()` returns the same
         list of all registered `OpDef`s (defined in
    -    [`tensorflow/core/framework/op.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op.h)). This can be used to write the generator
    +    [`tensorflow/core/framework/op.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op.h)). This can be used to write the generator
         in C++ (particularly useful for languages that do not have protocol buffer
         support).
     -   The ASCII-serialized version of that list is periodically checked in to
    -    [`tensorflow/core/ops/ops.pbtxt`](https://www.tensorflow.org/code/tensorflow/core/ops/ops.pbtxt) by an automated process.
    +    [`tensorflow/core/ops/ops.pbtxt`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/ops.pbtxt) by an automated process.
     
     The `OpDef` specifies the following:
     
    @@ -159,7 +159,7 @@ between the generated code and the `OpDef`s checked into the repository, but is
     useful for languages where code is expected to be generated ahead of time like
     `go get` for Go and `cargo ops` for Rust. At the other end of the spectrum, for
     some languages the code could be generated dynamically from
    -[`tensorflow/core/ops/ops.pbtxt`](https://www.tensorflow.org/code/tensorflow/core/ops/ops.pbtxt).
    +[`tensorflow/core/ops/ops.pbtxt`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/ops.pbtxt).
     
     #### Handling Constants
     
    @@ -228,4 +228,4 @@ At this time, support for gradients, functions and control flow operations ("if"
     and "while") is not available in languages other than Python. This will be
     updated when the [C API] provides necessary support.
     
    -[C API]: https://www.tensorflow.org/code/tensorflow/c/c_api.h
    +[C API]: https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/c/c_api.h
    diff --git a/site/en/r1/guide/extend/filesystem.md b/site/en/r1/guide/extend/filesystem.md
    index 4d34c07102e..2d6ea0c4645 100644
    --- a/site/en/r1/guide/extend/filesystem.md
    +++ b/site/en/r1/guide/extend/filesystem.md
    @@ -54,7 +54,7 @@ To implement a custom filesystem plugin, you must do the following:
     ### The FileSystem interface
     
     The `FileSystem` interface is an abstract C++ interface defined in
    -[file_system.h](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/file_system.h).
    +[file_system.h](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/file_system.h).
     An implementation of the `FileSystem` interface should implement all relevant
     the methods defined by the interface. Implementing the interface requires
     defining operations such as creating `RandomAccessFile`, `WritableFile`, and
    @@ -70,26 +70,26 @@ involves calling `stat()` on the file and then returns the filesize as reported
     by the return of the stat object. Similarly, for the `HDFSFileSystem`
     implementation, these calls simply delegate to the `libHDFS` implementation of
     similar functionality, such as `hdfsDelete` for
    -[DeleteFile](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hadoop/hadoop_file_system.cc#L386).
    +[DeleteFile](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/hadoop/hadoop_file_system.cc#L386).
     
     We suggest looking through these code examples to get an idea of how different
     filesystem implementations call their existing libraries. Examples include:
     
     *   [POSIX
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/posix/posix_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/posix/posix_file_system.h)
     *   [HDFS
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/hadoop/hadoop_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/hadoop/hadoop_file_system.h)
     *   [GCS
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/cloud/gcs_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/cloud/gcs_file_system.h)
     *   [S3
    -    plugin](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/s3/s3_file_system.h)
    +    plugin](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/s3/s3_file_system.h)
     
     #### The File interfaces
     
     Beyond operations that allow you to query and manipulate files and directories
     in a filesystem, the `FileSystem` interface requires you to implement factories
     that return implementations of abstract objects such as the
    -[RandomAccessFile](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/platform/file_system.h#L223),
    +[RandomAccessFile](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/platform/file_system.h#L223),
     the `WritableFile`, so that TensorFlow code and read and write to files in that
     `FileSystem` implementation.
     
    @@ -224,7 +224,7 @@ it will use the `FooBarFileSystem` implementation.
     
     Next, you must build a shared object containing this implementation. An example
     of doing so using bazel's `cc_binary` rule can be found
    -[here](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/BUILD#L244),
    +[here](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD#L244),
     but you may use any build system to do so. See the section on [building the op library](../extend/op.md#build_the_op_library) for similar
     instructions.
     
    @@ -236,7 +236,7 @@ passing the path to the shared object. Calling this in your client program loads
     the shared object in the process, thus registering your implementation as
     available for any file operations going through the `FileSystem` interface. You
     can see
    -[test_file_system.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/framework/file_system_test.py)
    +[test_file_system.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/framework/file_system_test.py)
     for an example.
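
As a rough illustration (the plugin path and `foobar://` scheme below are hypothetical placeholders, following the `FooBarFileSystem` example above), loading the shared object and then exercising the registered scheme through `tf.gfile` might look like this:

```python
import tensorflow as tf

# Hypothetical path to the shared object built for the FooBarFileSystem plugin.
tf.load_file_system_library("/path/to/foobar_filesystem.so")

# Once loaded, paths using the registered scheme are routed through the plugin.
with tf.gfile.Open("foobar://bucket/some_file.txt") as f:
    print(f.read())
```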
     
     ## What goes through this interface?
    diff --git a/site/en/r1/guide/extend/formats.md b/site/en/r1/guide/extend/formats.md
    index 3b7b4aafbd6..bdebee5487d 100644
    --- a/site/en/r1/guide/extend/formats.md
    +++ b/site/en/r1/guide/extend/formats.md
    @@ -28,11 +28,11 @@ individual records in a file. There are several examples of "reader" datasets
     that are already built into TensorFlow:
     
     *   `tf.data.TFRecordDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     *   `tf.data.FixedLengthRecordDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     *   `tf.data.TextLineDataset`
    -    ([source in `kernels/data/reader_dataset_ops.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/data/reader_dataset_ops.cc))
    +    ([source in `kernels/data/reader_dataset_ops.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/data/reader_dataset_ops.cc))
     
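For orientation, here is a minimal TF1-style sketch of reading records with one of these built-in reader datasets (the file name is a placeholder; any newline-delimited text file works):

```python
import tensorflow as tf

# Placeholder file name; substitute your own text file.
dataset = tf.data.TextLineDataset("example.txt")
iterator = dataset.make_one_shot_iterator()
next_line = iterator.get_next()

with tf.Session() as sess:
    print(sess.run(next_line))  # first line of the file
```
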
     Each of these implementations comprises three related classes:
     
    @@ -279,7 +279,7 @@ if __name__ == "__main__":
     ```
     
     You can see some examples of `Dataset` wrapper classes in
    -[`tensorflow/python/data/ops/dataset_ops.py`](https://www.tensorflow.org/code/tensorflow/python/data/ops/dataset_ops.py).
    +[`tensorflow/python/data/ops/dataset_ops.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/data/ops/dataset_ops.py).
     
     ## Writing an Op for a record format
     
    @@ -297,7 +297,7 @@ Examples of Ops useful for decoding records:
     
     Note that it can be useful to use multiple Ops to decode a particular record
     format.  For example, you may have an image saved as a string in
    -[a `tf.train.Example` protocol buffer](https://www.tensorflow.org/code/tensorflow/core/example/example.proto).
    +[a `tf.train.Example` protocol buffer](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto).
     Depending on the format of that image, you might take the corresponding output
     from a `tf.parse_single_example` op and call `tf.image.decode_jpeg`,
     `tf.image.decode_png`, or `tf.decode_raw`.  It is common to take the output
    diff --git a/site/en/r1/guide/extend/model_files.md b/site/en/r1/guide/extend/model_files.md
    index 30e73a5169e..e590fcf1f27 100644
    --- a/site/en/r1/guide/extend/model_files.md
    +++ b/site/en/r1/guide/extend/model_files.md
    @@ -28,7 +28,7 @@ by calling `as_graph_def()`, which returns a `GraphDef` object.
     
     The GraphDef class is an object created by the ProtoBuf library from the
     definition in
    -[tensorflow/core/framework/graph.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto). The protobuf tools parse
    +[tensorflow/core/framework/graph.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/graph.proto). The protobuf tools parse
     this text file, and generate the code to load, store, and manipulate graph
     definitions. If you see a standalone TensorFlow file representing a model, it's
     likely to contain a serialized version of one of these `GraphDef` objects
    @@ -87,7 +87,7 @@ for node in graph_def.node
     ```
     
     Each node is a `NodeDef` object, defined in
    -[tensorflow/core/framework/node_def.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/node_def.proto). These
    +[tensorflow/core/framework/node_def.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/node_def.proto). These
     are the fundamental building blocks of TensorFlow graphs, with each one defining
     a single operation along with its input connections. Here are the members of a
     `NodeDef`, and what they mean.
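
For orientation, here is a minimal sketch (assuming a frozen graph at the hypothetical path `frozen_graph.pb`) that parses a `GraphDef` and prints these `NodeDef` members for every node:

```python
import tensorflow as tf

graph_def = tf.GraphDef()
# Hypothetical path to a frozen model; substitute your own file.
with tf.gfile.GFile("frozen_graph.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

for node in graph_def.node:
    # name, op, input, and attr are the NodeDef members described here.
    print(node.name, node.op, list(node.input), sorted(node.attr.keys()))
```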
    @@ -107,7 +107,7 @@ This defines what operation to run, for example `"Add"`, `"MatMul"`, or
     `"Conv2D"`. When a graph is run, this op name is looked up in a registry to
     find an implementation. The registry is populated by calls to the
     `REGISTER_OP()` macro, like those in
    -[tensorflow/core/ops/nn_ops.cc](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/ops/nn_ops.cc).
    +[tensorflow/core/ops/nn_ops.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/nn_ops.cc).
     
     ### `input`
     
    @@ -133,7 +133,7 @@ size of filters for convolutions, or the values of constant ops. Because there
     can be so many different types of attribute values, from strings, to ints, to
     arrays of tensor values, there's a separate protobuf file defining the data
     structure that holds them, in
    -[tensorflow/core/framework/attr_value.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto).
    +[tensorflow/core/framework/attr_value.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/attr_value.proto).
     
     Each attribute has a unique name string, and the expected attributes are listed
     when the operation is defined. If an attribute isn't present in a node, but it
    @@ -151,7 +151,7 @@ the file format during training. Instead, they're held in separate checkpoint
     files, and there are `Variable` ops in the graph that load the latest values
     when they're initialized. It's often not very convenient to have separate files
     when you're deploying to production, so there's the
    -[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/tools/freeze_graph.py) script that takes a graph definition and a set
    +[freeze_graph.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/freeze_graph.py) script that takes a graph definition and a set
     of checkpoints and freezes them together into a single file.
     
     What this does is load the `GraphDef`, pull in the values for all the variables
    @@ -167,7 +167,7 @@ the most common problems is extracting and interpreting the weight values. A
     common way to store them, for example in graphs created by the freeze_graph
     script, is as `Const` ops containing the weights as `Tensors`. These are
     defined in
    -[tensorflow/core/framework/tensor.proto](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto), and contain information
    +[tensorflow/core/framework/tensor.proto](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto), and contain information
     about the size and type of the data, as well as the values themselves. In
     Python, you get a `TensorProto` object from a `NodeDef` representing a `Const`
     op by calling something like `some_node_def.attr['value'].tensor`.
    diff --git a/site/en/r1/guide/extend/op.md b/site/en/r1/guide/extend/op.md
    index d006a6251d0..186d9c28c04 100644
    --- a/site/en/r1/guide/extend/op.md
    +++ b/site/en/r1/guide/extend/op.md
    @@ -47,7 +47,7 @@ To incorporate your custom op you'll need to:
         test the op in C++. If you define gradients, you can verify them with the
         Python `tf.test.compute_gradient_error`.
         See
    -    [`relu_op_test.py`](https://www.tensorflow.org/code/tensorflow/python/kernel_tests/relu_op_test.py) as
    +    [`relu_op_test.py`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/kernel_tests/relu_op_test.py) as
         an example that tests the forward functions of Relu-like operators and
         their gradients.
     
    @@ -155,17 +155,17 @@ REGISTER_KERNEL_BUILDER(Name("ZeroOut").Device(DEVICE_CPU), ZeroOutOp);
     >   Important: Instances of your OpKernel may be accessed concurrently.
     >   Your `Compute` method must be thread-safe. Guard any access to class
     >   members with a mutex. Or better yet, don't share state via class members!
    ->   Consider using a [`ResourceMgr`](https://www.tensorflow.org/code/tensorflow/core/framework/resource_mgr.h)
    +>   Consider using a [`ResourceMgr`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/resource_mgr.h)
     >   to keep track of op state.
     
     ### Multi-threaded CPU kernels
     
     To write a multi-threaded CPU kernel, the Shard function in
    -[`work_sharder.h`](https://www.tensorflow.org/code/tensorflow/core/util/work_sharder.h)
    +[`work_sharder.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/util/work_sharder.h)
     can be used. This function shards a computation function across the
     threads configured to be used for intra-op threading (see
     intra_op_parallelism_threads in
    -[`config.proto`](https://www.tensorflow.org/code/tensorflow/core/protobuf/config.proto)).
    +[`config.proto`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/config.proto)).
     
     ### GPU kernels
     
    @@ -348,12 +348,13 @@ g++ -std=c++11 -shared zero_out.cc -o zero_out.so -fPIC ${TF_CFLAGS[@]} ${TF_LFL
     On macOS, the additional flag "-undefined dynamic_lookup" is required when
     building the `.so` file.
     
    ->   Note on `gcc` version `>=5`: gcc uses the new C++
    ->   [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`. The binary pip
    ->   packages available on the TensorFlow website are built with `gcc4` that uses
    ->   the older ABI. If you compile your op library with `gcc>=5`, add
    ->   `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to make the library
    ->   compatible with the older abi.
    +> Note on `gcc` version `>=5`: gcc uses the new C++
    +> [ABI](https://gcc.gnu.org/gcc-5/changes.html#libstdcxx) since version `5`.
+> TensorFlow 2.8 and earlier were built with `gcc4`, which uses the older ABI. If
    +> you are using these versions of TensorFlow and are trying to compile your op
    +> library with `gcc>=5`, add `-D_GLIBCXX_USE_CXX11_ABI=0` to the command line to
    +> make the library compatible with the older ABI. TensorFlow 2.9+ packages are
    +> compatible with the newer ABI by default.
     
     ### Compile the op using bazel (TensorFlow source installation)
     
    @@ -485,13 +486,13 @@ This asserts that the input is a vector, and returns having set the
     
     *   The `context`, which can either be an `OpKernelContext` or
         `OpKernelConstruction` pointer (see
    -    [`tensorflow/core/framework/op_kernel.h`](https://www.tensorflow.org/code/tensorflow/core/framework/op_kernel.h)),
    +    [`tensorflow/core/framework/op_kernel.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_kernel.h)),
         for its `SetStatus()` method.
     *   The condition.  For example, there are functions for validating the shape
         of a tensor in
    -    [`tensorflow/core/framework/tensor_shape.h`](https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.h)
    +    [`tensorflow/core/framework/tensor_shape.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.h)
     *   The error itself, which is represented by a `Status` object, see
    -    [`tensorflow/core/lib/core/status.h`](https://www.tensorflow.org/code/tensorflow/core/lib/core/status.h). A
    +    [`tensorflow/core/lib/core/status.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/lib/core/status.h). A
         `Status` has both a type (frequently `InvalidArgument`, but see the list of
         types) and a message.  Functions for constructing an error may be found in
         [`tensorflow/core/lib/core/errors.h`][validation-macros].
    @@ -632,7 +633,7 @@ define an attr with constraints, you can use the following ``s:
     
         The specific lists of types allowed by these are defined by the functions
         (like `NumberTypes()`) in
    -    [`tensorflow/core/framework/types.h`](https://www.tensorflow.org/code/tensorflow/core/framework/types.h).
    +    [`tensorflow/core/framework/types.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.h).
         In this example the attr `t` must be one of the numeric types:
     
         ```c++
    @@ -1179,7 +1180,7 @@ There are several ways to preserve backwards-compatibility.
        type into a list of varying types).
     
     The full list of safe and unsafe changes can be found in
    -[`tensorflow/core/framework/op_compatibility_test.cc`](https://www.tensorflow.org/code/tensorflow/core/framework/op_compatibility_test.cc).
    +[`tensorflow/core/framework/op_compatibility_test.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_compatibility_test.cc).
     If you cannot make your change to an operation backwards compatible, then create
     a new operation with a new name with the new semantics.
     
    @@ -1189,23 +1190,23 @@ callers.  The Python API may be kept compatible by careful changes in a
     hand-written Python wrapper, by keeping the old signature except possibly adding
     new optional arguments to the end.  Generally incompatible changes may only be
     made when TensorFlow changes major versions, and must conform to the
    -[`GraphDef` version semantics](../guide/version_compat.md#compatibility_of_graphs_and_checkpoints).
    +[`GraphDef` version semantics](../version_compat.md).
     
     ### GPU Support
     
     You can implement different OpKernels and register one for CPU and another for
     GPU, just like you can [register kernels for different types](#polymorphism).
     There are several examples of kernels with GPU support in
    -[`tensorflow/core/kernels/`](https://www.tensorflow.org/code/tensorflow/core/kernels/).
    +[`tensorflow/core/kernels/`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/).
     Notice some kernels have a CPU version in a `.cc` file, a GPU version in a file
     ending in `_gpu.cu.cc`, and some code shared in common in a `.h` file.
     
     For example, the `tf.pad` has
     everything but the GPU kernel in [`tensorflow/core/kernels/pad_op.cc`][pad_op].
     The GPU kernel is in
    -[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op_gpu.cu.cc),
    +[`tensorflow/core/kernels/pad_op_gpu.cu.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op_gpu.cu.cc),
     and the shared code is a templated class defined in
    -[`tensorflow/core/kernels/pad_op.h`](https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.h).
    +[`tensorflow/core/kernels/pad_op.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op.h).
     We organize the code this way for two reasons: it allows you to share common
     code among the CPU and GPU implementations, and it puts the GPU implementation
     into a separate file so that it can be compiled only by the GPU compiler.
    @@ -1226,16 +1227,16 @@ kept on the CPU, add a `HostMemory()` call to the kernel registration, e.g.:
     #### Compiling the kernel for the GPU device
     
     Look at
    -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
    +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
     for an example that uses a CUDA kernel to implement an op. The
     `tf_custom_op_library` accepts a `gpu_srcs` argument in which the list of source
     files containing the CUDA kernels (`*.cu.cc` files) can be specified. For use
     with a binary installation of TensorFlow, the CUDA kernels have to be compiled
     with NVIDIA's `nvcc` compiler. Here is the sequence of commands you can use to
     compile the
    -[cuda_op_kernel.cu.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
    +[cuda_op_kernel.cu.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cu.cc)
     and
    -[cuda_op_kernel.cc](https://www.tensorflow.org/code/tensorflow/examples/adding_an_op/cuda_op_kernel.cc)
    +[cuda_op_kernel.cc](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/adding_an_op/cuda_op_kernel.cc)
     into a single dynamically loadable library:
     
     ```bash
    @@ -1360,7 +1361,7 @@ be set to the first input's shape. If the output is selected by its index as in
     
     There are a number of common shape functions
     that apply to many ops, such as `shape_inference::UnchangedShape` which can be
    -found in [common_shape_fns.h](https://www.tensorflow.org/code/tensorflow/core/framework/common_shape_fns.h) and used as follows:
    +found in [common_shape_fns.h](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/common_shape_fns.h) and used as follows:
     
     ```c++
     REGISTER_OP("ZeroOut")
    @@ -1407,7 +1408,7 @@ provides access to the attributes of the op).
     
     Since shape inference is an optional feature, and the shapes of tensors may vary
     dynamically, shape functions must be robust to incomplete shape information for
    -any of the inputs. The `Merge` method in [`InferenceContext`](https://www.tensorflow.org/code/tensorflow/core/framework/shape_inference.h)
    +any of the inputs. The `Merge` method in [`InferenceContext`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/shape_inference.h)
     allows the caller to assert that two shapes are the same, even if either
     or both of them do not have complete information. Shape functions are defined
     for all of the core TensorFlow ops and provide many different usage examples.
    @@ -1432,7 +1433,7 @@ If you have a complicated shape function, you should consider adding a test for
     validating that various input shape combinations produce the expected output
     shape combinations.  You can see examples of how to write these tests in some
     our
    -[core ops tests](https://www.tensorflow.org/code/tensorflow/core/ops/array_ops_test.cc).
    +[core ops tests](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/array_ops_test.cc).
     (The syntax of `INFER_OK` and `INFER_ERROR` are a little cryptic, but try to be
     compact in representing input and output shape specifications in tests.  For
     now, see the surrounding comments in those tests to get a sense of the shape
    @@ -1445,20 +1446,20 @@ To build a `pip` package for your op, see the
     guide shows how to build custom ops from the TensorFlow pip package instead
     of building TensorFlow from source.
     
    -[core-array_ops]:https://www.tensorflow.org/code/tensorflow/core/ops/array_ops.cc
    -[python-user_ops]:https://www.tensorflow.org/code/tensorflow/python/user_ops/user_ops.py
    -[tf-kernels]:https://www.tensorflow.org/code/tensorflow/core/kernels/
    -[user_ops]:https://www.tensorflow.org/code/tensorflow/core/user_ops/
    -[pad_op]:https://www.tensorflow.org/code/tensorflow/core/kernels/pad_op.cc
    -[standard_ops-py]:https://www.tensorflow.org/code/tensorflow/python/ops/standard_ops.py
    -[standard_ops-cc]:https://www.tensorflow.org/code/tensorflow/cc/ops/standard_ops.h
    -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD
    -[validation-macros]:https://www.tensorflow.org/code/tensorflow/core/lib/core/errors.h
    -[op_def_builder]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.h
    -[register_types]:https://www.tensorflow.org/code/tensorflow/core/framework/register_types.h
    -[FinalizeAttr]:https://www.tensorflow.org/code/tensorflow/core/framework/op_def_builder.cc
    -[DataTypeString]:https://www.tensorflow.org/code/tensorflow/core/framework/types.cc
    -[python-BUILD]:https://www.tensorflow.org/code/tensorflow/python/BUILD
    -[types-proto]:https://www.tensorflow.org/code/tensorflow/core/framework/types.proto
    -[TensorShapeProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor_shape.proto
    -[TensorProto]:https://www.tensorflow.org/code/tensorflow/core/framework/tensor.proto
    +[core-array_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/ops/array_ops.cc
    +[python-user_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/user_ops/user_ops.py
    +[tf-kernels]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/
    +[user_ops]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/user_ops/
    +[pad_op]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/kernels/pad_op.cc
    +[standard_ops-py]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/ops/standard_ops.py
    +[standard_ops-cc]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/ops/standard_ops.h
    +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD
    +[validation-macros]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/lib/core/errors.h
    +[op_def_builder]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def_builder.h
    +[register_types]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/register_types.h
    +[FinalizeAttr]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def_builder.cc
    +[DataTypeString]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.cc
    +[python-BUILD]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/BUILD
    +[types-proto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.proto
    +[TensorShapeProto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.proto
    +[TensorProto]:https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto
    diff --git a/site/en/r1/guide/feature_columns.md b/site/en/r1/guide/feature_columns.md
    index 5a4dfbbf46d..e4259f85e9f 100644
    --- a/site/en/r1/guide/feature_columns.md
    +++ b/site/en/r1/guide/feature_columns.md
    @@ -562,7 +562,7 @@ For more examples on feature columns, view the following:
     
     * The [Low Level Introduction](../guide/low_level_intro.md#feature_columns) demonstrates how
       experiment directly with `feature_columns` using TensorFlow's low level APIs.
    -* The [Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep)
    +* The [Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep)
       solves a binary classification problem using `feature_columns` on a variety of
       input data types.
     
    diff --git a/site/en/r1/guide/graph_viz.md b/site/en/r1/guide/graph_viz.md
    index 1965378e03e..1e3780e7928 100644
    --- a/site/en/r1/guide/graph_viz.md
    +++ b/site/en/r1/guide/graph_viz.md
    @@ -251,7 +251,7 @@ is a snippet from the train and test section of a modification of the
     [Estimators MNIST tutorial](../tutorials/estimators/cnn.md), in which we have
     recorded summaries and
     runtime statistics. See the
    -[Tensorboard](https://tensorflow.org/tensorboard)
+[TensorBoard documentation](https://tensorflow.org/tensorboard)
     for details on how to record summaries.
     Full source is [here](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/examples/tutorials/mnist/mnist_with_summaries.py).
     
    diff --git a/site/en/r1/guide/keras.ipynb b/site/en/r1/guide/keras.ipynb
    index 08a778b60a5..3a0cd8e55c5 100644
    --- a/site/en/r1/guide/keras.ipynb
    +++ b/site/en/r1/guide/keras.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -1211,8 +1211,7 @@
         "colab": {
           "collapsed_sections": [],
           "name": "keras.ipynb",
    -      "provenance": [],
    -      "toc_visible": true
    +            "toc_visible": true
         },
         "kernelspec": {
           "display_name": "Python 3",
    diff --git a/site/en/r1/guide/performance/benchmarks.md b/site/en/r1/guide/performance/benchmarks.md
    index 8998c0723db..a56959ea416 100644
    --- a/site/en/r1/guide/performance/benchmarks.md
    +++ b/site/en/r1/guide/performance/benchmarks.md
    @@ -401,7 +401,7 @@ GPUs | InceptionV3 (batch size 32) | ResNet-50 (batch size 32)
     ## Methodology
     
     This
    -[script](https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks)
    +[script](https://github.com/tensorflow/benchmarks/tree/r1.15/scripts/tf_cnn_benchmarks)
     was run on the various platforms to generate the above results.
     
     In order to create results that are as repeatable as possible, each test was run
    diff --git a/site/en/r1/guide/performance/overview.md b/site/en/r1/guide/performance/overview.md
    index af74f0f28c6..be7217f4b99 100644
    --- a/site/en/r1/guide/performance/overview.md
    +++ b/site/en/r1/guide/performance/overview.md
    @@ -19,9 +19,9 @@ Reading large numbers of small files significantly impacts I/O performance.
     One approach to get maximum I/O throughput is to preprocess input data into
     larger (~100MB) `TFRecord` files. For smaller data sets (200MB-1GB), the best
     approach is often to load the entire data set into memory. The document
    -[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/master/research/slim#downloading-and-converting-to-tfrecord-format)
    +[Downloading and converting to TFRecord format](https://github.com/tensorflow/models/tree/r1.15/research/slim#downloading-and-converting-to-tfrecord-format)
     includes information and scripts for creating `TFRecord`s, and this
    -[script](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py)
    +[script](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10_estimator/generate_cifar10_tfrecords.py)
     converts the CIFAR-10 dataset into `TFRecord`s.
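
As a rough sketch of the idea (synthetic records and a placeholder output path), packing examples into a single `TFRecord` file with the TF1 API looks like this; in practice you would shard real data into ~100MB files:

```python
import tensorflow as tf

# Placeholder output path; shard real datasets into ~100MB files.
with tf.python_io.TFRecordWriter("train-00000-of-00001.tfrecord") as writer:
    for label in range(3):  # synthetic stand-in records
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])),
        }))
        writer.write(example.SerializeToString())
```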
     
     While feeding data using a `feed_dict` offers a high level of flexibility, in
    @@ -122,7 +122,7 @@ tf.Session(config=config)
     Intel® has added optimizations to TensorFlow for Intel® Xeon® and Intel® Xeon
     Phi™ through the use of the Intel® Math Kernel Library for Deep Neural Networks
     (Intel® MKL-DNN) optimized primitives. The optimizations also provide speedups
    -for the consumer line of processors, e.g. i5 and i7 Intel processors. The Intel
    +for the consumer line of processors, e.g., i5 and i7 Intel processors. The Intel
     published paper
     [TensorFlow* Optimizations on Modern Intel® Architecture](https://software.intel.com/en-us/articles/tensorflow-optimizations-on-modern-intel-architecture)
     contains additional details on the implementation.
    @@ -255,7 +255,7 @@ bazel build -c opt --copt=-march="broadwell" --config=cuda //tensorflow/tools/pi
       a docker container, the data is not cached and the penalty is paid each time
       TensorFlow starts. The best practice is to include the
       [compute capabilities](http://developer.nvidia.com/cuda-gpus)
    -  of the GPUs that will be used, e.g. P100: 6.0, Titan X (Pascal): 6.1,
    +  of the GPUs that will be used, e.g., P100: 6.0, Titan X (Pascal): 6.1,
       Titan X (Maxwell): 5.2, and K80: 3.7.
     * Use a version of `gcc` that supports all of the optimizations of the target
       CPU. The recommended minimum gcc version is 4.8.3. On macOS, upgrade to the
    diff --git a/site/en/r1/guide/ragged_tensors.ipynb b/site/en/r1/guide/ragged_tensors.ipynb
    index 61bce66ecfb..289d29ce82e 100644
    --- a/site/en/r1/guide/ragged_tensors.ipynb
    +++ b/site/en/r1/guide/ragged_tensors.ipynb
    @@ -57,7 +57,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -1010,7 +1010,7 @@
             "    `tf.RaggedTensor.values`\n",
             "    and\n",
             "    `tf.RaggedTensor.row_splits`\n",
    -        "    properties, or row-paritioning methods such as `tf.RaggedTensor.row_lengths()`\n",
    +        "    properties, or row-partitioning methods such as `tf.RaggedTensor.row_lengths()`\n",
             "    and `tf.RaggedTensor.value_rowids()`."
           ]
         },
    diff --git a/site/en/r1/guide/saved_model.md b/site/en/r1/guide/saved_model.md
    index 623863a9df9..34447ffe861 100644
    --- a/site/en/r1/guide/saved_model.md
    +++ b/site/en/r1/guide/saved_model.md
    @@ -23,7 +23,7 @@ TensorFlow saves variables in binary *checkpoint files* that map variable
     names to tensor values.
     
     Caution: TensorFlow model files are code. Be careful with untrusted code.
    -See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md)
    +See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/r1.15/SECURITY.md)
     for details.
     
     ### Save variables
    @@ -148,7 +148,7 @@ Notes:
        `tf.variables_initializer` for more information.
     
     *  To inspect the variables in a checkpoint, you can use the
    -   [`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py)
    +   [`inspect_checkpoint`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/inspect_checkpoint.py)
        library, particularly the `print_tensors_in_checkpoint_file` function.
     
     *  By default, `Saver` uses the value of the `tf.Variable.name` property
    @@ -159,7 +159,7 @@ Notes:
     ### Inspect variables in a checkpoint
     
     We can quickly inspect variables in a checkpoint with the
    -[`inspect_checkpoint`](https://www.tensorflow.org/code/tensorflow/python/tools/inspect_checkpoint.py) library.
    +[`inspect_checkpoint`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/tools/inspect_checkpoint.py) library.
     
     Continuing from the save/restore examples shown earlier:
     
    @@ -216,7 +216,7 @@ simple_save(session,
     
     This configures the `SavedModel` so it can be loaded by
     [TensorFlow serving](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple) and supports the
    -[Predict API](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/predict.proto).
    +[Predict API](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/predict.proto).
     To access the classify, regress, or multi-inference APIs, use the manual
     `SavedModel` builder APIs or an `tf.estimator.Estimator`.
     
    @@ -328,7 +328,7 @@ with tf.Session(graph=tf.Graph()) as sess:
     ### Load a SavedModel in C++
     
     The C++ version of the SavedModel
    -[loader](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/loader.h)
    +[loader](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/loader.h)
     provides an API to load a SavedModel from a path, while allowing
     `SessionOptions` and `RunOptions`.
     You have to specify the tags associated with the graph to be loaded.
    @@ -383,20 +383,20 @@ reuse and share across tools consistently.
     You may use sets of tags to uniquely identify a `MetaGraphDef` saved in a
     SavedModel. A subset of commonly used tags is specified in:
     
    -* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/tag_constants.py)
    -* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/tag_constants.h)
    +* [Python](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/tag_constants.py)
    +* [C++](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/tag_constants.h)
     
     
     #### Standard SignatureDef constants
     
    -A [**SignatureDef**](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto)
    +A [**SignatureDef**](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/meta_graph.proto)
     is a protocol buffer that defines the signature of a computation
     supported by a graph.
     Commonly used input keys, output keys, and method names are
     defined in:
     
    -* [Python](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/saved_model/signature_constants.py)
    -* [C++](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/signature_constants.h)
    +* [Python](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/signature_constants.py)
    +* [C++](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/cc/saved_model/signature_constants.h)
     
     ## Using SavedModel with Estimators
     
    @@ -408,7 +408,7 @@ To prepare a trained Estimator for serving, you must export it in the standard
     SavedModel format. This section explains how to:
     
     * Specify the output nodes and the corresponding
    -  [APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto)
    +  [APIs](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto)
       that can be served (Classify, Regress, or Predict).
     * Export your model to the SavedModel format.
     * Serve the model from a local server and request predictions.
    @@ -506,7 +506,7 @@ Each `output` value must be an `ExportOutput` object  such as
     `tf.estimator.export.PredictOutput`.
     
     These output types map straightforwardly to the
    -[TensorFlow Serving APIs](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto),
    +[TensorFlow Serving APIs](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto),
     and so determine which request types will be honored.
     
     Note: In the multi-headed case, a `SignatureDef` will be generated for each
    @@ -515,7 +515,7 @@ the same keys.  These `SignatureDef`s differ only in their outputs, as
     provided by the corresponding `ExportOutput` entry.  The inputs are always
     those provided by the `serving_input_receiver_fn`.
     An inference request may specify the head by name.  One head must be named
    -using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://www.tensorflow.org/code/tensorflow/python/saved_model/signature_constants.py)
    +using [`signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/saved_model/signature_constants.py)
     indicating which `SignatureDef` will be served when an inference request
     does not specify one.
     
    @@ -566,9 +566,9 @@ Now you have a server listening for inference requests via gRPC on port 9000!
     ### Request predictions from a local server
     
     The server responds to gRPC requests according to the
    -[PredictionService](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto#L15)
    +[PredictionService](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis/prediction_service.proto#L15)
     gRPC API service definition.  (The nested protocol buffers are defined in
    -various [neighboring files](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis)).
    +various [neighboring files](https://github.com/tensorflow/serving/blob/r1.15/tensorflow_serving/apis)).
     
     From the API service definition, the gRPC framework generates client libraries
     in various languages providing remote access to the API.  In a project using the
    @@ -620,7 +620,7 @@ The returned result in this example is a `ClassificationResponse` protocol
     buffer.
     
     This is a skeletal example; please see the [Tensorflow Serving](../deploy/index.md)
    -documentation and [examples](https://github.com/tensorflow/serving/tree/master/tensorflow_serving/example)
    +documentation and [examples](https://github.com/tensorflow/serving/tree/r1.15/tensorflow_serving/example)
     for more details.
     
     > Note: `ClassificationRequest` and `RegressionRequest` contain a
    diff --git a/site/en/r1/guide/using_tpu.md b/site/en/r1/guide/using_tpu.md
    index 74169092189..e3e338adf49 100644
    --- a/site/en/r1/guide/using_tpu.md
    +++ b/site/en/r1/guide/using_tpu.md
    @@ -7,8 +7,8 @@ changing the *hardware accelerator* in your notebook settings:
     TPU-enabled Colab notebooks are available to test:
     
       1. [A quick test, just to measure FLOPS](https://colab.research.google.com/notebooks/tpu.ipynb).
    -  2. [A CNN image classifier with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/fashion_mnist.ipynb).
    -  3. [An LSTM markov chain text generator with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/shakespeare_with_tpu_and_keras.ipynb)
    +  2. [A CNN image classifier with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/r1.15/tools/colab/fashion_mnist.ipynb).
+  3. [An LSTM Markov chain text generator with `tf.keras`](https://colab.research.google.com/github/tensorflow/tpu/blob/r1.15/tools/colab/shakespeare_with_tpu_and_keras.ipynb).
     
     ## TPUEstimator
     
    @@ -25,7 +25,7 @@ Cloud TPU is to define the model's inference phase (from inputs to predictions)
     outside of the `model_fn`. Then maintain separate implementations of the
     `Estimator` setup and `model_fn`, both wrapping this inference step. For an
     example of this pattern compare the `mnist.py` and `mnist_tpu.py` implementation in
    -[tensorflow/models](https://github.com/tensorflow/models/tree/master/official/r1/mnist).
    +[tensorflow/models](https://github.com/tensorflow/models/tree/r1.15/official/r1/mnist).
     
     ### Run a TPUEstimator locally
     
    @@ -350,10 +350,10 @@ in bytes. A minimum of a few MB (`buffer_size=8*1024*1024`) is recommended so
     that data is available when needed.
     
     The TPU-demos repo includes
    -[a script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
    +[a script](https://github.com/tensorflow/tpu/blob/1.15/tools/datasets/imagenet_to_gcs.py)
     for downloading the imagenet dataset and converting it to an appropriate format.
     This together with the imagenet
    -[models](https://github.com/tensorflow/tpu/tree/master/models)
    +[models](https://github.com/tensorflow/tpu/tree/r1.15/models)
     included in the repo demonstrate all of these best-practices.
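
A minimal sketch of that read pattern (the `gs://` pattern below is a placeholder) using `tf.data` with the recommended read buffer:

```python
import tensorflow as tf

# Placeholder GCS pattern; TPU input pipelines typically read from Cloud Storage.
files = tf.data.Dataset.list_files("gs://my-bucket/train-*.tfrecord")
dataset = files.interleave(
    lambda f: tf.data.TFRecordDataset(f, buffer_size=8 * 1024 * 1024),
    cycle_length=8)
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
```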
     
     ## Next steps
    diff --git a/site/en/r1/guide/version_compat.md b/site/en/r1/guide/version_compat.md
    index 6702f6e0819..a765620518d 100644
    --- a/site/en/r1/guide/version_compat.md
    +++ b/site/en/r1/guide/version_compat.md
    @@ -49,19 +49,19 @@ patch versions.  The public APIs consist of
       submodules, but is not documented, then it is **not** considered part of the
       public API.
     
    -* The [C API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/c/c_api.h).
    +* The [C API](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/c/c_api.h).
     
     * The following protocol buffer files:
    -    * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/attr_value.proto)
    -    * [`config`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/config.proto)
    -    * [`event`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/event.proto)
    -    * [`graph`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/graph.proto)
    -    * [`op_def`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/op_def.proto)
    -    * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/reader_base.proto)
    -    * [`summary`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/summary.proto)
    -    * [`tensor`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor.proto)
    -    * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/tensor_shape.proto)
    -    * [`types`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/types.proto)
    +    * [`attr_value`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/attr_value.proto)
    +    * [`config`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/protobuf/config.proto)
    +    * [`event`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/util/event.proto)
    +    * [`graph`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/graph.proto)
    +    * [`op_def`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/op_def.proto)
    +    * [`reader_base`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/reader_base.proto)
    +    * [`summary`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/summary.proto)
    +    * [`tensor`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor.proto)
    +    * [`tensor_shape`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/tensor_shape.proto)
    +    * [`types`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/types.proto)
     
     
     ## What is *not* covered
    @@ -79,7 +79,7 @@ backward incompatible ways between minor releases. These include:
         such as:
     
       - [C++](./extend/cc.md) (exposed through header files in
    -    [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/cc)).
    +    [`tensorflow/cc`](https://github.com/tensorflow/tensorflow/tree/r1.15/tensorflow/cc)).
       - [Java](../api_docs/java/reference/org/tensorflow/package-summary),
       - [Go](https://pkg.go.dev/github.com/tensorflow/tensorflow/tensorflow/go)
       - [JavaScript](https://js.tensorflow.org)
    @@ -209,7 +209,7 @@ guidelines for evolving `GraphDef` versions.
     There are different data versions for graphs and checkpoints. The two data
     formats evolve at different rates from each other and also at different rates
     from TensorFlow. Both versioning systems are defined in
    -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h).
    +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/public/version.h).
     Whenever a new version is added, a note is added to the header detailing what
     changed and the date.
     
    @@ -224,7 +224,7 @@ We distinguish between the following kinds of data version information:
       (`min_producer`).
     
     Each piece of versioned data has a [`VersionDef
    -versions`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/framework/versions.proto)
    +versions`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/framework/versions.proto)
     field which records the `producer` that made the data, the `min_consumer`
     that it is compatible with, and a list of `bad_consumers` versions that are
     disallowed.
    @@ -239,7 +239,7 @@ accept a piece of data if the following are all true:
     *   `consumer` not in data's `bad_consumers`
     
     Since both producers and consumers come from the same TensorFlow code base,
    -[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/public/version.h)
    +[`core/public/version.h`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/public/version.h)
     contains a main data version which is treated as either `producer` or
     `consumer` depending on context and both `min_consumer` and `min_producer`
     (needed by producers and consumers, respectively). Specifically,
    @@ -309,7 +309,7 @@ existing producer scripts will not suddenly use the new functionality.
     1.  Add a new similar op named `SomethingV2` or similar and go through the
         process of adding it and switching existing Python wrappers to use it.
         To ensure forward compatibility use the checks suggested in
    -    [compat.py](https://www.tensorflow.org/code/tensorflow/python/compat/compat.py)
    +    [compat.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/compat/compat.py)
         when changing the Python wrappers.
     2.  Remove the old op (Can only take place with a major version change due to
         backward compatibility).
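
A minimal sketch of the forward-compatibility gate that the `compat.py` check referenced in step 1 provides, assuming TF 1.15; the cut-off date and both op paths below are illustrative stand-ins, not TensorFlow's actual kernels:

```python
# Sketch: gate new behavior behind tf.compat.forward_compatible(), as the
# compat.py check suggests. The date and both branches are illustrative.
import tensorflow as tf

def something(x):
  # After the horizon date, consumers are assumed to understand the new op.
  if tf.compat.forward_compatible(2019, 12, 1):
    return tf.math.square(x)   # stand-in for the new "SomethingV2" path
  return tf.multiply(x, x)     # stand-in for the original "Something" path
```
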
    diff --git a/site/en/r1/tutorials/README.md b/site/en/r1/tutorials/README.md
    index 5094e645e6e..9ff164ad77c 100644
    --- a/site/en/r1/tutorials/README.md
    +++ b/site/en/r1/tutorials/README.md
    @@ -68,4 +68,4 @@ implement common ML algorithms. See the
     * [Boosted trees](./estimators/boosted_trees.ipynb)
     * [Gradient Boosted Trees: Model understanding](./estimators/boosted_trees_model_understanding.ipynb)
     * [Build a Convolutional Neural Network using Estimators](./estimators/cnn.ipynb)
    -* [Wide and deep learning with Estimators](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep)
    +* [Wide and deep learning with Estimators](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep)
    diff --git a/site/en/r1/tutorials/_index.ipynb b/site/en/r1/tutorials/_index.ipynb
    index e2fe960d125..eca1450964f 100644
    --- a/site/en/r1/tutorials/_index.ipynb
    +++ b/site/en/r1/tutorials/_index.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/distribute/keras.ipynb b/site/en/r1/tutorials/distribute/keras.ipynb
    index b8d3c87bfab..14e8bf739a9 100644
    --- a/site/en/r1/tutorials/distribute/keras.ipynb
    +++ b/site/en/r1/tutorials/distribute/keras.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -86,7 +86,7 @@
             "Essentially, it copies all of the model's variables to each processor.\n",
             "Then, it uses [all-reduce](http://mpitutorial.com/tutorials/mpi-reduce-and-allreduce/) to combine the gradients from all processors and applies the combined value to all copies of the model.\n",
             "\n",
    -        "`MirroredStategy` is one of several distribution strategy available in TensorFlow core. You can read about more strategies at [distribution strategy guide](../../guide/distribute_strategy.ipynb).\n"
     +        "`MirroredStrategy` is one of several distribution strategies available in TensorFlow core. You can read about more strategies in the [distribution strategy guide](../../guide/distribute_strategy.ipynb).\n"
           ]
         },
         {
    @@ -345,7 +345,7 @@
           "source": [
             "The callbacks used here are:\n",
             "\n",
    -        "*   *Tensorboard*: This callback writes a log for Tensorboard which allows you to visualize the graphs.\n",
     +        "*   *TensorBoard*: This callback writes a log for TensorBoard, which allows you to visualize the graphs.\n",
             "*   *Model Checkpoint*: This callback saves the model after every epoch.\n",
             "*   *Learning Rate Scheduler*: Using this callback, you can schedule the learning rate to change after every epoch/batch.\n",
             "\n",
    @@ -554,7 +554,7 @@
           },
           "outputs": [],
           "source": [
    -        "tf.keras.experimental.export_saved_model(model, path)"
    +        "model.save(path)"
           ]
         },
         {
    @@ -574,7 +574,7 @@
           },
           "outputs": [],
           "source": [
    -        "unreplicated_model = tf.keras.experimental.load_from_saved_model(path)\n",
    +        "unreplicated_model = tf.keras.models.load_model(path)\n",
             "\n",
             "unreplicated_model.compile(\n",
             "    loss='sparse_categorical_crossentropy',\n",
    diff --git a/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb b/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    index 6d09d2623de..c61f893ca4c 100644
    --- a/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    +++ b/site/en/r1/tutorials/distribute/tpu_custom_training.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/distribute/training_loops.ipynb b/site/en/r1/tutorials/distribute/training_loops.ipynb
    index 1343e8c8b6b..8eb72c13030 100644
    --- a/site/en/r1/tutorials/distribute/training_loops.ipynb
    +++ b/site/en/r1/tutorials/distribute/training_loops.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/automatic_differentiation.ipynb b/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    index bbbb689a617..df843bac3b8 100644
    --- a/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    +++ b/site/en/r1/tutorials/eager/automatic_differentiation.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/custom_layers.ipynb b/site/en/r1/tutorials/eager/custom_layers.ipynb
    index c82458cb857..48b55ed943e 100644
    --- a/site/en/r1/tutorials/eager/custom_layers.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_layers.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -230,7 +230,7 @@
           "source": [
             "## Models: composing layers\n",
             "\n",
    -        "Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a resnet is a composition of convolutions, batch normalizations, and a shortcut.\n",
    +        "Many interesting layer-like things in machine learning models are implemented by composing existing layers. For example, each residual block in a ResNet is a composition of convolutions, batch normalizations, and a shortcut.\n",
             "\n",
             "The main class used when creating a layer-like thing which contains other layers is tf.keras.Model. Implementing one is done by inheriting from tf.keras.Model."
           ]
    diff --git a/site/en/r1/tutorials/eager/custom_training.ipynb b/site/en/r1/tutorials/eager/custom_training.ipynb
    index 72beefe89ad..f0f7faffa7f 100644
    --- a/site/en/r1/tutorials/eager/custom_training.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_training.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb b/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    index a4839429827..3989f3e44bc 100644
    --- a/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    +++ b/site/en/r1/tutorials/eager/custom_training_walkthrough.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/eager/eager_basics.ipynb b/site/en/r1/tutorials/eager/eager_basics.ipynb
    index 90d7c02f18d..acd00ec2e20 100644
    --- a/site/en/r1/tutorials/eager/eager_basics.ipynb
    +++ b/site/en/r1/tutorials/eager/eager_basics.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/images/deep_cnn.md b/site/en/r1/tutorials/images/deep_cnn.md
    index 00a914d8976..885f3907aa7 100644
    --- a/site/en/r1/tutorials/images/deep_cnn.md
    +++ b/site/en/r1/tutorials/images/deep_cnn.md
    @@ -80,15 +80,15 @@ for details.  It consists of 1,068,298 learnable parameters and requires about
     ## Code Organization
     
     The code for this tutorial resides in
    -[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/).
    +[`models/tutorials/image/cifar10/`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/).
     
     File | Purpose
     --- | ---
    -[`cifar10_input.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_input.py) | Loads CIFAR-10 dataset using [tensorflow-datasets library](https://github.com/tensorflow/datasets).
    -[`cifar10.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model.
    -[`cifar10_train.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU.
    -[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs.
    -[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model.
    +[`cifar10_input.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_input.py) | Loads CIFAR-10 dataset using [tensorflow-datasets library](https://github.com/tensorflow/datasets).
    +[`cifar10.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10.py) | Builds the CIFAR-10 model.
    +[`cifar10_train.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_train.py) | Trains a CIFAR-10 model on a CPU or GPU.
    +[`cifar10_multi_gpu_train.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_multi_gpu_train.py) | Trains a CIFAR-10 model on multiple GPUs.
    +[`cifar10_eval.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10_eval.py) | Evaluates the predictive performance of a CIFAR-10 model.
     
     To run this tutorial, you will need to:
     
    @@ -99,7 +99,7 @@ pip install tensorflow-datasets
     ## CIFAR-10 Model
     
     The CIFAR-10 network is largely contained in
    -[`cifar10.py`](https://github.com/tensorflow/models/tree/master/research/tutorials/image/cifar10/cifar10.py).
    +[`cifar10.py`](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/image/cifar10/cifar10.py).
     The complete training
     graph contains roughly 765 operations. We find that we can make the code most
     reusable by constructing the graph with the following modules:
    @@ -108,7 +108,7 @@ reusable by constructing the graph with the following modules:
     operations that read and preprocess CIFAR images for evaluation and training,
     respectively.
     1. [**Model prediction:**](#model-prediction) `inference()`
    -adds operations that perform inference, i.e. classification, on supplied images.
    +adds operations that perform inference, i.e., classification, on supplied images.
     1. [**Model training:**](#model-training) `loss()` and `train()`
     add operations that compute the loss,
     gradients, variable updates and visualization summaries.
    @@ -405,7 +405,7 @@ a "tower". We must set two attributes for each tower:
     * A unique name for all operations within a tower.
     `tf.name_scope` provides
     this unique name by prepending a scope. For instance, all operations in
    -the first tower are prepended with `tower_0`, e.g. `tower_0/conv1/Conv2D`.
    +the first tower are prepended with `tower_0`, e.g., `tower_0/conv1/Conv2D`.
     
     * A preferred hardware device to run the operation within a tower.
     `tf.device` specifies this. For
    diff --git a/site/en/r1/tutorials/images/hub_with_keras.ipynb b/site/en/r1/tutorials/images/hub_with_keras.ipynb
    index ece9c0fa4a9..f4e683e8936 100644
    --- a/site/en/r1/tutorials/images/hub_with_keras.ipynb
    +++ b/site/en/r1/tutorials/images/hub_with_keras.ipynb
    @@ -60,7 +60,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -841,7 +841,7 @@
             "t = time.time()\n",
             "\n",
             "export_path = \"/tmp/saved_models/{}\".format(int(t))\n",
    -        "tf.keras.experimental.export_saved_model(model, export_path)\n",
    +        "model.save(export_path)\n",
             "\n",
             "export_path"
           ]
    @@ -863,7 +863,7 @@
           },
           "outputs": [],
           "source": [
    -        "reloaded = tf.keras.experimental.load_from_saved_model(export_path, custom_objects={'KerasLayer':hub.KerasLayer})"
    +        "reloaded = tf.keras.models.load_model(export_path, custom_objects={'KerasLayer':hub.KerasLayer})"
           ]
         },
         {
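
The changed cells above swap the deprecated `tf.keras.experimental.export_saved_model`/`load_from_saved_model` pair for `model.save` and `tf.keras.models.load_model`. A minimal sketch of that round trip, with an illustrative toy model and path; for models containing a `hub.KerasLayer`, pass `custom_objects={'KerasLayer': hub.KerasLayer}` to `load_model` as the cell above does:

```python
# Sketch of the model.save / load_model round trip; the toy model and the
# export path are illustrative.
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
model.compile(optimizer='adam', loss='mse')

export_path = '/tmp/example_keras_model.h5'  # illustrative location
model.save(export_path)
reloaded = tf.keras.models.load_model(export_path)
```
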
    diff --git a/site/en/r1/tutorials/images/image_recognition.md b/site/en/r1/tutorials/images/image_recognition.md
    index 0be884de403..cb66e594629 100644
    --- a/site/en/r1/tutorials/images/image_recognition.md
    +++ b/site/en/r1/tutorials/images/image_recognition.md
    @@ -140,13 +140,13 @@ score of 0.8.
       
     
     
    -Next, try it out on your own images by supplying the --image= argument, e.g.
    +Next, try it out on your own images by supplying the --image= argument, e.g.,
     
     ```bash
     bazel-bin/tensorflow/examples/label_image/label_image --image=my_image.png
     ```
     
    -If you look inside the [`tensorflow/examples/label_image/main.cc`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc)
    +If you look inside the [`tensorflow/examples/label_image/main.cc`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc)
     file, you can find out
     how it works. We hope this code will help you integrate TensorFlow into
     your own applications, so we will walk step by step through the main functions:
    @@ -164,7 +164,7 @@ training. If you have a graph that you've trained yourself, you'll just need
     to adjust the values to match whatever you used during your training process.
     
     You can see how they're applied to an image in the
    -[`ReadTensorFromImageFile()`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc#L88)
    +[`ReadTensorFromImageFile()`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc#L88)
     function.
     
     ```C++
    @@ -334,7 +334,7 @@ The `PrintTopLabels()` function takes those sorted results, and prints them out
     friendly way. The `CheckTopLabel()` function is very similar, but just makes sure that
     the top label is the one we expect, for debugging purposes.
     
    -At the end, [`main()`](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/label_image/main.cc#L252)
    +At the end, [`main()`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/label_image/main.cc#L252)
     ties together all of these calls.
     
     ```C++
    diff --git a/site/en/r1/tutorials/images/transfer_learning.ipynb b/site/en/r1/tutorials/images/transfer_learning.ipynb
    index c695da4ebb7..232ffa40bdd 100644
    --- a/site/en/r1/tutorials/images/transfer_learning.ipynb
    +++ b/site/en/r1/tutorials/images/transfer_learning.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -148,7 +148,7 @@
           },
           "outputs": [],
           "source": [
    -        "zip_file = tf.keras.utils.get_file(origin=\"https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip/",\n",
    +        "zip_file = tf.keras.utils.get_file(origin=\"https://download.mlcc.google.com/mledu-datasets/cats_and_dogs_filtered.zip/",\n",
             "                                   fname=\"cats_and_dogs_filtered.zip\", extract=True)\n",
             "base_dir, _ = os.path.splitext(zip_file)"
           ]
    diff --git a/site/en/r1/tutorials/keras/basic_classification.ipynb b/site/en/r1/tutorials/keras/basic_classification.ipynb
    index be7f5e9e8b1..14950538ce4 100644
    --- a/site/en/r1/tutorials/keras/basic_classification.ipynb
    +++ b/site/en/r1/tutorials/keras/basic_classification.ipynb
    @@ -96,7 +96,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/keras/basic_regression.ipynb b/site/en/r1/tutorials/keras/basic_regression.ipynb
    index 7d9cb711efa..4bffd62f982 100644
    --- a/site/en/r1/tutorials/keras/basic_regression.ipynb
    +++ b/site/en/r1/tutorials/keras/basic_regression.ipynb
    @@ -96,7 +96,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/keras/basic_text_classification.ipynb b/site/en/r1/tutorials/keras/basic_text_classification.ipynb
    index 0303d54d973..5424185bcbd 100644
    --- a/site/en/r1/tutorials/keras/basic_text_classification.ipynb
    +++ b/site/en/r1/tutorials/keras/basic_text_classification.ipynb
    @@ -96,7 +96,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb b/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb
    index a8f266f9869..8e35b06e556 100644
    --- a/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb
    +++ b/site/en/r1/tutorials/keras/overfit_and_underfit.ipynb
    @@ -96,7 +96,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/keras/save_and_restore_models.ipynb b/site/en/r1/tutorials/keras/save_and_restore_models.ipynb
    index 7911e37e139..04cc94417a9 100644
    --- a/site/en/r1/tutorials/keras/save_and_restore_models.ipynb
    +++ b/site/en/r1/tutorials/keras/save_and_restore_models.ipynb
    @@ -96,7 +96,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -115,7 +115,7 @@
             "\n",
             "Sharing this data helps others understand how the model works and try it themselves with new data.\n",
             "\n",
    -        "Caution: Be careful with untrusted code—TensorFlow models are code. See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) for details.\n",
    +        "Caution: Be careful with untrusted code—TensorFlow models are code. See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/r1.15/SECURITY.md) for details.\n",
             "\n",
             "### Options\n",
             "\n",
    @@ -698,7 +698,7 @@
             "id": "B7qfpvpY9HCe"
           },
           "source": [
    -        "Load the the saved model."
    +        "Load the saved model."
           ]
         },
         {
    diff --git a/site/en/r1/tutorials/load_data/images.ipynb b/site/en/r1/tutorials/load_data/images.ipynb
    index dbee204323b..923b95130d1 100644
    --- a/site/en/r1/tutorials/load_data/images.ipynb
    +++ b/site/en/r1/tutorials/load_data/images.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/load_data/tf_records.ipynb b/site/en/r1/tutorials/load_data/tf_records.ipynb
    index 8b57d3f2f1e..45635034c69 100644
    --- a/site/en/r1/tutorials/load_data/tf_records.ipynb
    +++ b/site/en/r1/tutorials/load_data/tf_records.ipynb
    @@ -57,7 +57,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -141,7 +141,7 @@
           "source": [
             "Fundamentally a `tf.Example` is a `{\"string\": tf.train.Feature}` mapping.\n",
             "\n",
    -        "The `tf.train.Feature` message type can accept one of the following three types (See the [`.proto` file](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto) for reference). Most other generic types can be coerced into one of these.\n",
    +        "The `tf.train.Feature` message type can accept one of the following three types (See the [`.proto` file](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/feature.proto) for reference). Most other generic types can be coerced into one of these.\n",
             "\n",
             "1. `tf.train.BytesList` (the following types can be coerced)\n",
             "\n",
    @@ -276,7 +276,7 @@
             "\n",
             "1. We create a map (dictionary) from the feature name string to the encoded feature value produced in #1.\n",
             "\n",
    -        "1. The map produced in #2 is converted to a [`Features` message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/feature.proto#L85)."
    +        "1. The map produced in #2 is converted to a [`Features` message](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/feature.proto#L85)."
           ]
         },
         {
    @@ -365,7 +365,7 @@
             "id": "XftzX9CN_uGT"
           },
           "source": [
    -        "For example, suppose we have a single observation from the dataset, `[False, 4, bytes('goat'), 0.9876]`. We can create and print the `tf.Example` message for this observation using `create_message()`. Each single observation will be written as a `Features` message as per the above. Note that the `tf.Example` [message](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/example/example.proto#L88) is just a wrapper around the `Features` message."
    +        "For example, suppose we have a single observation from the dataset, `[False, 4, bytes('goat'), 0.9876]`. We can create and print the `tf.Example` message for this observation using `create_message()`. Each single observation will be written as a `Features` message as per the above. Note that the `tf.Example` [message](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto#L88) is just a wrapper around the `Features` message."
           ]
         },
         {
    @@ -632,7 +632,7 @@
           "source": [
             "We can also read the TFRecord file using the `tf.data.TFRecordDataset` class.\n",
             "\n",
    -        "More information on consuming TFRecord files using `tf.data` can be found [here](https://www.tensorflow.org/r1/guide/datasets#consuming_tfrecord_data).\n",
    +        "More information on consuming TFRecord files using `tf.data` can be found [here](https://www.tensorflow.org/guide/data#consuming_tfrecord_data).\n",
             "\n",
             "Using `TFRecordDataset`s can be useful for standardizing input data and optimizing performance."
           ]
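
A short sketch of the writing side this notebook describes: packing the three `tf.train.Feature` container types into a `tf.train.Example` and appending it to a TFRecord file. The feature names, values, and output path are illustrative:

```python
# Sketch: one observation serialized as a tf.train.Example and written to a
# TFRecord file. Feature names, values, and the path are illustrative.
import tensorflow as tf

example = tf.train.Example(features=tf.train.Features(feature={
    'animal': tf.train.Feature(bytes_list=tf.train.BytesList(value=[b'goat'])),
    'legs': tf.train.Feature(int64_list=tf.train.Int64List(value=[4])),
    'score': tf.train.Feature(float_list=tf.train.FloatList(value=[0.9876])),
}))

with tf.io.TFRecordWriter('/tmp/example.tfrecord') as writer:
    writer.write(example.SerializeToString())
```
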
    diff --git a/site/en/r1/tutorials/non-ml/mandelbrot.ipynb b/site/en/r1/tutorials/non-ml/mandelbrot.ipynb
    index 88177211896..bca8a142be4 100644
    --- a/site/en/r1/tutorials/non-ml/mandelbrot.ipynb
    +++ b/site/en/r1/tutorials/non-ml/mandelbrot.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/non-ml/pdes.ipynb b/site/en/r1/tutorials/non-ml/pdes.ipynb
    index d2646daa8da..832fa450523 100644
    --- a/site/en/r1/tutorials/non-ml/pdes.ipynb
    +++ b/site/en/r1/tutorials/non-ml/pdes.ipynb
    @@ -64,7 +64,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    diff --git a/site/en/r1/tutorials/representation/kernel_methods.md b/site/en/r1/tutorials/representation/kernel_methods.md
    index 67adc4951c6..227fe81d515 100644
    --- a/site/en/r1/tutorials/representation/kernel_methods.md
    +++ b/site/en/r1/tutorials/representation/kernel_methods.md
    @@ -24,7 +24,7 @@ following sources for an introduction:
     Currently, TensorFlow supports explicit kernel mappings for dense features only;
     TensorFlow will provide support for sparse features at a later release.
     
    -This tutorial uses [tf.contrib.learn](https://www.tensorflow.org/code/tensorflow/contrib/learn/python/learn)
    +This tutorial uses [tf.contrib.learn](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/contrib/learn/python/learn)
     (TensorFlow's high-level Machine Learning API) Estimators for our ML models.
     If you are not familiar with this API, The [Estimator guide](../../guide/estimators.md)
     is a good place to start. We will use the MNIST dataset. The tutorial consists
    @@ -131,7 +131,7 @@ In addition to experimenting with the (training) batch size and the number of
     training steps, there are a couple other parameters that can be tuned as well.
     For instance, you can change the optimization method used to minimize the loss
     by explicitly selecting another optimizer from the collection of
    -[available optimizers](https://www.tensorflow.org/code/tensorflow/python/training).
    +[available optimizers](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/python/training).
     As an example, the following code constructs a LinearClassifier estimator that
     uses the Follow-The-Regularized-Leader (FTRL) optimization strategy with a
     specific learning rate and L2-regularization.
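
A minimal sketch of the construction described above, written against the core `tf.estimator`/`tf.train` APIs rather than `tf.contrib.learn`; the hyperparameter values and feature column are illustrative:

```python
# Sketch: a LinearClassifier driven by the FTRL optimizer with an explicit
# learning rate and L2 regularization. All values are illustrative.
import tensorflow as tf

optimizer = tf.train.FtrlOptimizer(
    learning_rate=5.0,
    l2_regularization_strength=1.0)
image_column = tf.feature_column.numeric_column('images', shape=[784])

estimator = tf.estimator.LinearClassifier(
    feature_columns=[image_column], n_classes=10, optimizer=optimizer)
```
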
    diff --git a/site/en/r1/tutorials/representation/linear.md b/site/en/r1/tutorials/representation/linear.md
    index 5516672b34a..d996a13bc1f 100644
    --- a/site/en/r1/tutorials/representation/linear.md
    +++ b/site/en/r1/tutorials/representation/linear.md
    @@ -12,7 +12,7 @@ those tools. It explains:
     
     Read this overview to decide whether the Estimator's linear model tools  might
     be useful to you. Then work through the
    -[Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep)
    +[Estimator wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep)
     to give it a try. This overview uses code samples from the tutorial, but the
     tutorial walks through the code in greater detail.
     
    @@ -177,7 +177,7 @@ the name of a `FeatureColumn`. Each key's value is a tensor containing the
     values of that feature for all data instances. See
     [Premade Estimators](../../guide/premade_estimators.md#input_fn) for a
     more comprehensive look at input functions, and `input_fn` in the
    -[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep)
    +[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep)
     for an example implementation of an input function.
     
     The input function is passed to the `train()` and `evaluate()` calls that
    @@ -236,4 +236,4 @@ e = tf.estimator.DNNLinearCombinedClassifier(
         dnn_hidden_units=[100, 50])
     ```
     For more information, see the
    -[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/master/official/r1/wide_deep).
    +[wide and deep learning tutorial](https://github.com/tensorflow/models/tree/r1.15/official/r1/wide_deep).
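
A minimal sketch tying the pieces above together: an `input_fn` that returns `(features, labels)` keyed by feature-column name, passed to an Estimator's `train()` call (shown with a plain `LinearClassifier` for brevity); the in-memory data, column, and step count are illustrative:

```python
# Sketch: a minimal input_fn and the train() call it feeds. The in-memory
# data, feature column, and step count are illustrative.
import tensorflow as tf

def input_fn():
  features = {'age': [25.0, 40.0, 63.0]}
  labels = [0, 1, 1]
  dataset = tf.data.Dataset.from_tensor_slices((features, labels))
  return dataset.shuffle(3).repeat().batch(2)

age = tf.feature_column.numeric_column('age')
estimator = tf.estimator.LinearClassifier(feature_columns=[age])
estimator.train(input_fn=input_fn, steps=10)
```
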
    diff --git a/site/en/r1/tutorials/representation/unicode.ipynb b/site/en/r1/tutorials/representation/unicode.ipynb
    index 6762a483a42..f76977c3c92 100644
    --- a/site/en/r1/tutorials/representation/unicode.ipynb
    +++ b/site/en/r1/tutorials/representation/unicode.ipynb
    @@ -57,7 +57,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -136,7 +136,7 @@
             "id": "jsMPnjb6UDJ1"
           },
           "source": [
    -        "Note: When using python to construct strings, the handling of unicode differs betweeen v2 and v3. In v2, unicode strings are indicated by the \"u\" prefix, as above. In v3, strings are unicode-encoded by default."
     +        "Note: When using Python to construct strings, the handling of Unicode differs between v2 and v3. In v2, Unicode strings are indicated by the \"u\" prefix, as above. In v3, strings are Unicode-encoded by default."
           ]
         },
         {
    @@ -425,7 +425,7 @@
           "source": [
             "### Character substrings\n",
             "\n",
    -        "Similarly, the `tf.strings.substr` operation accepts the \"`unit`\" parameter, and uses it to determine what kind of offsets the \"`pos`\" and \"`len`\" paremeters contain."
    +        "Similarly, the `tf.strings.substr` operation accepts the \"`unit`\" parameter, and uses it to determine what kind of offsets the \"`pos`\" and \"`len`\" parameters contain."
           ]
         },
         {
    @@ -587,7 +587,7 @@
             "id": "CapnbShuGU8i"
           },
           "source": [
    -        "First, we decode the sentences into character codepoints, and find the script identifeir for each character."
    +        "First, we decode the sentences into character codepoints, and find the script identifier for each character."
           ]
         },
         {
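
A minimal sketch of the decode-then-script step the changed cell above describes; the example sentences are illustrative:

```python
# Sketch: decode strings into Unicode code points, then look up the script
# identifier of each code point. The sentences are illustrative.
import tensorflow as tf

sentences = tf.constant([u'Hello, world.', u'世界こんにちは'])
# RaggedTensor of code points, one row per sentence.
codepoints = tf.strings.unicode_decode(sentences, input_encoding='UTF-8')
# Script IDs for the flat list of code points (Latin vs. Han vs. Hiragana, etc.).
scripts = tf.strings.unicode_script(codepoints.values)
```
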
    diff --git a/site/en/r1/tutorials/representation/word2vec.md b/site/en/r1/tutorials/representation/word2vec.md
    index f6a27c68f3c..517a5dbc5c5 100644
    --- a/site/en/r1/tutorials/representation/word2vec.md
    +++ b/site/en/r1/tutorials/representation/word2vec.md
    @@ -36,7 +36,7 @@ like to get your hands dirty with the details.
     
     Image and audio processing systems work with rich, high-dimensional datasets
     encoded as vectors of the individual raw pixel-intensities for image data, or
    -e.g. power spectral density coefficients for audio data. For tasks like object
    +e.g., power spectral density coefficients for audio data. For tasks like object
     or speech recognition we know that all the information required to successfully
     perform the task is encoded in the data (because humans can perform these tasks
     from the raw data).  However, natural language processing systems traditionally
    @@ -109,7 +109,7 @@ $$
     where \\(\text{score}(w_t, h)\\) computes the compatibility of word \\(w_t\\)
     with the context \\(h\\) (a dot product is commonly used). We train this model
     by maximizing its [log-likelihood](https://en.wikipedia.org/wiki/Likelihood_function)
    -on the training set, i.e. by maximizing
    +on the training set, i.e., by maximizing
     
     $$
     \begin{align}
    @@ -176,7 +176,7 @@ As an example, let's consider the dataset
     We first form a dataset of words and the contexts in which they appear. We
     could define 'context' in any way that makes sense, and in fact people have
     looked at syntactic contexts (i.e. the syntactic dependents of the current
    -target word, see e.g.
    +target word, see e.g.,
     [Levy et al.](https://levyomer.files.wordpress.com/2014/04/dependency-based-word-embeddings-acl-2014.pdf)),
     words-to-the-left of the target, words-to-the-right of the target, etc. For now,
     let's stick to the vanilla definition and define 'context' as the window
    @@ -204,7 +204,7 @@ where the goal is to predict `the` from `quick`. We select `num_noise` number
     of noisy (contrastive) examples by drawing from some noise distribution,
     typically the unigram distribution, \\(P(w)\\). For simplicity let's say
     `num_noise=1` and we select `sheep` as a noisy example. Next we compute the
    -loss for this pair of observed and noisy examples, i.e. the objective at time
    +loss for this pair of observed and noisy examples, i.e., the objective at time
     step \\(t\\) becomes
     
     $$J^{(t)}_\text{NEG} = \log Q_\theta(D=1 | \text{the, quick}) +
    @@ -212,7 +212,7 @@ $$J^{(t)}_\text{NEG} = \log Q_\theta(D=1 | \text{the, quick}) +
     
     The goal is to make an update to the embedding parameters \\(\theta\\) to improve
     (in this case, maximize) this objective function.  We do this by deriving the
    -gradient of the loss with respect to the embedding parameters \\(\theta\\), i.e.
    +gradient of the loss with respect to the embedding parameters \\(\theta\\), i.e.,
     \\(\frac{\partial}{\partial \theta} J_\text{NEG}\\) (luckily TensorFlow provides
     easy helper functions for doing this!). We then perform an update to the
     embeddings by taking a small step in the direction of the gradient. When this
    @@ -227,7 +227,7 @@ When we inspect these visualizations it becomes apparent that the vectors
     capture some general, and in fact quite useful, semantic information about
     words and their relationships to one another. It was very interesting when we
     first discovered that certain directions in the induced vector space specialize
    -towards certain semantic relationships, e.g. *male-female*, *verb tense* and
    +towards certain semantic relationships, e.g., *male-female*, *verb tense* and
     even *country-capital* relationships between words, as illustrated in the figure
     below (see also for example
     [Mikolov et al., 2013](https://www.aclweb.org/anthology/N13-1090)).
    @@ -327,7 +327,7 @@ for inputs, labels in generate_batch(...):
     ```
     
     See the full example code in
    -[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://www.tensorflow.org/code/tensorflow/examples/tutorials/word2vec/word2vec_basic.py).
    +[tensorflow/examples/tutorials/word2vec/word2vec_basic.py](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/examples/tutorials/word2vec/word2vec_basic.py).
     
     ## Visualizing the learned embeddings
     
    @@ -341,7 +341,7 @@ t-SNE.
     Et voila! As expected, words that are similar end up clustering nearby each
     other. For a more heavyweight implementation of word2vec that showcases more of
     the advanced features of TensorFlow, see the implementation in
    -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/research/tutorials/embedding/word2vec.py).
    +[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/embedding/word2vec.py).
     
     ## Evaluating embeddings: analogical reasoning
     
    @@ -357,7 +357,7 @@ Download the dataset for this task from
     
     To see how we do this evaluation, have a look at the `build_eval_graph()` and
     `eval()` functions in
    -[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/master/research/tutorials/embedding/word2vec.py).
    +[models/tutorials/embedding/word2vec.py](https://github.com/tensorflow/models/tree/r1.15/research/tutorials/embedding/word2vec.py).
     
     The choice of hyperparameters can strongly influence the accuracy on this task.
     To achieve state-of-the-art performance on this task requires training over a
    diff --git a/site/en/r1/tutorials/sequences/audio_recognition.md b/site/en/r1/tutorials/sequences/audio_recognition.md
    index 8ad71b88a3c..0388514ec92 100644
    --- a/site/en/r1/tutorials/sequences/audio_recognition.md
    +++ b/site/en/r1/tutorials/sequences/audio_recognition.md
    @@ -159,9 +159,9 @@ accuracy. If the training accuracy increases but the validation doesn't, that's
     a sign that overfitting is occurring, and your model is only learning things
     about the training clips, not broader patterns that generalize.
     
    -## Tensorboard
    +## TensorBoard
     
    -A good way to visualize how the training is progressing is using Tensorboard. By
     +A good way to visualize how the training is progressing is to use TensorBoard. By
     default, the script saves out events to /tmp/retrain_logs, and you can load
     these by running:
     
    diff --git a/site/en/r1/tutorials/sequences/recurrent.md b/site/en/r1/tutorials/sequences/recurrent.md
    index 6654795d944..e7c1f8c0b16 100644
    --- a/site/en/r1/tutorials/sequences/recurrent.md
    +++ b/site/en/r1/tutorials/sequences/recurrent.md
    @@ -2,7 +2,7 @@
     
     ## Introduction
     
    -See [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/){:.external}
    +See [Understanding LSTM Networks](https://colah.github.io/posts/2015-08-Understanding-LSTMs/)
     for an introduction to recurrent neural networks and LSTMs.
     
     ## Language Modeling
    diff --git a/site/en/r1/tutorials/sequences/recurrent_quickdraw.md b/site/en/r1/tutorials/sequences/recurrent_quickdraw.md
    index 435076f629c..d6a85377d17 100644
    --- a/site/en/r1/tutorials/sequences/recurrent_quickdraw.md
    +++ b/site/en/r1/tutorials/sequences/recurrent_quickdraw.md
    @@ -109,7 +109,7 @@ This download will take a while and download a bit more than 23GB of data.
     
     To convert the `ndjson` files to
     [TFRecord](../../api_guides/python/python_io.md#TFRecords_Format_Details) files containing
    -[`tf.train.Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
    +[`tf.train.Example`](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto)
     protos run the following command.
     
     ```shell
    @@ -213,7 +213,7 @@ screen coordinates and normalize the size such that the drawing has unit height.
     
     Finally, we compute the differences between consecutive points and store these
     as a `VarLenFeature` in a
    -[tensorflow.Example](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
    +[tensorflow.Example](https://github.com/tensorflow/tensorflow/blob/r1.15/tensorflow/core/example/example.proto)
     under the key `ink`. In addition we store the `class_index` as a single entry
     `FixedLengthFeature` and the `shape` of the `ink` as a `FixedLengthFeature` of
     length 2.
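
A minimal sketch of the record layout this section describes, with variable-length `ink` plus fixed-length `class_index` and `shape`, on both the writing and the parsing side; all values are illustrative, not the tutorial's real data:

```python
# Sketch: write ink deltas as a variable-length float feature with
# fixed-length class_index and shape, then the matching parse spec.
import tensorflow as tf

ink = [0.1, -0.2, 0.0, 0.05, 0.3, 1.0]  # flattened per-point deltas (illustrative)
example = tf.train.Example(features=tf.train.Features(feature={
    'ink': tf.train.Feature(float_list=tf.train.FloatList(value=ink)),
    'class_index': tf.train.Feature(int64_list=tf.train.Int64List(value=[42])),
    'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=[2, 3])),
}))

feature_spec = {
    'ink': tf.io.VarLenFeature(tf.float32),
    'class_index': tf.io.FixedLenFeature([1], tf.int64),
    'shape': tf.io.FixedLenFeature([2], tf.int64),
}
parsed = tf.io.parse_single_example(example.SerializeToString(), feature_spec)
```
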
    diff --git a/site/en/r1/tutorials/sequences/text_generation.ipynb b/site/en/r1/tutorials/sequences/text_generation.ipynb
    index 423a7f659f0..84d942c8bd0 100644
    --- a/site/en/r1/tutorials/sequences/text_generation.ipynb
    +++ b/site/en/r1/tutorials/sequences/text_generation.ipynb
    @@ -65,7 +65,7 @@
           "source": [
             "> Note: This is an archived TF1 notebook. These are configured\n",
             "to run in TF2's \n",
    -        "[compatbility mode](https://www.tensorflow.org/guide/migrate)\n",
    +        "[compatibility mode](https://www.tensorflow.org/guide/migrate)\n",
             "but will run in TF1 as well. To use TF1 in Colab, use the\n",
             "[%tensorflow_version 1.x](https://colab.research.google.com/notebooks/tensorflow_version.ipynb)\n",
             "magic."
    @@ -77,9 +77,9 @@
             "id": "BwpJ5IffzRG6"
           },
           "source": [
    -        "This tutorial demonstrates how to generate text using a character-based RNN. We will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). Given a sequence of characters from this data (\"Shakespear\"), train a model to predict the next character in the sequence (\"e\"). Longer sequences of text can be generated by calling the model repeatedly.\n",
    +        "This tutorial demonstrates how to generate text using a character-based RNN. You will work with a dataset of Shakespeare's writing from Andrej Karpathy's [The Unreasonable Effectiveness of Recurrent Neural Networks](http://karpathy.github.io/2015/05/21/rnn-effectiveness/). Given a sequence of characters from this data (\"Shakespear\"), train a model to predict the next character in the sequence (\"e\"). Longer sequences of text can be generated by calling the model repeatedly.\n",
             "\n",
    -        "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware acclerator > GPU*. If running locally make sure TensorFlow version >= 1.11.\n",
    +        "Note: Enable GPU acceleration to execute this notebook faster. In Colab: *Runtime > Change runtime type > Hardware accelerator > GPU*. If running locally make sure TensorFlow version >= 1.11.\n",
             "\n",
             "This tutorial includes runnable code implemented using [tf.keras](https://www.tensorflow.org/programmers_guide/keras) and [eager execution](https://www.tensorflow.org/programmers_guide/eager). The following is sample output when the model in this tutorial trained for 30 epochs, and started with the string \"Q\":\n",
             "\n",
    @@ -98,7 +98,7 @@
             "To watch the next way with his father with his face?\n",
             "\n",
             "ESCALUS:\n",
    -        "The cause why then we are all resolved more sons.\n",
    +        "The cause why then us all resolved more sons.\n",
             "\n",
             "VOLUMNIA:\n",
             "O, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, no, it is no sin it should be dead,\n",
    @@ -248,7 +248,7 @@
           "source": [
             "### Vectorize the text\n",
             "\n",
    -        "Before training, we need to map strings to a numerical representation. Create two lookup tables: one mapping characters to numbers, and another for numbers to characters."
    +        "Before training, you need to map strings to a numerical representation. Create two lookup tables: one mapping characters to numbers, and another for numbers to characters."
           ]
         },
         {
    @@ -272,7 +272,7 @@
             "id": "tZfqhkYCymwX"
           },
           "source": [
    -        "Now we have an integer representation for each character. Notice that we mapped the character as indexes from 0 to `len(unique)`."
     +        "Now you have an integer representation for each character. Notice that you mapped the characters to indexes from 0 to `len(unique)`."
           ]
         },
         {
    @@ -316,7 +316,7 @@
             "id": "wssHQ1oGymwe"
           },
           "source": [
    -        "Given a character, or a sequence of characters, what is the most probable next character? This is the task we're training the model to perform. The input to the model will be a sequence of characters, and we train the model to predict the output—the following character at each time step.\n",
    +        "Given a character, or a sequence of characters, what is the most probable next character? This is the task you are training the model to perform. The input to the model will be a sequence of characters, and you train the model to predict the output—the following character at each time step.\n",
             "\n",
             "Since RNNs maintain an internal state that depends on the previously seen elements, given all the characters computed until this moment, what is the next character?\n"
           ]
    @@ -346,7 +346,7 @@
           },
           "outputs": [],
           "source": [
    -        "# The maximum length sentence we want for a single input in characters\n",
    +        "# The maximum length sentence you want for a single input in characters\n",
             "seq_length = 100\n",
             "examples_per_epoch = len(text)//seq_length\n",
             "\n",
    @@ -458,7 +458,7 @@
           "source": [
             "### Create training batches\n",
             "\n",
    -        "We used `tf.data` to split the text into manageable sequences. But before feeding this data into the model, we need to shuffle the data and pack it into batches."
    +        "You used `tf.data` to split the text into manageable sequences. But before feeding this data into the model, you need to shuffle the data and pack it into batches."
           ]
         },
         {
    @@ -650,7 +650,7 @@
             "id": "uwv0gEkURfx1"
           },
           "source": [
    -        "To get actual predictions from the model we need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.\n",
    +        "To get actual predictions from the model you need to sample from the output distribution, to get actual character indices. This distribution is defined by the logits over the character vocabulary.\n",
             "\n",
             "Note: It is important to _sample_ from this distribution as taking the _argmax_ of the distribution can easily get the model stuck in a loop.\n",
             "\n",
    @@ -746,7 +746,7 @@
           "source": [
             "The standard `tf.keras.losses.sparse_categorical_crossentropy` loss function works in this case because it is applied across the last dimension of the predictions.\n",
             "\n",
    -        "Because our model returns logits, we need to set the `from_logits` flag.\n"
    +        "Because our model returns logits, you need to set the `from_logits` flag.\n"
           ]
         },
         {
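A minimal sketch of the loss described above, setting `from_logits` because the model outputs unnormalized logits rather than probabilities:

import tensorflow as tf

def loss(labels, logits):
    # Applied across the last dimension of the predictions.
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)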
    @@ -771,7 +771,7 @@
             "id": "jeOXriLcymww"
           },
           "source": [
    -        "Configure the training procedure using the `tf.keras.Model.compile` method. We'll use `tf.train.AdamOptimizer` with default arguments and the loss function."
    +        "Configure the training procedure using the `tf.keras.Model.compile` method. You'll use `tf.train.AdamOptimizer` with default arguments and the loss function."
           ]
         },
         {
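A minimal sketch of the compile step described above, assuming `model` and the `loss` function come from earlier cells; `tf.train.AdamOptimizer` is the TF 1.x name (TF 2.x uses `tf.keras.optimizers.Adam`):

import tensorflow as tf

# Default optimizer arguments, custom loss defined earlier.
model.compile(optimizer=tf.train.AdamOptimizer(), loss=loss)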
    @@ -891,7 +891,7 @@
             "\n",
             "Because of the way the RNN state is passed from timestep to timestep, the model only accepts a fixed batch size once built.\n",
             "\n",
    -        "To run the model with a different `batch_size`, we need to rebuild the model and restore the weights from the checkpoint.\n"
    +        "To run the model with a different `batch_size`, you need to rebuild the model and restore the weights from the checkpoint.\n"
           ]
         },
         {
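A minimal sketch of rebuilding with a batch size of 1 and restoring the latest checkpoint, assuming `build_model`, the vocabulary/embedding/RNN-size variables, and `checkpoint_dir` are defined as in earlier cells:

import tensorflow as tf

# Rebuild the model for single-example inference, then load the trained weights.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))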
    @@ -992,7 +992,7 @@
             "      predictions = predictions / temperature\n",
             "      predicted_id = tf.multinomial(predictions, num_samples=1)[-1,0].numpy()\n",
             "\n",
    -        "      # We pass the predicted word as the next input to the model\n",
    +        "      # You pass the predicted word as the next input to the model\n",
             "      # along with the previous hidden state\n",
             "      input_eval = tf.expand_dims([predicted_id], 0)\n",
             "\n",
    @@ -1035,11 +1035,11 @@
             "\n",
             "So now that you've seen how to run the model manually let's unpack the training loop, and implement it ourselves. This gives a starting point, for example, to implement _curriculum learning_ to help stabilize the model's open-loop output.\n",
             "\n",
    -        "We will use `tf.GradientTape` to track the gradients. You can learn more about this approach by reading the [eager execution guide](https://www.tensorflow.org/r1/guide/eager).\n",
    +        "You will use `tf.GradientTape` to track the gradients. You can learn more about this approach by reading the [eager execution guide](https://www.tensorflow.org/r1/guide/eager).\n",
             "\n",
             "The procedure works as follows:\n",
             "\n",
    -        "* First, initialize the RNN state. We do this by calling the `tf.keras.Model.reset_states` method.\n",
    +        "* First, initialize the RNN state. You do this by calling the `tf.keras.Model.reset_states` method.\n",
             "\n",
             "* Next, iterate over the dataset (batch by batch) and calculate the *predictions* associated with each.\n",
             "\n",
    diff --git a/site/en/tutorials/_index.yaml b/site/en/tutorials/_index.yaml
    index 718f187990e..0d09f04c5c7 100644
    --- a/site/en/tutorials/_index.yaml
    +++ b/site/en/tutorials/_index.yaml
    @@ -16,8 +16,9 @@ landing_page:
         - description: >
             

             The TensorFlow tutorials are written as Jupyter notebooks and run
    -        directly in Google Colab—a hosted notebook environment that requires
    -        no setup. Click the Run in Google Colab button.
    +        directly in Google Colab—a hosted notebook environment that requires
    +        no setup. At the top of each tutorial, you'll see a Run in Google Colab button. Click
    +        the button to open the notebook and run the code yourself.

    - classname: devsite-landing-row-100 @@ -84,38 +85,16 @@ landing_page: - classname: devsite-landing-row-100 items: - description: > - - Subscribe to the - TensorFlow blog, - YouTube channel, - and Twitter - for the latest updates. + + Check out these videos for an introduction to machine learning with TensorFlow: - items: - - heading: "Intro to Machine Learning" - path: "https://www.youtube.com/watch?v=KNAWp2S3w94" + - heading: "TensorFlow ML Zero to Hero" + path: "https://www.youtube.com/watch?v=KNAWp2S3w94&list=PLQY2H8rRoyvwWuPiWnuTDBHe7I0fMSsfO" youtube_id: "KNAWp2S3w94?rel=0&show_info=0" - - heading: "TensorFlow 2.0 and Keras" - path: "https://www.youtube.com/watch?v=wGI_VtE9CJM" - youtube_id: "wGI_VtE9CJM?rel=0&show_info=0" - - - classname: devsite-landing-row-cards - items: - - heading: "Looking Back at 2019" - path: https://blog.tensorflow.org/2019/12/looking-back-at-2019.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2019/12/looking-back-at-2019.html - - heading: "TensorFlow 2 is now available" - path: https://blog.tensorflow.org/2019/09/tensorflow-20-is-now-available.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2019/09/tensorflow-20-is-now-available.html - - heading: "Standardizing on Keras: Guidance on High-level APIs in TensorFlow 2" - path: https://blog.tensorflow.org/2018/12/standardizing-on-keras-guidance.html - buttons: - - label: "Read on the TensorFlow blog" - path: https://blog.tensorflow.org/2018/12/standardizing-on-keras-guidance.html + - heading: "Basic Computer Vision with ML" + path: "https://www.youtube.com/watch?v=bemDFpNooA8&list=PLQY2H8rRoyvwWuPiWnuTDBHe7I0fMSsfO" + youtube_id: "bemDFpNooA8?rel=0&show_info=0" - classname: devsite-landing-row-100 items: @@ -243,7 +222,7 @@ landing_page: path: /xla icon: @@ -295,3 +274,13 @@ landing_page: icon_name: chevron_right foreground: theme background: grey + + - classname: devsite-landing-row-100 + items: + - description: > + + Subscribe to the + TensorFlow blog, + YouTube channel, + and Twitter + for the latest updates. 
diff --git a/site/en/tutorials/_toc.yaml b/site/en/tutorials/_toc.yaml index fc46bbfaa30..a3907ffe9a4 100644 --- a/site/en/tutorials/_toc.yaml +++ b/site/en/tutorials/_toc.yaml @@ -35,6 +35,9 @@ toc: section: - title: "Images" path: /tutorials/load_data/images + - title: "Video" + path: /tutorials/load_data/video + status: new - title: "CSV" path: /tutorials/load_data/csv - title: "NumPy" @@ -74,6 +77,12 @@ toc: section: - title: "Distributed training with Keras" path: /tutorials/distribute/keras + - title: "Distributed training with DTensors" + path: /tutorials/distribute/dtensor_ml_tutorial + status: experimental + - title: "Using DTensors with Keras" + path: /tutorials/distribute/dtensor_keras_tutorial + status: experimental - title: "Custom training loops" path: /tutorials/distribute/custom_training - title: "Multi-worker training with Keras" @@ -88,9 +97,14 @@ toc: - title: "Distributed input" path: /tutorials/distribute/input -- title: "Images" +- title: "Vision" style: accordion section: + - title: "Computer vision" + path: /tutorials/images + - title: "KerasCV" + path: https://keras.io/keras_cv/ + status: external - title: "Convolutional Neural Network" path: /tutorials/images/cnn - title: "Image classification" @@ -104,31 +118,27 @@ toc: - title: "Image segmentation" path: /tutorials/images/segmentation - title: "Object detection with TF Hub" - path: https://github.com/tensorflow/hub/blob/master/examples/colab/tf2_object_detection.ipynb + path: /hub/tutorials/tf2_object_detection status: external + - title: "Video classification" + status: new + path: /tutorials/video/video_classification + - title: "Transfer learning with MoViNet" + status: new + path: /tutorials/video/transfer_learning_with_movinet - title: "Text" style: accordion section: - - title: "Word embeddings" - path: /text/guide/word_embeddings - status: external - - title: "Word2Vec" - path: /tutorials/text/word2vec - - title: "Text classification with an RNN" - path: /text/tutorials/text_classification_rnn - status: external - - title: "Classify Text with BERT" - path: /text/tutorials/classify_text_with_bert - status: external - - title: "Solve GLUE tasks using BERT on TPU" - path: /text/tutorials/bert_glue + - title: "Text and natural language processing" + path: /tutorials/text/index + - title: "Get started with KerasNLP" + path: https://keras.io/guides/keras_nlp/getting_started/ status: external - - title: "Neural machine translation with attention" - path: /text/tutorials/nmt_with_attention + - title: "Text and NLP guide" + path: /text status: external - - title: "Image captioning" - path: /tutorials/text/image_captioning + - title: "Audio" style: accordion section: @@ -136,10 +146,8 @@ toc: path: /tutorials/audio/simple_audio - title: "Transfer learning for audio recognition" path: /tutorials/audio/transfer_learning_audio - status: new - title: "Generate music with an RNN" path: /tutorials/audio/music_generation - status: new - title: "Structured data" style: accordion @@ -160,6 +168,9 @@ toc: - title: "Generative" style: accordion section: + - title: "Stable Diffusion" + status: new + path: /tutorials/generative/generate_images_with_stable_diffusion - title: "Neural style transfer" path: /tutorials/generative/style_transfer - title: "DeepDream" @@ -176,6 +187,17 @@ toc: path: /tutorials/generative/autoencoder - title: "Variational Autoencoder" path: /tutorials/generative/cvae + - title: "Lossy data compression" + path: /tutorials/generative/data_compression + +- title: "Model optimization" + style: accordion + 
section: + - title: "Scalable model compression with EPR" + path: /tutorials/optimization/compression + - title: "TensorFlow model optimization" + status: external + path: /model_optimization - title: "Model Understanding" style: accordion @@ -187,6 +209,7 @@ toc: - title: "Probabilistic regression" path: /probability/examples/Probabilistic_Layers_Regression status: external + - title: "Reinforcement learning" style: accordion section: @@ -198,6 +221,7 @@ toc: - title: "tf.Estimator" style: accordion + status: deprecated section: - title: "Premade estimator" path: /tutorials/estimator/premade diff --git a/site/en/tutorials/audio/music_generation.ipynb b/site/en/tutorials/audio/music_generation.ipynb index 89802d0447b..e1423ef7cf2 100644 --- a/site/en/tutorials/audio/music_generation.ipynb +++ b/site/en/tutorials/audio/music_generation.ipynb @@ -68,9 +68,9 @@ "id": "hr78EkAY-FFg" }, "source": [ - "This tutorial shows you how to generate musical notes using a simple RNN. You will train a model using a collection of piano MIDI files from the [MAESTRO dataset](https://magenta.tensorflow.org/datasets/maestro). Given a sequence of notes, your model will learn to predict the next note in the sequence. You can generate a longer sequences of notes by calling the model repeatedly.\n", + "This tutorial shows you how to generate musical notes using a simple recurrent neural network (RNN). You will train a model using a collection of piano MIDI files from the [MAESTRO dataset](https://magenta.tensorflow.org/datasets/maestro). Given a sequence of notes, your model will learn to predict the next note in the sequence. You can generate longer sequences of notes by calling the model repeatedly.\n", "\n", - "This tutorial contains complete code to parse and create MIDI files. You can learn more about how RNNs work by visiting [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation)." + "This tutorial contains complete code to parse and create MIDI files. You can learn more about how RNNs work by visiting the [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation) tutorial." ] }, { @@ -145,7 +145,7 @@ "\n", "from IPython import display\n", "from matplotlib import pyplot as plt\n", - "from typing import Dict, List, Optional, Sequence, Tuple" + "from typing import Optional" ] }, { @@ -680,7 +680,7 @@ "id": "xIBLvj-cODWS" }, "source": [ - "Next, create a [tf.data.Dataset](https://www.tensorflow.org/datasets) from the parsed notes." + "Next, create a `tf.data.Dataset` from the parsed notes." ] }, { @@ -713,7 +713,7 @@ "id": "Sj9SXRCjt3I7" }, "source": [ - "You will train the model on batches of sequences of notes. Each example will consist of a sequence of notes as the input features, and next note as the label. In this way, the model will be trained to predict the next note in a sequence. You can find a diagram explaining this process (and more details) in [Text classification with an RNN](https://www.tensorflow.org/text/tutorials/text_generation).\n", + "You will train the model on batches of sequences of notes. Each example will consist of a sequence of notes as the input features, and the next note as the label. In this way, the model will be trained to predict the next note in a sequence. 
You can find a diagram describing this process (and more details) in [Text classification with an RNN](https://www.tensorflow.org/text/tutorials/text_generation).\n", "\n", "You can use the handy [window](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#window) function with size `seq_length` to create the features and labels in this format." ] @@ -857,7 +857,7 @@ "id": "iGQn32q-hdK2" }, "source": [ - "The model will have three outputs, one for each note variable. For `pitch` and `duration`, you will use a custom loss function based on mean squared error that encourages the model to output non-negative values." + "The model will have three outputs, one for each note variable. For `step` and `duration`, you will use a custom loss function based on mean squared error that encourages the model to output non-negative values." ] }, { @@ -1056,7 +1056,7 @@ "source": [ "To use the model to generate notes, you will first need to provide a starting sequence of notes. The function below generates one note from a sequence of notes. \n", "\n", - "For note pitch, it draws a sample from softmax distribution of notes produced by the model, and does not simply pick the note with the highest probability.\n", + "For note pitch, it draws a sample from the softmax distribution of notes produced by the model, and does not simply pick the note with the highest probability.\n", "Always picking the note with the highest probability would lead to repetitive sequences of notes being generated.\n", "\n", "The `temperature` parameter can be used to control the randomness of notes generated. You can find more details on temperature in [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation)." @@ -1072,9 +1072,9 @@ "source": [ "def predict_next_note(\n", " notes: np.ndarray, \n", - " keras_model: tf.keras.Model, \n", - " temperature: float = 1.0) -> int:\n", - " \"\"\"Generates a note IDs using a trained sequence model.\"\"\"\n", + " model: tf.keras.Model, \n", + " temperature: float = 1.0) -> tuple[int, float, float]:\n", + " \"\"\"Generates a note as a tuple of (pitch, step, duration), using a trained sequence model.\"\"\"\n", "\n", " assert temperature > 0\n", "\n", @@ -1229,9 +1229,8 @@ "source": [ "In the above plots, you will notice the change in distribution of the note variables.\n", "Since there is a feedback loop between the model's outputs and inputs, the model tends to generate similar sequences of outputs to reduce the loss. \n", - "This is particularly relevant for `step` and `duration`, which has uses MSE loss.\n", - "For `pitch`, you can increase the randomness by increasing the `temperature` in `predict_next_note`.\n", - "\n" + "This is particularly relevant for `step` and `duration`, which uses the MSE loss.\n", + "For `pitch`, you can increase the randomness by increasing the `temperature` in `predict_next_note`.\n" ] }, { @@ -1244,7 +1243,7 @@ "\n", "This tutorial demonstrated the mechanics of using an RNN to generate sequences of notes from a dataset of MIDI files. To learn more, you can visit the closely related [Text generation with an RNN](https://www.tensorflow.org/text/tutorials/text_generation) tutorial, which contains additional diagrams and explanations. \n", "\n", - "An alternative to using RNNs for music generation is using GANs. Rather than generating audio, a GAN-based approach can generate a entire sequence in parallel. The Magenta team has done impressive work on this approach with [GANSynth](https://magenta.tensorflow.org/gansynth). 
You can also find many wonderful music and art projects and open-source code on [Magenta project website](https://magenta.tensorflow.org/)." + "One of the alternatives to using RNNs for music generation is using GANs. Rather than generating audio, a GAN-based approach can generate an entire sequence in parallel. The Magenta team has done impressive work on this approach with [GANSynth](https://magenta.tensorflow.org/gansynth). You can also find many wonderful music and art projects and open-source code on [Magenta project website](https://magenta.tensorflow.org/)." ] } ], @@ -1253,7 +1252,6 @@ "colab": { "collapsed_sections": [], "name": "music_generation.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/audio/simple_audio.ipynb b/site/en/tutorials/audio/simple_audio.ipynb index 2bf92c54cb7..9d79742fbb7 100644 --- a/site/en/tutorials/audio/simple_audio.ipynb +++ b/site/en/tutorials/audio/simple_audio.ipynb @@ -74,9 +74,9 @@ "id": "SPfDNFlb66XF" }, "source": [ - "This tutorial demonstrates how to preprocess audio files in the WAV format and build and train a basic automatic speech recognition (ASR) model for recognizing ten different words. You will use a portion of the [Speech Commands dataset](https://www.tensorflow.org/datasets/catalog/speech_commands) (Warden, 2018), which contains short (one-second or less) audio clips of commands, such as \"down\", \"go\", \"left\", \"no\", \"right\", \"stop\", \"up\" and \"yes\".\n", + "This tutorial demonstrates how to preprocess audio files in the WAV format and build and train a basic [automatic speech recognition](https://en.wikipedia.org/wiki/Speech_recognition) (ASR) model for recognizing ten different words. You will use a portion of the [Speech Commands dataset](https://www.tensorflow.org/datasets/catalog/speech_commands) ([Warden, 2018](https://arxiv.org/abs/1804.03209)), which contains short (one-second or less) audio clips of commands, such as \"down\", \"go\", \"left\", \"no\", \"right\", \"stop\", \"up\" and \"yes\".\n", "\n", - "Real-world speech and audio recognition systems are complex. But, like [image classification with the MNIST dataset](../quickstart/beginner.ipynb), this tutorial should give you a basic understanding of the techniques involved." + "Real-world speech and audio recognition [systems](https://ai.googleblog.com/search/label/Speech%20Recognition) are complex. But, like [image classification with the MNIST dataset](../quickstart/beginner.ipynb), this tutorial should give you a basic understanding of the techniques involved." ] }, { @@ -87,7 +87,18 @@ "source": [ "## Setup\n", "\n", - "Import necessary modules and dependencies. Note that you'll be using seaborn for visualization in this tutorial." + "Import necessary modules and dependencies. You'll be using `tf.keras.utils.audio_dataset_from_directory` (introduced in TensorFlow 2.10), which helps generate audio classification datasets from directories of `.wav` files. You'll also need [seaborn](https://seaborn.pydata.org) for visualization in this tutorial." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hhNW45sjDEDe" + }, + "outputs": [], + "source": [ + "!pip install -U -q tensorflow tensorflow_datasets" ] }, { @@ -124,7 +135,7 @@ "source": [ "## Import the mini Speech Commands dataset\n", "\n", - "To save time with data loading, you will be working with a smaller version of the Speech Commands dataset. 
The [original dataset](https://www.tensorflow.org/datasets/catalog/speech_commands) consists of over 105,000 audio files in the WAV (Waveform) audio file format of people saying 35 different words. This data was collected by Google and released under a CC BY license.\n", + "To save time with data loading, you will be working with a smaller version of the Speech Commands dataset. The [original dataset](https://www.tensorflow.org/datasets/catalog/speech_commands) consists of over 105,000 audio files in the [WAV (Waveform) audio file format](https://www.aelius.com/njh/wavemetatools/doc/riffmci.pdf) of people saying 35 different words. This data was collected by Google and released under a CC BY license.\n", "\n", "Download and extract the `mini_speech_commands.zip` file containing the smaller Speech Commands datasets with `tf.keras.utils.get_file`:" ] @@ -166,218 +177,140 @@ "outputs": [], "source": [ "commands = np.array(tf.io.gfile.listdir(str(data_dir)))\n", - "commands = commands[commands != 'README.md']\n", + "commands = commands[(commands != 'README.md') & (commands != '.DS_Store')]\n", "print('Commands:', commands)" ] }, { "cell_type": "markdown", "metadata": { - "id": "aMvdU9SY8WXN" - }, - "source": [ - "Extract the audio clips into a list called `filenames`, and shuffle it:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "hlX685l1wD9k" - }, - "outputs": [], - "source": [ - "filenames = tf.io.gfile.glob(str(data_dir) + '/*/*')\n", - "filenames = tf.random.shuffle(filenames)\n", - "num_samples = len(filenames)\n", - "print('Number of total examples:', num_samples)\n", - "print('Number of examples per label:',\n", - " len(tf.io.gfile.listdir(str(data_dir/commands[0]))))\n", - "print('Example file tensor:', filenames[0])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9vK3ymy23MCP" + "id": "TZ7GJjDvHqtt" }, "source": [ - "Split `filenames` into training, validation and test sets using a 80:10:10 ratio, respectively:" + "Divided into directories this way, you can easily load the data using `keras.utils.audio_dataset_from_directory`. \n", + "\n", + "The audio clips are 1 second or less at 16kHz. The `output_sequence_length=16000` pads the short ones to exactly 1 second (and would trim longer ones) so that they can be easily batched." ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "Cv_wts-l3KgD" + "id": "mFM4c3aMC8Qv" }, "outputs": [], "source": [ - "train_files = filenames[:6400]\n", - "val_files = filenames[6400: 6400 + 800]\n", - "test_files = filenames[-800:]\n", + "train_ds, val_ds = tf.keras.utils.audio_dataset_from_directory(\n", + " directory=data_dir,\n", + " batch_size=64,\n", + " validation_split=0.2,\n", + " seed=0,\n", + " output_sequence_length=16000,\n", + " subset='both')\n", "\n", - "print('Training set size', len(train_files))\n", - "print('Validation set size', len(val_files))\n", - "print('Test set size', len(test_files))" + "label_names = np.array(train_ds.class_names)\n", + "print()\n", + "print(\"label names:\", label_names)" ] }, { "cell_type": "markdown", "metadata": { - "id": "g2Cj9FyvfweD" + "id": "cestp83qFnU5" }, "source": [ - "## Read the audio files and their labels" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "j1zjcWteOcBy" - }, - "source": [ - "In this section you will preprocess the dataset, creating decoded tensors for the waveforms and the corresponding labels. 
Note that:\n", - "\n", - "- Each WAV file contains time-series data with a set number of samples per second.\n", - "- Each sample represents the amplitude of the audio signal at that specific time.\n", - "- In a 16-bit system, like the WAV files in the mini Speech Commands dataset, the amplitude values range from -32,768 to 32,767.\n", - "- The sample rate for this dataset is 16kHz.\n", - "\n", - "The shape of the tensor returned by `tf.audio.decode_wav` is `[samples, channels]`, where `channels` is `1` for mono or `2` for stereo. The mini Speech Commands dataset only contains mono recordings. " + "The dataset now contains batches of audio clips and integer labels. The audio clips have a shape of `(batch, samples, channels)`. " ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "d16bb8416f90" + "id": "3yU6SQGIFb3H" }, "outputs": [], "source": [ - "test_file = tf.io.read_file(DATASET_PATH+'/down/0a9f9af7_nohash_0.wav')\n", - "test_audio, _ = tf.audio.decode_wav(contents=test_file)\n", - "test_audio.shape" + "train_ds.element_spec" ] }, { "cell_type": "markdown", "metadata": { - "id": "e6bb8defd2ef" + "id": "ppG9Dgq2Ex8R" }, "source": [ - "Now, let's define a function that preprocesses the dataset's raw WAV audio files into audio tensors:" + "This dataset only contains single channel audio, so use the `tf.squeeze` function to drop the extra axis:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "9PjJ2iXYwftD" + "id": "Xl-tnniUIBlM" }, "outputs": [], "source": [ - "def decode_audio(audio_binary):\n", - " # Decode WAV-encoded audio files to `float32` tensors, normalized\n", - " # to the [-1.0, 1.0] range. Return `float32` audio and a sample rate.\n", - " audio, _ = tf.audio.decode_wav(contents=audio_binary)\n", - " # Since all the data is single channel (mono), drop the `channels`\n", - " # axis from the array.\n", - " return tf.squeeze(audio, axis=-1)" + "def squeeze(audio, labels):\n", + " audio = tf.squeeze(audio, axis=-1)\n", + " return audio, labels\n", + "\n", + "train_ds = train_ds.map(squeeze, tf.data.AUTOTUNE)\n", + "val_ds = val_ds.map(squeeze, tf.data.AUTOTUNE)" ] }, { "cell_type": "markdown", "metadata": { - "id": "GPQseZElOjVN" + "id": "DtsCSWZN5ILv" }, "source": [ - "Define a function that creates labels using the parent directories for each file:\n", - "\n", - "- Split the file paths into `tf.RaggedTensor`s (tensors with ragged dimensions—with slices that may have different lengths)." + "The `utils.audio_dataset_from_directory` function only returns up to two splits. It's a good idea to keep a test set separate from your validation set.\n", + "Ideally you'd keep it in a separate directory, but in this case you can use `Dataset.shard` to split the validation set into two halves. Note that iterating over **any** shard will load **all** the data, and only keep its fraction. 
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "8VTtX1nr3YT-" + "id": "u5UEGsqM5Gss" }, "outputs": [], "source": [ - "def get_label(file_path):\n", - " parts = tf.strings.split(\n", - " input=file_path,\n", - " sep=os.path.sep)\n", - " # Note: You'll use indexing here instead of tuple unpacking to enable this\n", - " # to work in a TensorFlow graph.\n", - " return parts[-2]" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E8Y9w_5MOsr-" - }, - "source": [ - "Define another helper function—`get_waveform_and_label`—that puts it all together:\n", - "\n", - "- The input is the WAV audio filename.\n", - "- The output is a tuple containing the audio and label tensors ready for supervised learning." + "test_ds = val_ds.shard(num_shards=2, index=0)\n", + "val_ds = val_ds.shard(num_shards=2, index=1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "WdgUD5T93NyT" + "id": "xIeoJcwJH5h9" }, "outputs": [], "source": [ - "def get_waveform_and_label(file_path):\n", - " label = get_label(file_path)\n", - " audio_binary = tf.io.read_file(file_path)\n", - " waveform = decode_audio(audio_binary)\n", - " return waveform, label" + "for example_audio, example_labels in train_ds.take(1): \n", + " print(example_audio.shape)\n", + " print(example_labels.shape)" ] }, { "cell_type": "markdown", "metadata": { - "id": "nvN8W_dDjYjc" + "id": "voxGEwvuh2L7" }, "source": [ - "Build the training set to extract the audio-label pairs:\n", - "\n", - "- Create a `tf.data.Dataset` with `Dataset.from_tensor_slices` and `Dataset.map`, using `get_waveform_and_label` defined earlier.\n", - "\n", - "You'll build the validation and test sets using a similar procedure later on." + "Let's plot a few audio waveforms:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "0SQl8yXl3kNP" + "id": "dYtGq2zYNHuT" }, "outputs": [], "source": [ - "AUTOTUNE = tf.data.AUTOTUNE\n", - "\n", - "files_ds = tf.data.Dataset.from_tensor_slices(train_files)\n", - "\n", - "waveform_ds = files_ds.map(\n", - " map_func=get_waveform_and_label,\n", - " num_parallel_calls=AUTOTUNE)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "voxGEwvuh2L7" - }, - "source": [ - "Let's plot a few audio waveforms:" + "label_names[[1,1,3,0]]" ] }, { @@ -388,21 +321,17 @@ }, "outputs": [], "source": [ + "plt.figure(figsize=(16, 10))\n", "rows = 3\n", "cols = 3\n", "n = rows * cols\n", - "fig, axes = plt.subplots(rows, cols, figsize=(10, 12))\n", - "\n", - "for i, (audio, label) in enumerate(waveform_ds.take(n)):\n", - " r = i // cols\n", - " c = i % cols\n", - " ax = axes[r][c]\n", - " ax.plot(audio.numpy())\n", - " ax.set_yticks(np.arange(-1.2, 1.2, 0.2))\n", - " label = label.numpy().decode('utf-8')\n", - " ax.set_title(label)\n", - "\n", - "plt.show()" + "for i in range(n):\n", + " plt.subplot(rows, cols, i+1)\n", + " audio_signal = example_audio[i]\n", + " plt.plot(audio_signal)\n", + " plt.title(label_names[example_labels[i]])\n", + " plt.yticks(np.arange(-1.2, 1.2, 0.2))\n", + " plt.ylim([-1.1, 1.1])" ] }, { @@ -413,14 +342,14 @@ "source": [ "## Convert waveforms to spectrograms\n", "\n", - "The waveforms in the dataset are represented in the time domain. Next, you'll transform the waveforms from the time-domain signals into the time-frequency-domain signals by computing the short-time Fourier transform (STFT) to convert the waveforms to as spectrograms, which show frequency changes over time and can be represented as 2D images. 
You will feed the spectrogram images into your neural network to train the model.\n", + "The waveforms in the dataset are represented in the time domain. Next, you'll transform the waveforms from the time-domain signals into the time-frequency-domain signals by computing the [short-time Fourier transform (STFT)](https://en.wikipedia.org/wiki/Short-time_Fourier_transform) to convert the waveforms to as [spectrograms](https://en.wikipedia.org/wiki/Spectrogram), which show frequency changes over time and can be represented as 2D images. You will feed the spectrogram images into your neural network to train the model.\n", "\n", "A Fourier transform (`tf.signal.fft`) converts a signal to its component frequencies, but loses all time information. In comparison, STFT (`tf.signal.stft`) splits the signal into windows of time and runs a Fourier transform on each window, preserving some time information, and returning a 2D tensor that you can run standard convolutions on.\n", "\n", "Create a utility function for converting waveforms to spectrograms:\n", "\n", "- The waveforms need to be of the same length, so that when you convert them to spectrograms, the results have similar dimensions. This can be done by simply zero-padding the audio clips that are shorter than one second (using `tf.zeros`).\n", - "- When calling `tf.signal.stft`, choose the `frame_length` and `frame_step` parameters such that the generated spectrogram \"image\" is almost square. For more information on the STFT parameters choice, refer to this Coursera video on audio signal processing and STFT.\n", + "- When calling `tf.signal.stft`, choose the `frame_length` and `frame_step` parameters such that the generated spectrogram \"image\" is almost square. For more information on the STFT parameters choice, refer to [this Coursera video](https://www.coursera.org/lecture/audio-signal-processing/stft-2-tjEQe) on audio signal processing and STFT.\n", "- The STFT produces an array of complex numbers representing magnitude and phase. However, in this tutorial you'll only use the magnitude, which you can derive by applying `tf.abs` on the output of `tf.signal.stft`." 
] }, @@ -433,20 +362,9 @@ "outputs": [], "source": [ "def get_spectrogram(waveform):\n", - " # Zero-padding for an audio waveform with less than 16,000 samples.\n", - " input_len = 16000\n", - " waveform = waveform[:input_len]\n", - " zero_padding = tf.zeros(\n", - " [16000] - tf.shape(waveform),\n", - " dtype=tf.float32)\n", - " # Cast the waveform tensors' dtype to float32.\n", - " waveform = tf.cast(waveform, dtype=tf.float32)\n", - " # Concatenate the waveform with `zero_padding`, which ensures all audio\n", - " # clips are of the same length.\n", - " equal_length = tf.concat([waveform, zero_padding], 0)\n", " # Convert the waveform to a spectrogram via a STFT.\n", " spectrogram = tf.signal.stft(\n", - " equal_length, frame_length=255, frame_step=128)\n", + " waveform, frame_length=255, frame_step=128)\n", " # Obtain the magnitude of the STFT.\n", " spectrogram = tf.abs(spectrogram)\n", " # Add a `channels` dimension, so that the spectrogram can be used\n", @@ -473,15 +391,16 @@ }, "outputs": [], "source": [ - "for waveform, label in waveform_ds.take(1):\n", - " label = label.numpy().decode('utf-8')\n", + "for i in range(3):\n", + " label = label_names[example_labels[i]]\n", + " waveform = example_audio[i]\n", " spectrogram = get_spectrogram(waveform)\n", "\n", - "print('Label:', label)\n", - "print('Waveform shape:', waveform.shape)\n", - "print('Spectrogram shape:', spectrogram.shape)\n", - "print('Audio playback')\n", - "display.display(display.Audio(waveform, rate=16000))" + " print('Label:', label)\n", + " print('Waveform shape:', waveform.shape)\n", + " print('Spectrogram shape:', spectrogram.shape)\n", + " print('Audio playback')\n", + " display.display(display.Audio(waveform, rate=16000))" ] }, { @@ -541,6 +460,7 @@ "\n", "plot_spectrogram(spectrogram.numpy(), axes[1])\n", "axes[1].set_title('Spectrogram')\n", + "plt.suptitle(label.title())\n", "plt.show()" ] }, @@ -550,30 +470,21 @@ "id": "GyYXjW07jCHA" }, "source": [ - "Now, define a function that transforms the waveform dataset into spectrograms and their corresponding labels as integer IDs:" + "Now, create spectrogram datasets from the audio datasets:" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "43IS2IouEV40" + "id": "mAD0LpkgqtQo" }, "outputs": [], "source": [ - "def get_spectrogram_and_label_id(audio, label):\n", - " spectrogram = get_spectrogram(audio)\n", - " label_id = tf.argmax(label == commands)\n", - " return spectrogram, label_id" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cf5d5b033a45" - }, - "source": [ - "Map `get_spectrogram_and_label_id` across the dataset's elements with `Dataset.map`:" + "def make_spec_ds(ds):\n", + " return ds.map(\n", + " map_func=lambda audio,label: (get_spectrogram(audio), label),\n", + " num_parallel_calls=tf.data.AUTOTUNE)" ] }, { @@ -584,9 +495,9 @@ }, "outputs": [], "source": [ - "spectrogram_ds = waveform_ds.map(\n", - " map_func=get_spectrogram_and_label_id,\n", - " num_parallel_calls=AUTOTUNE)" + "train_spectrogram_ds = make_spec_ds(train_ds)\n", + "val_spectrogram_ds = make_spec_ds(val_ds)\n", + "test_spectrogram_ds = make_spec_ds(test_ds)" ] }, { @@ -602,89 +513,44 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "QUbHfTuon4iF" - }, - "outputs": [], - "source": [ - "rows = 3\n", - "cols = 3\n", - "n = rows*cols\n", - "fig, axes = plt.subplots(rows, cols, figsize=(10, 10))\n", - "\n", - "for i, (spectrogram, label_id) in enumerate(spectrogram_ds.take(n)):\n", - " r = i // cols\n", - " c = i % cols\n", - " 
ax = axes[r][c]\n", - " plot_spectrogram(spectrogram.numpy(), ax)\n", - " ax.set_title(commands[label_id.numpy()])\n", - " ax.axis('off')\n", - "\n", - "plt.show()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "z5KdY8IF8rkt" - }, - "source": [ - "## Build and train the model\n", - "\n", - "Repeat the training set preprocessing on the validation and test sets:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "10UI32QH_45b" + "id": "EaM2q5aGis-d" }, "outputs": [], "source": [ - "def preprocess_dataset(files):\n", - " files_ds = tf.data.Dataset.from_tensor_slices(files)\n", - " output_ds = files_ds.map(\n", - " map_func=get_waveform_and_label,\n", - " num_parallel_calls=AUTOTUNE)\n", - " output_ds = output_ds.map(\n", - " map_func=get_spectrogram_and_label_id,\n", - " num_parallel_calls=AUTOTUNE)\n", - " return output_ds" + "for example_spectrograms, example_spect_labels in train_spectrogram_ds.take(1):\n", + " break" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "HNv4xwYkB2P6" + "id": "QUbHfTuon4iF" }, "outputs": [], "source": [ - "train_ds = spectrogram_ds\n", - "val_ds = preprocess_dataset(val_files)\n", - "test_ds = preprocess_dataset(test_files)" + "rows = 3\n", + "cols = 3\n", + "n = rows*cols\n", + "fig, axes = plt.subplots(rows, cols, figsize=(16, 9))\n", + "\n", + "for i in range(n):\n", + " r = i // cols\n", + " c = i % cols\n", + " ax = axes[r][c]\n", + " plot_spectrogram(example_spectrograms[i].numpy(), ax)\n", + " ax.set_title(label_names[example_spect_labels[i].numpy()])\n", + "\n", + "plt.show()" ] }, { "cell_type": "markdown", "metadata": { - "id": "assnWo6SB3lR" - }, - "source": [ - "Batch the training and validation sets for model training:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UgY9WYzn61EX" + "id": "z5KdY8IF8rkt" }, - "outputs": [], "source": [ - "batch_size = 64\n", - "train_ds = train_ds.batch(batch_size)\n", - "val_ds = val_ds.batch(batch_size)" + "## Build and train the model" ] }, { @@ -704,8 +570,9 @@ }, "outputs": [], "source": [ - "train_ds = train_ds.cache().prefetch(AUTOTUNE)\n", - "val_ds = val_ds.cache().prefetch(AUTOTUNE)" + "train_spectrogram_ds = train_spectrogram_ds.cache().shuffle(10000).prefetch(tf.data.AUTOTUNE)\n", + "val_spectrogram_ds = val_spectrogram_ds.cache().prefetch(tf.data.AUTOTUNE)\n", + "test_spectrogram_ds = test_spectrogram_ds.cache().prefetch(tf.data.AUTOTUNE)" ] }, { @@ -732,16 +599,15 @@ }, "outputs": [], "source": [ - "for spectrogram, _ in spectrogram_ds.take(1):\n", - " input_shape = spectrogram.shape\n", + "input_shape = example_spectrograms.shape[1:]\n", "print('Input shape:', input_shape)\n", - "num_labels = len(commands)\n", + "num_labels = len(label_names)\n", "\n", "# Instantiate the `tf.keras.layers.Normalization` layer.\n", "norm_layer = layers.Normalization()\n", "# Fit the state of the layer to the spectrograms\n", "# with `Normalization.adapt`.\n", - "norm_layer.adapt(data=spectrogram_ds.map(map_func=lambda spec, label: spec))\n", + "norm_layer.adapt(data=train_spectrogram_ds.map(map_func=lambda spec, label: spec))\n", "\n", "model = models.Sequential([\n", " layers.Input(shape=input_shape),\n", @@ -805,8 +671,8 @@ "source": [ "EPOCHS = 10\n", "history = model.fit(\n", - " train_ds,\n", - " validation_data=val_ds,\n", + " train_spectrogram_ds,\n", + " validation_data=val_spectrogram_ds,\n", " epochs=EPOCHS,\n", " callbacks=tf.keras.callbacks.EarlyStopping(verbose=1, patience=2),\n", ")" @@ 
-830,9 +696,20 @@ "outputs": [], "source": [ "metrics = history.history\n", + "plt.figure(figsize=(16,6))\n", + "plt.subplot(1,2,1)\n", "plt.plot(history.epoch, metrics['loss'], metrics['val_loss'])\n", "plt.legend(['loss', 'val_loss'])\n", - "plt.show()" + "plt.ylim([0, max(plt.ylim())])\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Loss [CrossEntropy]')\n", + "\n", + "plt.subplot(1,2,2)\n", + "plt.plot(history.epoch, 100*np.array(metrics['accuracy']), 100*np.array(metrics['val_accuracy']))\n", + "plt.legend(['accuracy', 'val_accuracy'])\n", + "plt.ylim([0, 100])\n", + "plt.xlabel('Epoch')\n", + "plt.ylabel('Accuracy [%]')" ] }, { @@ -850,45 +727,55 @@ "cell_type": "code", "execution_count": null, "metadata": { - "id": "biU2MwzyAo8o" + "id": "FapuRT_SsWGQ" }, "outputs": [], "source": [ - "test_audio = []\n", - "test_labels = []\n", - "\n", - "for audio, label in test_ds:\n", - " test_audio.append(audio.numpy())\n", - " test_labels.append(label.numpy())\n", + "model.evaluate(test_spectrogram_ds, return_dict=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "en9Znt1NOabH" + }, + "source": [ + "### Display a confusion matrix\n", "\n", - "test_audio = np.array(test_audio)\n", - "test_labels = np.array(test_labels)" + "Use a [confusion matrix](https://developers.google.com/machine-learning/glossary#confusion-matrix) to check how well the model did classifying each of the commands in the test set:\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "ktUanr9mRZky" + "id": "5Y6vmWWQuuT1" }, "outputs": [], "source": [ - "y_pred = np.argmax(model.predict(test_audio), axis=1)\n", - "y_true = test_labels\n", - "\n", - "test_acc = sum(y_pred == y_true) / len(y_true)\n", - "print(f'Test set accuracy: {test_acc:.0%}')" + "y_pred = model.predict(test_spectrogram_ds)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": { - "id": "en9Znt1NOabH" + "id": "d6F0il82u7lW" }, + "outputs": [], "source": [ - "### Display a confusion matrix\n", - "\n", - "Use a confusion matrix to check how well the model did classifying each of the commands in the test set:\n" + "y_pred = tf.argmax(y_pred, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "vHSNoBYLvX81" + }, + "outputs": [], + "source": [ + "y_true = tf.concat(list(test_spectrogram_ds.map(lambda s,lab: lab)), axis=0)" ] }, { @@ -902,8 +789,8 @@ "confusion_mtx = tf.math.confusion_matrix(y_true, y_pred)\n", "plt.figure(figsize=(10, 8))\n", "sns.heatmap(confusion_mtx,\n", - " xticklabels=commands,\n", - " yticklabels=commands,\n", + " xticklabels=label_names,\n", + " yticklabels=label_names,\n", " annot=True, fmt='g')\n", "plt.xlabel('Prediction')\n", "plt.ylabel('Label')\n", @@ -929,15 +816,21 @@ }, "outputs": [], "source": [ - "sample_file = data_dir/'no/01bb6a2a_nohash_0.wav'\n", + "x = data_dir/'no/01bb6a2a_nohash_0.wav'\n", + "x = tf.io.read_file(str(x))\n", + "x, sample_rate = tf.audio.decode_wav(x, desired_channels=1, desired_samples=16000,)\n", + "x = tf.squeeze(x, axis=-1)\n", + "waveform = x\n", + "x = get_spectrogram(x)\n", + "x = x[tf.newaxis,...]\n", "\n", - "sample_ds = preprocess_dataset([str(sample_file)])\n", + "prediction = model(x)\n", + "x_labels = ['no', 'yes', 'down', 'go', 'left', 'up', 'right', 'stop']\n", + "plt.bar(x_labels, tf.nn.softmax(prediction[0]))\n", + "plt.title('No')\n", + "plt.show()\n", "\n", - "for spectrogram, label in sample_ds.batch(1):\n", - " prediction = model(spectrogram)\n", - " 
plt.bar(commands, tf.nn.softmax(prediction[0]))\n", - " plt.title(f'Predictions for \"{commands[label[0]]}\"')\n", - " plt.show()" + "display.display(display.Audio(waveform, rate=16000))" ] }, { @@ -949,6 +842,106 @@ "As the output suggests, your model should have recognized the audio command as \"no\"." ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "h1icqlM3ISW0" + }, + "source": [ + "## Export the model with preprocessing" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r7HX-MjgIbji" + }, + "source": [ + "The model's not very easy to use if you have to apply those preprocessing steps before passing data to the model for inference. So build an end-to-end version:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2lIeXdWjIbDE" + }, + "outputs": [], + "source": [ + "class ExportModel(tf.Module):\n", + " def __init__(self, model):\n", + " self.model = model\n", + "\n", + " # Accept either a string-filename or a batch of waveforms.\n", + " # You could add additional signatures for a single wave, or a ragged-batch. \n", + " self.__call__.get_concrete_function(\n", + " x=tf.TensorSpec(shape=(), dtype=tf.string))\n", + " self.__call__.get_concrete_function(\n", + " x=tf.TensorSpec(shape=[None, 16000], dtype=tf.float32))\n", + "\n", + "\n", + " @tf.function\n", + " def __call__(self, x):\n", + " # If they pass a string, load the file and decode it. \n", + " if x.dtype == tf.string:\n", + " x = tf.io.read_file(x)\n", + " x, _ = tf.audio.decode_wav(x, desired_channels=1, desired_samples=16000,)\n", + " x = tf.squeeze(x, axis=-1)\n", + " x = x[tf.newaxis, :]\n", + " \n", + " x = get_spectrogram(x) \n", + " result = self.model(x, training=False)\n", + " \n", + " class_ids = tf.argmax(result, axis=-1)\n", + " class_names = tf.gather(label_names, class_ids)\n", + " return {'predictions':result,\n", + " 'class_ids': class_ids,\n", + " 'class_names': class_names}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "gtZBmUiB9HGY" + }, + "source": [ + "Test run the \"export\" model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z1_8TYaCIRue" + }, + "outputs": [], + "source": [ + "export = ExportModel(model)\n", + "export(tf.constant(str(data_dir/'no/01bb6a2a_nohash_0.wav')))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1J6Iuz829Cxo" + }, + "source": [ + "Save and reload the model, the reloaded model gives identical output:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wTAg4vsn3oEb" + }, + "outputs": [], + "source": [ + "tf.saved_model.save(export, \"saved\")\n", + "imported = tf.saved_model.load(\"saved\")\n", + "imported(waveform[tf.newaxis, :])" + ] + }, { "cell_type": "markdown", "metadata": { @@ -960,17 +953,17 @@ "This tutorial demonstrated how to carry out simple audio classification/automatic speech recognition using a convolutional neural network with TensorFlow and Python. 
To learn more, consider the following resources:\n", "\n", "- The [Sound classification with YAMNet](https://www.tensorflow.org/hub/tutorials/yamnet) tutorial shows how to use transfer learning for audio classification.\n", - "- The notebooks from Kaggle's TensorFlow speech recognition challenge.\n", + "- The notebooks from [Kaggle's TensorFlow speech recognition challenge](https://www.kaggle.com/c/tensorflow-speech-recognition-challenge/overview).\n", "- The \n", - "TensorFlow.js - Audio recognition using transfer learning codelab teaches how to build your own interactive web app for audio classification.\n", - "- A tutorial on deep learning for music information retrieval (Choi et al., 2017) on arXiv.\n", + "[TensorFlow.js - Audio recognition using transfer learning codelab](https://codelabs.developers.google.com/codelabs/tensorflowjs-audio-codelab/index.html#0) teaches how to build your own interactive web app for audio classification.\n", + "- [A tutorial on deep learning for music information retrieval](https://arxiv.org/abs/1709.04396) (Choi et al., 2017) on arXiv.\n", "- TensorFlow also has additional support for [audio data preparation and augmentation](https://www.tensorflow.org/io/tutorials/audio) to help with your own audio-based projects.\n", - "- Consider using the librosa library—a Python package for music and audio analysis." + "- Consider using the [librosa](https://librosa.org/) library for music and audio analysis." ] } ], "metadata": { - "accelerator": "GPU", + "accelerator": "CPU", "colab": { "collapsed_sections": [], "name": "simple_audio.ipynb", diff --git a/site/en/tutorials/audio/transfer_learning_audio.ipynb b/site/en/tutorials/audio/transfer_learning_audio.ipynb index 16c679aed61..160aeeb7103 100644 --- a/site/en/tutorials/audio/transfer_learning_audio.ipynb +++ b/site/en/tutorials/audio/transfer_learning_audio.ipynb @@ -99,7 +99,9 @@ }, "outputs": [], "source": [ - "!pip install tensorflow_io" + "!pip install -q \"tensorflow==2.11.*\"\n", + "# tensorflow_io 0.28 is compatible with TensorFlow 2.11\n", + "!pip install -q \"tensorflow_io==0.28.*\"" ] }, { @@ -235,7 +237,7 @@ "_ = plt.plot(testing_wav_data)\n", "\n", "# Play the audio file.\n", - "display.Audio(testing_wav_data,rate=16000)" + "display.Audio(testing_wav_data, rate=16000)" ] }, { @@ -286,7 +288,7 @@ "source": [ "scores, embeddings, spectrogram = yamnet_model(testing_wav_data)\n", "class_scores = tf.reduce_mean(scores, axis=0)\n", - "top_class = tf.argmax(class_scores)\n", + "top_class = tf.math.argmax(class_scores)\n", "inferred_class = class_names[top_class]\n", "\n", "print(f'The main sound is: {inferred_class}')\n", @@ -736,7 +738,7 @@ "outputs": [], "source": [ "reloaded_results = reloaded_model(testing_wav_data)\n", - "cat_or_dog = my_classes[tf.argmax(reloaded_results)]\n", + "cat_or_dog = my_classes[tf.math.argmax(reloaded_results)]\n", "print(f'The main sound is: {cat_or_dog}')" ] }, @@ -758,7 +760,7 @@ "outputs": [], "source": [ "serving_results = reloaded_model.signatures['serving_default'](testing_wav_data)\n", - "cat_or_dog = my_classes[tf.argmax(serving_results['classifier'])]\n", + "cat_or_dog = my_classes[tf.math.argmax(serving_results['classifier'])]\n", "print(f'The main sound is: {cat_or_dog}')\n" ] }, @@ -805,13 +807,13 @@ "# Run the model, check the output.\n", "scores, embeddings, spectrogram = yamnet_model(waveform)\n", "class_scores = tf.reduce_mean(scores, axis=0)\n", - "top_class = tf.argmax(class_scores)\n", + "top_class = tf.math.argmax(class_scores)\n", "inferred_class = 
class_names[top_class]\n", "top_score = class_scores[top_class]\n", "print(f'[YAMNet] The main sound is: {inferred_class} ({top_score})')\n", "\n", "reloaded_results = reloaded_model(waveform)\n", - "your_top_class = tf.argmax(reloaded_results)\n", + "your_top_class = tf.math.argmax(reloaded_results)\n", "your_inferred_class = my_classes[your_top_class]\n", "class_probabilities = tf.nn.softmax(reloaded_results, axis=-1)\n", "your_top_score = class_probabilities[your_top_class]\n", diff --git a/site/en/tutorials/customization/basics.ipynb b/site/en/tutorials/customization/basics.ipynb index fa13409feaa..2df0840ad5e 100644 --- a/site/en/tutorials/customization/basics.ipynb +++ b/site/en/tutorials/customization/basics.ipynb @@ -70,10 +70,10 @@ "source": [ "This is an introductory TensorFlow tutorial that shows how to:\n", "\n", - "* Import the required package\n", - "* Create and use tensors\n", - "* Use GPU acceleration\n", - "* Demonstrate `tf.data.Dataset`" + "* Import the required package.\n", + "* Create and use tensors.\n", + "* Use GPU acceleration.\n", + "* Build a data pipeline with `tf.data.Dataset`." ] }, { @@ -106,7 +106,7 @@ "source": [ "## Tensors\n", "\n", - "A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, `tf.Tensor` objects have a data type and a shape. Additionally, `tf.Tensor`s can reside in accelerator memory (like a GPU). TensorFlow offers a rich library of operations ([tf.add](https://www.tensorflow.org/api_docs/python/tf/add), [tf.matmul](https://www.tensorflow.org/api_docs/python/tf/matmul), [tf.linalg.inv](https://www.tensorflow.org/api_docs/python/tf/linalg/inv) etc.) that consume and produce `tf.Tensor`s. These operations automatically convert built-in Python types, for example:\n" + "A Tensor is a multi-dimensional array. Similar to NumPy `ndarray` objects, `tf.Tensor` objects have a data type and a shape. Additionally, `tf.Tensor`s can reside in accelerator memory (like a GPU). TensorFlow offers a rich library of operations (for example, `tf.math.add`, `tf.linalg.matmul`, and `tf.linalg.inv`) that consume and produce `tf.Tensor`s. These operations automatically convert built-in Python types. 
For example:\n" ] }, { @@ -118,13 +118,13 @@ }, "outputs": [], "source": [ - "print(tf.add(1, 2))\n", - "print(tf.add([1, 2], [3, 4]))\n", - "print(tf.square(5))\n", - "print(tf.reduce_sum([1, 2, 3]))\n", + "print(tf.math.add(1, 2))\n", + "print(tf.math.add([1, 2], [3, 4]))\n", + "print(tf.math.square(5))\n", + "print(tf.math.reduce_sum([1, 2, 3]))\n", "\n", "# Operator overloading is also supported\n", - "print(tf.square(2) + tf.square(3))" + "print(tf.math.square(2) + tf.math.square(3))" ] }, { @@ -144,7 +144,7 @@ }, "outputs": [], "source": [ - "x = tf.matmul([[1]], [[2, 3]])\n", + "x = tf.linalg.matmul([[1]], [[2, 3]])\n", "print(x)\n", "print(x.shape)\n", "print(x.dtype)" @@ -168,9 +168,9 @@ "id": "Dwi1tdW3JBw6" }, "source": [ - "### NumPy Compatibility\n", + "### NumPy compatibility\n", "\n", - "Converting between a TensorFlow `tf.Tensor`s and a NumPy `ndarray` is easy:\n", + "Converting between a TensorFlow `tf.Tensor` and a NumPy `ndarray` is easy:\n", "\n", "* TensorFlow operations automatically convert NumPy ndarrays to Tensors.\n", "* NumPy operations automatically convert Tensors to NumPy ndarrays.\n", @@ -191,11 +191,11 @@ "ndarray = np.ones([3, 3])\n", "\n", "print(\"TensorFlow operations convert numpy arrays to Tensors automatically\")\n", - "tensor = tf.multiply(ndarray, 42)\n", + "tensor = tf.math.multiply(ndarray, 42)\n", "print(tensor)\n", "\n", "\n", - "print(\"And NumPy operations convert Tensors to numpy arrays automatically\")\n", + "print(\"And NumPy operations convert Tensors to NumPy arrays automatically\")\n", "print(np.add(tensor, 1))\n", "\n", "print(\"The .numpy() method explicitly converts a Tensor to a numpy array\")\n", @@ -210,7 +210,7 @@ "source": [ "## GPU acceleration\n", "\n", - "Many TensorFlow operations are accelerated using the GPU for computation. Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation—copying the tensor between CPU and GPU memory, if necessary. Tensors produced by an operation are typically backed by the memory of the device on which the operation executed, for example:" + "Many TensorFlow operations are accelerated using the GPU for computation. Without any annotations, TensorFlow automatically decides whether to use the GPU or CPU for an operation—copying the tensor between CPU and GPU memory, if necessary. Tensors produced by an operation are typically backed by the memory of the device on which the operation executed. For example:" ] }, { @@ -237,7 +237,7 @@ "id": "vpgYzgVXW2Ud" }, "source": [ - "### Device Names\n", + "### Device names\n", "\n", "The `Tensor.device` property provides a fully qualified string name of the device hosting the contents of the tensor. This name encodes many details, such as an identifier of the network address of the host on which this program is executing and the device within that host. This is required for distributed execution of a TensorFlow program. The string ends with `GPU:` if the tensor is placed on the `N`-th GPU on the host." ] @@ -248,9 +248,11 @@ "id": "ZWZQCimzuqyP" }, "source": [ - "### Explicit Device Placement\n", + "### Explicit device placement\n", "\n", - "In TensorFlow, *placement* refers to how individual operations are assigned (placed on) a device for execution. As mentioned, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation and copies tensors to that device, if needed. 
However, TensorFlow operations can be explicitly placed on specific devices using the `tf.device` context manager, for example:" + "In TensorFlow, *placement* refers to how individual operations are assigned (placed on) a device for execution. As mentioned, when there is no explicit guidance provided, TensorFlow automatically decides which device to execute an operation and copies tensors to that device, if needed.\n", + "\n", + "However, TensorFlow operations can be explicitly placed on specific devices using the `tf.device` context manager. For example:" ] }, { @@ -266,7 +268,7 @@ "def time_matmul(x):\n", " start = time.time()\n", " for loop in range(10):\n", - " tf.matmul(x, x)\n", + " tf.linalg.matmul(x, x)\n", "\n", " result = time.time()-start\n", "\n", @@ -296,7 +298,7 @@ "source": [ "## Datasets\n", "\n", - "This section uses the [`tf.data.Dataset` API](https://www.tensorflow.org/guide/datasets) to build a pipeline for feeding data to your model. The `tf.data.Dataset` API is used to build performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops." + "This section uses the `tf.data.Dataset` API to build a pipeline for feeding data to your model. `tf.data.Dataset` is used to build performant, complex input pipelines from simple, re-usable pieces that will feed your model's training or evaluation loops. (Refer to the [tf.data: Build TensorFlow input pipelines](../../guide/data.ipynb) guide to learn more.)" ] }, { @@ -307,7 +309,7 @@ "source": [ "### Create a source `Dataset`\n", "\n", - "Create a *source* dataset using one of the factory functions like [`Dataset.from_tensors`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensors), [`Dataset.from_tensor_slices`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#from_tensor_slices), or using objects that read from files like [`TextLineDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset) or [`TFRecordDataset`](https://www.tensorflow.org/api_docs/python/tf/data/TFRecordDataset). See the [TensorFlow Dataset guide](https://www.tensorflow.org/guide/datasets#reading_input_data) for more information." + "Create a *source* dataset using one of the factory functions like `tf.data.Dataset.from_tensors`, `tf.data.Dataset.from_tensor_slices`, or using objects that read from files like `tf.data.TextLineDataset` or `tf.data.TFRecordDataset`. Refer to the _Reading input data_ section of the [tf.data: Build TensorFlow input pipelines](../../guide/data.ipynb) guide for more information." ] }, { @@ -341,7 +343,7 @@ "source": [ "### Apply transformations\n", "\n", - "Use the transformations functions like [`map`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#map), [`batch`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#batch), and [`shuffle`](https://www.tensorflow.org/api_docs/python/tf/data/Dataset#shuffle) to apply transformations to dataset records." + "Use the transformations functions like `tf.data.Dataset.map`, `tf.data.Dataset.batch`, and `tf.data.Dataset.shuffle` to apply transformations to dataset records." 
] }, { @@ -352,7 +354,7 @@ }, "outputs": [], "source": [ - "ds_tensors = ds_tensors.map(tf.square).shuffle(2).batch(2)\n", + "ds_tensors = ds_tensors.map(tf.math.square).shuffle(2).batch(2)\n", "\n", "ds_file = ds_file.batch(2)" ] diff --git a/site/en/tutorials/customization/custom_layers.ipynb b/site/en/tutorials/customization/custom_layers.ipynb index 8214537210f..8bfe0a01b09 100644 --- a/site/en/tutorials/customization/custom_layers.ipynb +++ b/site/en/tutorials/customization/custom_layers.ipynb @@ -103,7 +103,7 @@ "\n", "Most of the time when writing code for machine learning models you want to operate at a higher level of abstraction than individual operations and manipulation of individual variables.\n", "\n", - "Many machine learning models are expressible as the composition and stacking of relatively simple layers, and TensorFlow provides both a set of many common layers as a well as easy ways for you to write your own application-specific layers either from scratch or as the composition of existing layers.\n", + "Many machine learning models are expressible as the composition and stacking of relatively simple layers, and TensorFlow provides both a set of many common layers as well as easy ways for you to write your own application-specific layers either from scratch or as the composition of existing layers.\n", "\n", "TensorFlow includes the full [Keras](https://keras.io) API in the tf.keras package, and the Keras layers are very useful when building your own models.\n" ] diff --git a/site/en/tutorials/customization/custom_training_walkthrough.ipynb b/site/en/tutorials/customization/custom_training_walkthrough.ipynb index 513f3427f51..9a05d864815 100644 --- a/site/en/tutorials/customization/custom_training_walkthrough.ipynb +++ b/site/en/tutorials/customization/custom_training_walkthrough.ipynb @@ -191,7 +191,7 @@ "source": [ "### Preview the data\n", "\n", - "Download the simplified version of the penguins dataset (`penguins/simple`) using the TensorFlow Datasets [`tdfs.load`](https://www.tensorflow.org/datasets/api_docs/python/tfds/load) method. There are 344 data records in this dataset. Extract the first five records into a [`DataFrame`](https://www.tensorflow.org/datasets/api_docs/python/tfds/as_dataframe) object to inspect a sample of the values in this dataset:" + "Download the simplified version of the penguins dataset (`penguins/simple`) using the TensorFlow Datasets [`tfds.load`](https://www.tensorflow.org/datasets/api_docs/python/tfds/load) method. There are 344 data records in this dataset. Extract the first five records into a [`DataFrame`](https://www.tensorflow.org/datasets/api_docs/python/tfds/as_dataframe) object to inspect a sample of the values in this dataset:" ] }, { @@ -475,7 +475,7 @@ }, "outputs": [], "source": [ - "print(\"Prediction: {}\".format(tf.argmax(predictions, axis=1)))\n", + "print(\"Prediction: {}\".format(tf.math.argmax(predictions, axis=1)))\n", "print(\" Labels: {}\".format(labels))" ] }, @@ -824,7 +824,7 @@ " # training=False is needed only if there are layers with different\n", " # behavior during training versus inference (e.g. 
Dropout).\n", " logits = model(x, training=False)\n", - " prediction = tf.argmax(logits, axis=1, output_type=tf.int64)\n", + " prediction = tf.math.argmax(logits, axis=1, output_type=tf.int64)\n", " test_accuracy(prediction, y)\n", "\n", "print(\"Test set accuracy: {:.3%}\".format(test_accuracy.result()))" @@ -895,7 +895,7 @@ "predictions = model(predict_dataset, training=False)\n", "\n", "for i, logits in enumerate(predictions):\n", - " class_idx = tf.argmax(logits).numpy()\n", + " class_idx = tf.math.argmax(logits).numpy()\n", " p = tf.nn.softmax(logits)[class_idx]\n", " name = class_names[class_idx]\n", " print(\"Example {} prediction: {} ({:4.1f}%)\".format(i, name, 100*p))" diff --git a/site/en/tutorials/distribute/custom_training.ipynb b/site/en/tutorials/distribute/custom_training.ipynb index da45c340b1a..d14b0ac003c 100644 --- a/site/en/tutorials/distribute/custom_training.ipynb +++ b/site/en/tutorials/distribute/custom_training.ipynb @@ -68,9 +68,9 @@ "id": "FbVhjPpzn6BM" }, "source": [ - "This tutorial demonstrates how to use [`tf.distribute.Strategy`](https://www.tensorflow.org/guide/distributed_training) with custom training loops. We will train a simple CNN model on the fashion MNIST dataset. The fashion MNIST dataset contains 60000 train images of size 28 x 28 and 10000 test images of size 28 x 28.\n", + "This tutorial demonstrates how to use `tf.distribute.Strategy`—a TensorFlow API that provides an abstraction for [distributing your training](../../guide/distributed_training.ipynb) across multiple processing units (GPUs, multiple machines, or TPUs)—with custom training loops. In this example, you will train a simple convolutional neural network on the [Fashion MNIST dataset](https://github.com/zalandoresearch/fashion-mnist) containing 70,000 images of size 28 x 28.\n", "\n", - "We are using custom training loops to train our model because they give us flexibility and a greater control on training. Moreover, it is easier to debug the model and the training loop." + "[Custom training loops](../customization/custom_training_walkthrough.ipynb) provide flexibility and a greater control on training. They also make it easier to debug the model and the training loop." 
] }, { @@ -97,7 +97,7 @@ "id": "MM6W__qraV55" }, "source": [ - "## Download the fashion MNIST dataset" + "## Download the Fashion MNIST dataset" ] }, { @@ -112,14 +112,14 @@ "\n", "(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()\n", "\n", - "# Adding a dimension to the array -> new shape == (28, 28, 1)\n", - "# We are doing this because the first layer in our model is a convolutional\n", + "# Add a dimension to the array -> new shape == (28, 28, 1)\n", + "# This is done because the first layer in our model is a convolutional\n", "# layer and it requires a 4D input (batch_size, height, width, channels).\n", "# batch_size dimension will be added later on.\n", "train_images = train_images[..., None]\n", "test_images = test_images[..., None]\n", "\n", - "# Getting the images in [0, 1] range.\n", + "# Scale the images to the [0, 1] range.\n", "train_images = train_images / np.float32(255)\n", "test_images = test_images / np.float32(255)" ] @@ -141,13 +141,13 @@ "source": [ "How does `tf.distribute.MirroredStrategy` strategy work?\n", "\n", - "* All the variables and the model graph is replicated on the replicas.\n", + "* All the variables and the model graph are replicated across the replicas.\n", "* Input is evenly distributed across the replicas.\n", "* Each replica calculates the loss and gradients for the input it received.\n", - "* The gradients are synced across all the replicas by summing them.\n", + "* The gradients are synced across all the replicas by **summing** them.\n", "* After the sync, the same update is made to the copies of the variables on each replica.\n", "\n", - "Note: You can put all the code below inside a single scope. We are dividing it into several code cells for illustration purposes.\n" + "Note: You can put all the code below inside a single scope. This example divides it into several code cells for illustration purposes.\n" ] }, { @@ -158,8 +158,8 @@ }, "outputs": [], "source": [ - "# If the list of devices is not specified in the\n", - "# `tf.distribute.MirroredStrategy` constructor, it will be auto-detected.\n", + "# If the list of devices is not specified in\n", + "# `tf.distribute.MirroredStrategy` constructor, they will be auto-detected.\n", "strategy = tf.distribute.MirroredStrategy()" ] }, @@ -171,7 +171,7 @@ }, "outputs": [], "source": [ - "print ('Number of devices: {}'.format(strategy.num_replicas_in_sync))" + "print('Number of devices: {}'.format(strategy.num_replicas_in_sync))" ] }, { @@ -183,15 +183,6 @@ "## Setup input pipeline" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "0Qb6nDgxiN_n" - }, - "source": [ - "Export the graph and the variables to the platform-agnostic SavedModel format. After your model is saved, you can load it with or without the scope." 
- ] - }, { "cell_type": "code", "execution_count": null, @@ -225,8 +216,8 @@ }, "outputs": [], "source": [ - "train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE) \n", - "test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE) \n", + "train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE)\n", + "test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE)\n", "\n", "train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)\n", "test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)" @@ -240,7 +231,7 @@ "source": [ "## Create the model\n", "\n", - "Create a model using `tf.keras.Sequential`. You can also use the Model Subclassing API to do this." + "Create a model using `tf.keras.Sequential`. You can also use the [Model Subclassing API](https://www.tensorflow.org/guide/keras/custom_layers_and_models) or the [functional API](https://www.tensorflow.org/guide/keras/functional) to do this." ] }, { @@ -252,14 +243,21 @@ "outputs": [], "source": [ "def create_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " model = tf.keras.Sequential([\n", - " tf.keras.layers.Conv2D(32, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(32, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.MaxPooling2D(),\n", - " tf.keras.layers.Conv2D(64, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(64, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.MaxPooling2D(),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(64, activation='relu'),\n", - " tf.keras.layers.Dense(10)\n", + " tf.keras.layers.Dense(64,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10, kernel_regularizer=regularizer)\n", " ])\n", "\n", " return model" @@ -286,25 +284,29 @@ "source": [ "## Define the loss function\n", "\n", - "Normally, on a single machine with 1 GPU/CPU, loss is divided by the number of examples in the batch of input.\n", + "Recall that the loss function consists of one or two parts:\n", "\n", - "*So, how should the loss be calculated when using a `tf.distribute.Strategy`?*\n", + " * The **prediction loss** measures how far off the model's predictions are from the training labels for a batch of training examples. It is computed for each labeled example and then reduced across the batch by computing the average value.\n", + " * Optionally, **regularization loss** terms can be added to the prediction loss, to steer the model away from overfitting the training data. A common choice is L2 regularization, which adds a small fixed multiple of the sum of squares of all model weights, independent of the number of examples. The model above uses L2 regularization to demonstrate its handling in the training loop below.\n", "\n", - "* For an example, let's say you have 4 GPU's and a batch size of 64. One batch of input is distributed\n", - "across the replicas (4 GPUs), each replica getting an input of size 16.\n", + "For training on a single machine with a single GPU/CPU, this works as follows:\n", "\n", - "* The model on each replica does a forward pass with its respective input and calculates the loss. 
Now, instead of dividing the loss by the number of examples in its respective input (BATCH_SIZE_PER_REPLICA = 16), the loss should be divided by the GLOBAL_BATCH_SIZE (64)." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "OCIcsaeoIHJX" - }, - "source": [ - "*Why do this?*\n", + " * The prediction loss is computed for each example in the batch, summed across the batch, and then divided by the batch size.\n", + " * The regularization loss is added to the prediction loss.\n", + " * The gradient of the total loss is computed w.r.t. each model weight, and the optimizer updates each model weight from the corresponding gradient.\n", + "\n", + "With `tf.distribute.Strategy`, the input batch is split between replicas.\n", + "For example, let's say you have 4 GPUs, each with one replica of the model. One batch of 256 input examples is distributed evenly across the 4 replicas, so each replica gets a batch of size 64: We have `256 = 4*64`, or generally `GLOBAL_BATCH_SIZE = num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`.\n", "\n", - "* This needs to be done because after the gradients are calculated on each replica, they are synced across the replicas by **summing** them." + "Each replica computes the loss from the training examples it gets and computes the gradients of the loss w.r.t. each model weight. The optimizer takes care that these **gradients are summed up across replicas** before using them to update the copies of the model weights on each replica.\n", + "\n", + "*So, how should the loss be calculated when using a `tf.distribute.Strategy`?*\n", + "\n", + " * Each replica computes the prediction loss for all examples distributed to it, sums up the results and divides them by `num_replicas_in_sync * BATCH_SIZE_PER_REPLICA`, or equivalently, `GLOBAL_BATCH_SIZE`.\n", + " * Each replica computes the regularization loss(es) and divides them by\n", + " `num_replicas_in_sync`.\n", + "\n", + "Compared to non-distributed training, all per-replica loss terms are scaled down by a factor of `1/num_replicas_in_sync`. On the other hand, all loss terms -- or rather, their gradients -- are summed across that number of replicas before the optimizer applies them. In effect, the optimizer on each replica uses the same gradients as if a non-distributed computation with `GLOBAL_BATCH_SIZE` had happened. This is consistent with the distributed and undistributed behavior of Keras `Model.fit`. See the [Distributed training with Keras](./keras.ipynb) tutorial to learn how a larger global batch size enables you to scale up the learning rate." ] }, { @@ -315,31 +317,18 @@ "source": [ "*How to do this in TensorFlow?*\n", "\n", - "* If you're writing a custom training loop, as in this tutorial, you should sum the per example losses and divide the sum by the GLOBAL_BATCH_SIZE: \n", - "`scale_loss = tf.reduce_sum(loss) * (1. / GLOBAL_BATCH_SIZE)`\n", - "or you can use `tf.nn.compute_average_loss` which takes the per example loss,\n", - "optional sample weights, and GLOBAL_BATCH_SIZE as arguments and returns the scaled loss.\n", - "\n", - "* If you are using regularization losses in your model then you need to scale\n", - "the loss value by number of replicas. You can do this by using the `tf.nn.scale_regularization_loss` function.\n", + " * Loss reduction and scaling is done automatically in Keras `Model.compile` and `Model.fit`.\n", "\n", - "* Using `tf.reduce_mean` is not recommended. 
Doing so divides the loss by actual per replica batch size which may vary step to step.\n", + " * If you're writing a custom training loop, as in this tutorial, you should sum the per-example losses and divide the sum by the global batch size using `tf.nn.compute_average_loss`, which takes the per-example losses and\n", + "optional sample weights as arguments and returns the scaled loss.\n", "\n", - "* This reduction and scaling is done automatically in keras `model.compile` and `model.fit`\n", + " * If using `tf.keras.losses` classes (as in the example below), the loss reduction needs to be explicitly specified to be one of `NONE` or `SUM`. The default `AUTO` and `SUM_OVER_BATCH_SIZE` are disallowed outside `Model.fit`.\n", + " * `AUTO` is disallowed because the user should explicitly think about what reduction they want to make sure it is correct in the distributed case.\n", + " * `SUM_OVER_BATCH_SIZE` is disallowed because currently it would only divide by per replica batch size, and leave the dividing by number of replicas to the user, which might be easy to miss. So, instead, you need to do the reduction yourself explicitly.\n", "\n", - "* If using `tf.keras.losses` classes (as in the example below), the loss reduction needs to be explicitly specified to be one of `NONE` or `SUM`. `AUTO` and `SUM_OVER_BATCH_SIZE` are disallowed when used with `tf.distribute.Strategy`. `AUTO` is disallowed because the user should explicitly think about what reduction they want to make sure it is correct in the distributed case. `SUM_OVER_BATCH_SIZE` is disallowed because currently it would only divide by per replica batch size, and leave the dividing by number of replicas to the user, which might be easy to miss. So instead we ask the user do the reduction themselves explicitly.\n", - "* If `labels` is multi-dimensional, then average the `per_example_loss` across the number of elements in each sample. For example, if the shape of `predictions` is `(batch_size, H, W, n_classes)` and `labels` is `(batch_size, H, W)`, you will need to update `per_example_loss` like: `per_example_loss /= tf.cast(tf.reduce_prod(tf.shape(labels)[1:]), tf.float32)`\n", + " * If you're writing a custom training loop for a model with a non-empty list of `Model.losses` (e.g., weight regularizers), you should sum them up and divide the sum by the number of replicas. You can do this by using the `tf.nn.scale_regularization_loss` function. The model code itself remains unaware of the number of replicas.\n", "\n", - " Caution: **Verify the shape of your loss**. \n", - " Loss functions in `tf.losses`/`tf.keras.losses` typically\n", - " return the average over the last dimension of the input. The loss\n", - " classes wrap these functions. Passing `reduction=Reduction.NONE` when\n", - " creating an instance of a loss class means \"no **additional** reduction\".\n", - " For categorical losses with an example input shape of `[batch, W, H, n_classes]` the `n_classes`\n", - " dimension is reduced. For pointwise losses like\n", - " `losses.mean_squared_error` or `losses.binary_crossentropy` include a\n", - " dummy axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`. Without\n", - " the dummy axis `[batch, W, H]` will be incorrectly reduced to `[batch, W]`.\n" + " However, models can define input-dependent regularization losses with Keras APIs such as `Layer.add_loss(...)` and `Layer(activity_regularizer=...)`. 
For `Layer.add_loss(...)`, it falls on the modeling code to perform the division of the summed per-example terms by the per-replica(!) batch size, e.g., by using `tf.math.reduce_mean()`." ] }, { @@ -351,14 +340,51 @@ }, "outputs": [], "source": [ "with strategy.scope():\n", - " # Set reduction to `none` so we can do the reduction afterwards and divide by\n", - " # global batch size.\n", + " # Set reduction to `NONE` so you can do the reduction yourself.\n", " loss_object = tf.keras.losses.SparseCategoricalCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)\n", - " def compute_loss(labels, predictions):\n", + " def compute_loss(labels, predictions, model_losses):\n", " per_example_loss = loss_object(labels, predictions)\n", - " return tf.nn.compute_average_loss(per_example_loss, global_batch_size=GLOBAL_BATCH_SIZE)" + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + " return loss" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "6pM96bqQY52D" + }, + "source": [ + "### Special cases\n", + "\n", + "Advanced users should also consider the following special cases.\n", + "\n", + " * Input batches shorter than `GLOBAL_BATCH_SIZE` create unpleasant corner cases in several places. In practice, it often works best to avoid them by allowing batches to span epoch boundaries using `Dataset.repeat().batch()` and defining approximate epochs by step counts, not dataset ends. Alternatively, `Dataset.batch(drop_remainder=True)` maintains the notion of epoch but drops the last few examples.\n", + "\n", + "   For illustration, this example goes the harder route and allows short batches, so that each training epoch contains each training example exactly once.\n", + "   \n", + "   Which denominator should be used by `tf.nn.compute_average_loss()`?\n", + "\n", + " * By default, in the example code above and equivalently in Keras `Model.fit`, the sum of prediction losses is divided by `num_replicas_in_sync` times the actual batch size seen on the replica (with empty batches silently ignored). This preserves the balance between the prediction loss on the one hand and the regularization losses on the other hand. It is particularly appropriate for models that use input-dependent regularization losses. Plain L2 regularization just superimposes weight decay onto the gradients of the prediction loss and is less in need of such a balance.\n", + " * In practice, many custom training loops pass a constant Python value into `tf.nn.compute_average_loss(..., global_batch_size=GLOBAL_BATCH_SIZE)` to use it as the denominator. This preserves the relative weighting of training examples between batches. Without it, the smaller denominator in short batches effectively upweights the examples in those batches. (Before TensorFlow 2.13, this was also needed to avoid NaNs in case some replica received an actual batch size of zero.)\n", + "   \n", + "   Both options are equivalent if short batches are avoided, as suggested above.\n", + "\n", + " * Multi-dimensional `labels` require you to average the `per_example_loss` across the number of predictions in each example. Consider a classification task for all pixels of an input image, with `predictions` of shape `(batch_size, H, W, n_classes)` and `labels` of shape `(batch_size, H, W)`. 
You will need to update `per_example_loss` like: `per_example_loss /= tf.cast(tf.reduce_prod(tf.shape(labels)[1:]), tf.float32)`\n", + "\n", + " Caution: **Verify the shape of your loss**.\n", + " Loss functions in `tf.losses`/`tf.keras.losses` typically\n", + " return the average over the last dimension of the input. The loss\n", + " classes wrap these functions. Passing `reduction=Reduction.NONE` when\n", + " creating an instance of a loss class means \"no **additional** reduction\".\n", + " For categorical losses with an example input shape of `[batch, W, H, n_classes]` the `n_classes`\n", + " dimension is reduced. For pointwise losses like\n", + " `losses.mean_squared_error` or `losses.binary_crossentropy` include a\n", + " dummy axis so that `[batch, W, H, 1]` is reduced to `[batch, W, H]`. Without\n", + " the dummy axis `[batch, W, H]` will be incorrectly reduced to `[batch, W]`." ] }, { @@ -406,11 +432,11 @@ }, "outputs": [], "source": [ - "# model, optimizer, and checkpoint must be created under `strategy.scope`.\n", + "# A model, an optimizer, and a checkpoint must be created under `strategy.scope`.\n", "with strategy.scope():\n", " model = create_model()\n", "\n", - " optimizer = tf.keras.optimizers.Adam()\n", + " optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)\n", "\n", " checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)" ] @@ -428,13 +454,13 @@ "\n", " with tf.GradientTape() as tape:\n", " predictions = model(images, training=True)\n", - " loss = compute_loss(labels, predictions)\n", + " loss = compute_loss(labels, predictions, model.losses)\n", "\n", " gradients = tape.gradient(loss, model.trainable_variables)\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", "\n", " train_accuracy.update_state(labels, predictions)\n", - " return loss \n", + " return loss\n", "\n", "def test_step(inputs):\n", " images, labels = inputs\n", @@ -484,9 +510,9 @@ "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, \"\n", " \"Test Accuracy: {}\")\n", - " print (template.format(epoch+1, train_loss,\n", - " train_accuracy.result()*100, test_loss.result(),\n", - " test_accuracy.result()*100))\n", + " print(template.format(epoch + 1, train_loss,\n", + " train_accuracy.result() * 100, test_loss.result(),\n", + " test_accuracy.result() * 100))\n", "\n", " test_loss.reset_states()\n", " train_accuracy.reset_states()\n", @@ -499,12 +525,12 @@ "id": "Z1YvXqOpwy08" }, "source": [ - "Things to note in the example above:\n", + "### Things to note in the example above\n", "\n", - "* We are iterating over the `train_dist_dataset` and `test_dist_dataset` using a `for x in ...` construct.\n", + "* Iterate over the `train_dist_dataset` and `test_dist_dataset` using a `for x in ...` construct.\n", "* The scaled loss is the return value of the `distributed_train_step`. This value is aggregated across replicas using the `tf.distribute.Strategy.reduce` call and then across batches by summing the return value of the `tf.distribute.Strategy.reduce` calls.\n", "* `tf.keras.Metrics` should be updated inside `train_step` and `test_step` that gets executed by `tf.distribute.Strategy.run`.\n", - "*`tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can do `tf.distribute.Strategy.reduce` to get an aggregated value. 
You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n" + "* `tf.distribute.Strategy.run` returns results from each local replica in the strategy, and there are multiple ways to consume this result. You can do `tf.distribute.Strategy.reduce` to get an aggregated value. You can also do `tf.distribute.Strategy.experimental_local_results` to get the list of values contained in the result, one per local replica.\n" ] }, { @@ -570,8 +596,8 @@ "for images, labels in test_dataset:\n", " eval_step(images, labels)\n", "\n", - "print ('Accuracy after restoring the saved model without strategy: {}'.format(\n", - " eval_accuracy.result()*100))" + "print('Accuracy after restoring the saved model without strategy: {}'.format(\n", + " eval_accuracy.result() * 100))" ] }, { @@ -584,7 +610,7 @@ "\n", "### Using iterators\n", "\n", - "If you want to iterate over a given number of steps and not through the entire dataset you can create an iterator using the `iter` call and explicity call `next` on the iterator. You can choose to iterate over the dataset both inside and outside the tf.function. Here is a small snippet demonstrating iteration of the dataset outside the tf.function using an iterator.\n" + "If you want to iterate over a given number of steps and not through the entire dataset, you can create an iterator using the `iter` call and explicitly call `next` on the iterator. You can choose to iterate over the dataset both inside and outside the `tf.function`. Here is a small snippet demonstrating iteration of the dataset outside the `tf.function` using an iterator.\n" ] }, { @@ -606,7 +632,7 @@ " average_train_loss = total_loss / num_batches\n", "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}\")\n", - " print (template.format(epoch+1, average_train_loss, train_accuracy.result()*100))\n", + " print(template.format(epoch + 1, average_train_loss, train_accuracy.result() * 100))\n", " train_accuracy.reset_states()" ] }, @@ -616,8 +642,9 @@ "id": "GxVp48Oy0m6y" }, "source": [ - "### Iterating inside a tf.function\n", - "You can also iterate over the entire input `train_dist_dataset` inside a tf.function using the `for x in ...` construct or by creating iterators like we did above. The example below demonstrates wrapping one epoch of training in a tf.function and iterating over `train_dist_dataset` inside the function." + "### Iterating inside a `tf.function`\n", + "\n", + "You can also iterate over the entire input `train_dist_dataset` inside a `tf.function` using the `for x in ...` construct or by creating iterators like you did above. The example below demonstrates wrapping one epoch of training with a `@tf.function` decorator and iterating over `train_dist_dataset` inside the function." 
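Because the hunk below only shows the lines of this cell that changed, here is a minimal sketch of what such an epoch-level `tf.function` can look like; it assumes the `strategy`, `train_step`, and `train_dist_dataset` objects defined earlier in the notebook and is a sketch of the pattern rather than a verbatim copy of the cell:

```python
@tf.function
def distributed_train_epoch(dataset):
  # Accumulate the scaled per-step losses over the whole epoch.
  total_loss = 0.0
  num_batches = 0
  for x in dataset:
    # Run one training step on every replica and sum the per-replica losses.
    per_replica_losses = strategy.run(train_step, args=(x,))
    total_loss += strategy.reduce(
        tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)
    num_batches += 1
  return total_loss / tf.cast(num_batches, dtype=tf.float32)
```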
] }, { @@ -643,7 +670,7 @@ " train_loss = distributed_train_epoch(train_dist_dataset)\n", "\n", " template = (\"Epoch {}, Loss: {}, Accuracy: {}\")\n", - " print (template.format(epoch+1, train_loss, train_accuracy.result()*100))\n", + " print(template.format(epoch + 1, train_loss, train_accuracy.result() * 100))\n", "\n", " train_accuracy.reset_states()" ] @@ -658,17 +685,18 @@ "\n", "Note: As a general rule, you should use `tf.keras.Metrics` to track per-sample values and avoid values that have been aggregated within a replica.\n", "\n", - "We do *not* recommend using `tf.metrics.Mean` to track the training loss across different replicas, because of the loss scaling computation that is carried out.\n", + "Because of the loss scaling computation that is carried out, it's not recommended to use `tf.keras.metrics.Mean` to track the training loss across different replicas.\n", "\n", "For example, if you run a training job with the following characteristics:\n", + "\n", "* Two replicas\n", "* Two samples are processed on each replica\n", "* Resulting loss values: [2, 3] and [4, 5] on each replica\n", "* Global batch size = 4\n", "\n", - "With loss scaling, you calculate the per-sample value of loss on each replica by adding the loss values, and then dividing by the global batch size. In this case: `(2 + 3) / 4 = 1.25` and `(4 + 5) / 4 = 2.25`. \n", + "With loss scaling, you calculate the per-sample value of loss on each replica by adding the loss values, and then dividing by the global batch size. In this case: `(2 + 3) / 4 = 1.25` and `(4 + 5) / 4 = 2.25`.\n", "\n", - "If you use `tf.metrics.Mean` to track loss across the two replicas, the result is different. In this example, you end up with a `total` of 3.50 and `count` of 2, which results in `total`/`count` = 1.75 when `result()` is called on the metric. Loss calculated with `tf.keras.Metrics` is scaled by an additional factor that is equal to the number of replicas in sync." + "If you use `tf.keras.metrics.Mean` to track loss across the two replicas, the result is different. In this example, you end up with a `total` of 3.50 and `count` of 2, which results in `total`/`count` = 1.75 when `result()` is called on the metric. Loss calculated with `tf.keras.Metrics` is scaled by an additional factor that is equal to the number of replicas in sync." ] }, { @@ -678,16 +706,17 @@ }, "source": [ "### Guide and examples\n", + "\n", "Here are some examples for using distribution strategy with custom training loops:\n", "\n", "1. [Distributed training guide](../../guide/distributed_training)\n", "2. [DenseNet](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/densenet/distributed_train.py) example using `MirroredStrategy`.\n", - "1. [BERT](https://github.com/tensorflow/models/blob/master/official/nlp/bert/run_classifier.py) example trained using `MirroredStrategy` and `TPUStrategy`.\n", + "1. [BERT](https://github.com/tensorflow/models/blob/master/official/legacy/bert/run_classifier.py) example trained using `MirroredStrategy` and `TPUStrategy`.\n", "This example is particularly helpful for understanding how to load from a checkpoint and generate periodic checkpoints during distributed training etc.\n", "2. [NCF](https://github.com/tensorflow/models/blob/master/official/recommendation/ncf_keras_main.py) example trained using `MirroredStrategy` that can be enabled using the `keras_use_ctl` flag.\n", "3. 
[NMT](https://github.com/tensorflow/examples/blob/master/tensorflow_examples/models/nmt_with_attention/distributed_train.py) example trained using `MirroredStrategy`.\n", "\n", - "More examples listed in the [Distribution strategy guide](../../guide/distributed_training.ipynb#examples_and_tutorials)." + "You can find more examples listed under _Examples and tutorials_ in the [Distribution strategy guide](../../guide/distributed_training.ipynb)." ] }, { @@ -699,7 +728,8 @@ "## Next steps\n", "\n", "* Try out the new `tf.distribute.Strategy` API on your models.\n", - "* Visit the [Performance section](../../guide/function.ipynb) in the guide to learn more about other strategies and [tools](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models." + "* Visit the [Better performance with `tf.function`](../../guide/function.ipynb) and [TensorFlow Profiler](../../guide/profiler.md) guides to learn more about tools to optimize the performance of your TensorFlow models.\n", + "* Check out the [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide, which provides an overview of the available distribution strategies." ] } ], @@ -707,7 +737,6 @@ "colab": { "collapsed_sections": [], "name": "custom_training.ipynb", - "provenance": [], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb new file mode 100644 index 00000000000..84f6478c2b5 --- /dev/null +++ b/site/en/tutorials/distribute/dtensor_keras_tutorial.ipynb @@ -0,0 +1,760 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MT-LkFOl2axM" + }, + "source": [ + "# Using DTensors with Keras" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r6P32iYYV27b" + }, + "source": [ + "
    \n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vTe9dcbUAwqx" + }, + "source": [ + "## Overview\n", + "\n", + "In this tutorial, you will learn how to use DTensors with Keras.\n", + "\n", + "Through DTensor integration with Keras, you can reuse your existing Keras layers and models to build and train distributed machine learning models.\n", + "\n", + "You will train a multi-layer classification model with the MNIST data. Setting the layout for subclassing model, Sequential model, and functional model will be demonstrated.\n", + "\n", + "This tutorial assumes that you have already read the [DTensor programing guide](/guide/dtensor_overview), and are familiar with basic DTensor concepts like `Mesh` and `Layout`.\n", + "\n", + "This tutorial is based on [Training a neural network on MNIST with Keras](https://www.tensorflow.org/datasets/keras_example)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "keIyP3IoA1o4" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow Datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4dHik7NYA5vm" + }, + "outputs": [], + "source": [ + "!pip install --quiet --upgrade tensorflow-datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VttBMZngDx8x" + }, + "source": [ + "Next, import TensorFlow and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", + "\n", + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CodX6idGBGSm" + }, + "outputs": [], + "source": [ + "import tensorflow as tf\n", + "import tensorflow_datasets as tfds\n", + "from tensorflow.experimental import dtensor" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aAtvrpasDpDD" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(\n", + " phy_devices[0], \n", + " [tf.config.LogicalDeviceConfiguration()] * ncpu)\n", + " \n", + "configure_virtual_cpus(8)\n", + "tf.config.list_logical_devices('CPU')\n", + "\n", + "devices = [f'CPU:{i}' for i in range(8)]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ogULE1OHtyd9" + }, + "source": [ + "## Deterministic pseudo-random number generators\n", + "One thing you should note is that DTensor API requires each of the running client to have the same random seeds, so that it could have deterministic behavior for initializing the weights. You can achieve this by setting the global seeds in keras via `tf.keras.utils.set_random_seed()`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9u85YypguL8N" + }, + "outputs": [], + "source": [ + "tf.keras.backend.experimental.enable_tf_random_generator()\n", + "tf.keras.utils.set_random_seed(1337)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tO11XvPDAu3_" + }, + "source": [ + "## Creating a Data Parallel Mesh\n", + "\n", + "This tutorial demonstrates Data Parallel training. Adapting to Model Parallel training and Spatial Parallel training can be as simple as switching to a different set of `Layout` objects. 
Refer to the [Distributed training with DTensors](dtensor_ml_tutorial.ipynb) tutorial for more information on distributed training beyond Data Parallel.\n", + "\n", + "Data Parallel training is a commonly used parallel training scheme, also used by, for example, `tf.distribute.MirroredStrategy`.\n", + "\n", + "With DTensor, a Data Parallel training loop uses a `Mesh` that consists of a single 'batch' dimension, where each device runs a replica of the model that receives a shard from the global batch." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6sT6s6z4j9H-" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=devices)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rouFcF6FE0aF" + }, + "source": [ + "As each device runs a full replica of the model, the model variables shall be fully replicated across the mesh (unsharded). As an example, a fully replicated Layout for a rank-2 weight on this `Mesh` would be as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "U8OxvkDKE1Nu" + }, + "outputs": [], + "source": [ + "example_weight_layout = dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh) # or\n", + "example_weight_layout = dtensor.Layout.replicated(mesh, rank=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6Bnic98RE0xi" + }, + "source": [ + "A layout for a rank-2 data tensor on this `Mesh` would be sharded along the first dimension (sometimes known as `batch_sharded`)," + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PhYp0EKBFfxt" + }, + "outputs": [], + "source": [ + "example_data_layout = dtensor.Layout(['batch', dtensor.UNSHARDED], mesh) # or\n", + "example_data_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4U-6n0DericV" + }, + "source": [ + "## Create Keras layers with layout\n", + "\n", + "In the data parallel scheme, you usually create your model weights with a fully replicated layout, so that each replica of the model can do calculations with the sharded input data. \n", + "\n", + "In order to configure the layout information for your layers' weights, Keras has exposed an extra parameter in the layer constructor for most of the built-in layers.\n", + "\n", + "The following example builds a small image classification model with fully replicated weight layout. You can specify layout information `kernel` and `bias` in `tf.keras.layers.Dense` via arguments `kernel_layout` and `bias_layout`. Most of the built-in keras layers are ready for explicitly specifying the `Layout` for the layer weights." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Koc5GlA1tFXY" + }, + "outputs": [], + "source": [ + "unsharded_layout_2d = dtensor.Layout.replicated(mesh, 2)\n", + "unsharded_layout_1d = dtensor.Layout.replicated(mesh, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GfOGTIxGs5Ql" + }, + "outputs": [], + "source": [ + "model = tf.keras.models.Sequential([\n", + " tf.keras.layers.Flatten(input_shape=(28, 28)),\n", + " tf.keras.layers.Dense(128, \n", + " activation='relu',\n", + " name='d1',\n", + " kernel_layout=unsharded_layout_2d, \n", + " bias_layout=unsharded_layout_1d),\n", + " tf.keras.layers.Dense(10,\n", + " name='d2',\n", + " kernel_layout=unsharded_layout_2d, \n", + " bias_layout=unsharded_layout_1d)\n", + "])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0frf3jsVtx_n" + }, + "source": [ + "You can check the layout information by examining the `layout` property on the weights." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Z_nqv_VdwcXo" + }, + "outputs": [], + "source": [ + "for weight in model.weights:\n", + " print(f'Weight name: {weight.name} with layout: {weight.layout}')\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6FMGB-QsxPtU" + }, + "source": [ + "## Load a dataset and build input pipeline\n", + "\n", + "Load a MNIST dataset and configure some pre-processing input pipeline for it. The dataset itself is not associated with any DTensor layout information." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zGt4kwltxOt4" + }, + "outputs": [], + "source": [ + "(ds_train, ds_test), ds_info = tfds.load(\n", + " 'mnist',\n", + " split=['train', 'test'],\n", + " shuffle_files=True,\n", + " as_supervised=True,\n", + " with_info=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HkUaOB_ryaLH" + }, + "outputs": [], + "source": [ + "def normalize_img(image, label):\n", + " \"\"\"Normalizes images: `uint8` -> `float32`.\"\"\"\n", + " return tf.cast(image, tf.float32) / 255., label" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Efm2H1iqydan" + }, + "outputs": [], + "source": [ + "batch_size = 128\n", + "\n", + "ds_train = ds_train.map(\n", + " normalize_img, num_parallel_calls=tf.data.AUTOTUNE)\n", + "ds_train = ds_train.cache()\n", + "ds_train = ds_train.shuffle(ds_info.splits['train'].num_examples)\n", + "ds_train = ds_train.batch(batch_size)\n", + "ds_train = ds_train.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Lcrg6QAtyis4" + }, + "outputs": [], + "source": [ + "ds_test = ds_test.map(\n", + " normalize_img, num_parallel_calls=tf.data.AUTOTUNE)\n", + "ds_test = ds_test.batch(batch_size)\n", + "ds_test = ds_test.cache()\n", + "ds_test = ds_test.prefetch(tf.data.AUTOTUNE)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fHEZwib7lhqn" + }, + "source": [ + "## Define the training logic for the model\n", + "\n", + "Next, define the training and evaluation logic for the model. \n", + "\n", + "As of TensorFlow 2.9, you have to write a custom-training-loop for a DTensor-enabled Keras model. This is to pack the input data with proper layout information, which is not integrated with the standard `tf.keras.Model.fit()` or `tf.keras.Model.eval()` functions from Keras. 
you will get more `tf.data` support in the upcoming release. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "CAx11gMjzzjs" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def train_step(model, x, y, optimizer, metrics):\n", + " with tf.GradientTape() as tape:\n", + " logits = model(x, training=True)\n", + " # tf.reduce_sum sums the batch sharded per-example loss to a replicated\n", + " # global loss (scalar).\n", + " loss = tf.reduce_sum(tf.keras.losses.sparse_categorical_crossentropy(\n", + " y, logits, from_logits=True))\n", + " \n", + " gradients = tape.gradient(loss, model.trainable_variables)\n", + " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", + "\n", + " for metric in metrics.values():\n", + " metric.update_state(y_true=y, y_pred=logits)\n", + "\n", + " loss_per_sample = loss / len(x)\n", + " results = {'loss': loss_per_sample}\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "maSTWeRemO0P" + }, + "outputs": [], + "source": [ + "@tf.function\n", + "def eval_step(model, x, y, metrics):\n", + " logits = model(x, training=False)\n", + " loss = tf.reduce_sum(tf.keras.losses.sparse_categorical_crossentropy(\n", + " y, logits, from_logits=True))\n", + "\n", + " for metric in metrics.values():\n", + " metric.update_state(y_true=y, y_pred=logits)\n", + "\n", + " loss_per_sample = loss / len(x)\n", + " results = {'eval_loss': loss_per_sample}\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dt00axcLmvLr" + }, + "outputs": [], + "source": [ + "def pack_dtensor_inputs(images, labels, image_layout, label_layout):\n", + " num_local_devices = image_layout.mesh.num_local_devices()\n", + " images = tf.split(images, num_local_devices)\n", + " labels = tf.split(labels, num_local_devices)\n", + " images = dtensor.pack(images, image_layout)\n", + " labels = dtensor.pack(labels, label_layout)\n", + " return images, labels" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9Eb-qIJGrxB9" + }, + "source": [ + "## Metrics and optimizers\n", + "\n", + "When using DTensor API with Keras `Metric` and `Optimizer`, you will need to provide the extra mesh information, so that any internal state variables and tensors can work with variables in the model.\n", + "\n", + "- For an optimizer, DTensor introduces a new experimental namespace `keras.dtensor.experimental.optimizers`, where many existing Keras Optimizers are extended to receive an additional `mesh` argument. In future releases, it may be merged with Keras core optimizers.\n", + "\n", + "- For metrics, you can directly specify the `mesh` to the constructor as an argument to make it a DTensor compatible `Metric`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1lu_0mz1sxrl" + }, + "outputs": [], + "source": [ + "optimizer = tf.keras.dtensor.experimental.optimizers.Adam(0.01, mesh=mesh)\n", + "metrics = {'accuracy': tf.keras.metrics.SparseCategoricalAccuracy(mesh=mesh)}\n", + "eval_metrics = {'eval_accuracy': tf.keras.metrics.SparseCategoricalAccuracy(mesh=mesh)}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QzufrkistELx" + }, + "source": [ + "## Train the model\n", + "\n", + "The following example demonstrates how to shard the data from input pipeline on the batch dimension, and train with the model, which has fully replicated weights. 
\n", + "\n", + "After 3 epochs, the model should achieve about 97% of accuracy:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kZW568Dk0vvL" + }, + "outputs": [], + "source": [ + "num_epochs = 3\n", + "\n", + "image_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=4)\n", + "label_layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)\n", + "\n", + "for epoch in range(num_epochs):\n", + " print(\"============================\") \n", + " print(\"Epoch: \", epoch)\n", + " for metric in metrics.values():\n", + " metric.reset_state()\n", + " step = 0\n", + " results = {}\n", + " pbar = tf.keras.utils.Progbar(target=None, stateful_metrics=[])\n", + " for input in ds_train:\n", + " images, labels = input[0], input[1]\n", + " images, labels = pack_dtensor_inputs(\n", + " images, labels, image_layout, label_layout)\n", + "\n", + " results.update(train_step(model, images, labels, optimizer, metrics))\n", + " for metric_name, metric in metrics.items():\n", + " results[metric_name] = metric.result()\n", + "\n", + " pbar.update(step, values=results.items(), finalize=False)\n", + " step += 1\n", + " pbar.update(step, values=results.items(), finalize=True)\n", + "\n", + " for metric in eval_metrics.values():\n", + " metric.reset_state()\n", + " for input in ds_test:\n", + " images, labels = input[0], input[1]\n", + " images, labels = pack_dtensor_inputs(\n", + " images, labels, image_layout, label_layout)\n", + " results.update(eval_step(model, images, labels, eval_metrics))\n", + "\n", + " for metric_name, metric in eval_metrics.items():\n", + " results[metric_name] = metric.result()\n", + " \n", + " for metric_name, metric in results.items():\n", + " print(f\"{metric_name}: {metric.numpy()}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HYEXF6qCuoSr" + }, + "source": [ + "## Specify Layout for existing model code\n", + "\n", + "Often you have models that work well for your use case. Specifying `Layout` information to each individual layer within the model will be a large amount of work requiring a lot of edits.\n", + "\n", + "To help you easily convert your existing Keras model to work with DTensor API you can use the new `tf.keras.dtensor.experimental.LayoutMap` API that allow you to specify the `Layout` from a global point of view.\n", + "\n", + "First, you need to create a `LayoutMap` instance, which is a dictionary-like object that contains all the `Layout` you would like to specify for your model weights.\n", + "\n", + "`LayoutMap` needs a `Mesh` instance at init, which can be used to provide default replicated `Layout` for any weights that doesn't have Layout configured. In case you would like all your model weights to be just fully replicated, you can provide empty `LayoutMap`, and the default mesh will be used to create replicated `Layout`.\n", + "\n", + "`LayoutMap` uses a string as key and a `Layout` as value. There is a behavior difference between a normal Python dict and this class. The string key will be treated as a regex when retrieving the value." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SCq5Nl-UP_dS" + }, + "source": [ + "### Subclassed Model\n", + "\n", + "Consider the following model defined using the Keras subclassing Model syntax." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LZ0hRFs8unu0" + }, + "outputs": [], + "source": [ + "class SubclassedModel(tf.keras.Model):\n", + "\n", + " def __init__(self, name=None):\n", + " super().__init__(name=name)\n", + " self.feature = tf.keras.layers.Dense(16)\n", + " self.feature_2 = tf.keras.layers.Dense(24)\n", + " self.dropout = tf.keras.layers.Dropout(0.1)\n", + "\n", + " def call(self, inputs, training=None):\n", + " x = self.feature(inputs)\n", + " x = self.dropout(x, training=training)\n", + " return self.feature_2(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1njxqPB-yS97" + }, + "source": [ + "There are 4 weights in this model, which are `kernel` and `bias` for two `Dense` layers. Each of them are mapped based on the object path:\n", + "\n", + "* `model.feature.kernel`\n", + "* `model.feature.bias`\n", + "* `model.feature_2.kernel`\n", + "* `model.feature_2.bias`\n", + "\n", + "Note: For subclassed Models, the attribute name, rather than the `.name` attribute of the layer, is used as the key to retrieve the Layout from the mapping. This is consistent with the convention followed by `tf.Module` checkpointing. For complex models with more than a few layers, you can [manually inspect checkpoints](https://www.tensorflow.org/guide/checkpoint#manually_inspecting_checkpoints) to view the attribute mappings. \n", + "\n", + "Now define the following `LayoutMap` and apply it to the model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "goVX6iIZw468" + }, + "outputs": [], + "source": [ + "layout_map = tf.keras.dtensor.experimental.LayoutMap(mesh=mesh)\n", + "\n", + "layout_map['feature.*kernel'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)\n", + "layout_map['feature.*bias'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)\n", + "\n", + "with layout_map.scope():\n", + " subclassed_model = SubclassedModel()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "M32HcSp_PyWs" + }, + "source": [ + "The model weights are created on the first call, so call the model with a DTensor input and confirm the weights have the expected layouts:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c3CbD9l7qUNq" + }, + "outputs": [], + "source": [ + "dtensor_input = dtensor.copy_to_mesh(tf.zeros((16, 16)), layout=unsharded_layout_2d)\n", + "# Trigger the weights creation for subclass model\n", + "subclassed_model(dtensor_input)\n", + "\n", + "print(subclassed_model.feature.kernel.layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ZyCnfd-4Q2jk" + }, + "source": [ + "With this, you can quickly map the `Layout` to your models without updating any of your existing code. " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6GliUdWTQnKC" + }, + "source": [ + "### Sequential and Functional Models" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6zzvTqAR2Teu" + }, + "source": [ + "For Keras Functional and Sequential models, you can use `tf.keras.dtensor.experimental.LayoutMap` as well.\n", + "\n", + "Note: For Functional and Sequential models, the mappings are slightly different. The layers in the model don't have a public attribute attached to the model (though you can access them via `Model.layers` as a list). Use the string name as the key in this case. The string name is guaranteed to be unique within a model." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gXK2EquIRJCC" + }, + "outputs": [], + "source": [ + "layout_map = tf.keras.dtensor.experimental.LayoutMap(mesh=mesh)\n", + "\n", + "layout_map['feature.*kernel'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=2)\n", + "layout_map['feature.*bias'] = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cBzwJqrg2TH3" + }, + "outputs": [], + "source": [ + "with layout_map.scope():\n", + " inputs = tf.keras.Input((16,), batch_size=16)\n", + " x = tf.keras.layers.Dense(16, name='feature')(inputs)\n", + " x = tf.keras.layers.Dropout(0.1)(x)\n", + " output = tf.keras.layers.Dense(32, name='feature_2')(x)\n", + " model = tf.keras.Model(inputs, output)\n", + "\n", + "print(model.layers[1].kernel.layout)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "pPuh1NlE3-wO" + }, + "outputs": [], + "source": [ + "with layout_map.scope():\n", + " model = tf.keras.Sequential([\n", + " tf.keras.layers.Dense(16, name='feature', input_shape=(16,)),\n", + " tf.keras.layers.Dropout(0.1),\n", + " tf.keras.layers.Dense(32, name='feature_2')\n", + " ])\n", + "\n", + "print(model.layers[2].kernel.layout)" + ] + } + ], + "metadata": { + "colab": { + "name": "dtensor_keras_tutorial.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb new file mode 100644 index 00000000000..55557be6368 --- /dev/null +++ b/site/en/tutorials/distribute/dtensor_ml_tutorial.ipynb @@ -0,0 +1,1070 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2019 The TensorFlow Authors.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "# Distributed training with DTensors" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "r6P32iYYV27b" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " View on TensorFlow.org\n", + " \n", + " Run in Google Colab\n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kiF4jjX4O1mF" + }, + "source": [ + "## Overview\n", + "\n", + "DTensor provides a way for you to distribute the training of your model across devices to improve efficiency, reliability and scalability. For more details, check out the [DTensor concepts](../../guide/dtensor_overview.ipynb) guide.\n", + "\n", + "In this tutorial, you will train a sentiment analysis model using DTensors. The example demonstrates three distributed training schemes:\n", + "\n", + " - Data Parallel training, where the training samples are sharded (partitioned) to devices.\n", + " - Model Parallel training, where the model variables are sharded to devices.\n", + " - Spatial Parallel training, where the features of input data are sharded to devices (also known as [Spatial Partitioning](https://cloud.google.com/blog/products/ai-machine-learning/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus)).\n", + "\n", + "The training portion of this tutorial is inspired by a Kaggle notebook called [A Kaggle guide on sentiment analysis](https://www.kaggle.com/code/anasofiauzsoy/yelp-review-sentiment-analysis-tensorflow-tfds/notebook). To learn about the complete training and evaluation workflow (without DTensor), refer to that notebook.\n", + "\n", + "This tutorial will walk through the following steps:\n", + "\n", + "- Some data cleaning to obtain a `tf.data.Dataset` of tokenized sentences and their polarity.\n", + "- Then, building an MLP model with custom Dense and BatchNorm layers using a `tf.Module` to track the inference variables. The model constructor will take additional `Layout` arguments to control the sharding of variables.\n", + "- For training, you will first use data parallel training together with `tf.experimental.dtensor`'s checkpoint feature. Then, you will continue with Model Parallel Training and Spatial Parallel Training.\n", + "- The final section briefly describes the interaction between `tf.saved_model` and `tf.experimental.dtensor` as of TensorFlow 2.9." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YD80veeg7QtW" + }, + "source": [ + "## Setup\n", + "\n", + "DTensor (`tf.experimental.dtensor`) has been part of TensorFlow since the 2.9.0 release.\n", + "\n", + "First, install or upgrade TensorFlow Datasets:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-RKXLJN-7Yyb" + }, + "outputs": [], + "source": [ + "!pip install --quiet --upgrade tensorflow-datasets" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tcxP4_Zu7ciQ" + }, + "source": [ + "Next, import `tensorflow` and `dtensor`, and configure TensorFlow to use 8 virtual CPUs.\n", + "\n", + "Even though this example uses virtual CPUs, DTensor works the same way on CPU, GPU or TPU devices." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dXcB26oP7dUd" + }, + "outputs": [], + "source": [ + "import tempfile\n", + "import numpy as np\n", + "import tensorflow_datasets as tfds\n", + "\n", + "import tensorflow as tf\n", + "\n", + "from tensorflow.experimental import dtensor\n", + "\n", + "print('TensorFlow version:', tf.__version__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oHtO6MJLUXlz" + }, + "outputs": [], + "source": [ + "def configure_virtual_cpus(ncpu):\n", + " phy_devices = tf.config.list_physical_devices('CPU')\n", + " tf.config.set_logical_device_configuration(phy_devices[0], [\n", + " tf.config.LogicalDeviceConfiguration(),\n", + " ] * ncpu)\n", + "\n", + "configure_virtual_cpus(8)\n", + "DEVICES = [f'CPU:{i}' for i in range(8)]\n", + "\n", + "tf.config.list_logical_devices('CPU')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "omYd4jbF7j_I" + }, + "source": [ + "## Download the dataset\n", + "\n", + "Download the IMDB reviews data set to train the sentiment analysis model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fW4w4QlFVHhx" + }, + "outputs": [], + "source": [ + "train_data = tfds.load('imdb_reviews', split='train', shuffle_files=True, batch_size=64)\n", + "train_data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ki3mpfi4aZH8" + }, + "source": [ + "## Prepare the data\n", + "\n", + "First tokenize the text. Here use an extension of one-hot encoding, the `'tf_idf'` mode of `tf.keras.layers.TextVectorization`.\n", + "\n", + "- For the sake of speed, limit the number of tokens to 1200.\n", + "- To keep the `tf.Module` simple, run `TextVectorization` as a preprocessing step before the training.\n", + "\n", + "The final result of the data cleaning section is a `Dataset` with the tokenized text as `x` and label as `y`.\n", + "\n", + "**Note**: Running `TextVectorization` as a preprocessing step is **neither a usual practice nor a recommended one** as doing so assumes the training data fits into the client memory, which is not always the case.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zNpxjku_57Lg" + }, + "outputs": [], + "source": [ + "text_vectorization = tf.keras.layers.TextVectorization(output_mode='tf_idf', max_tokens=1200, output_sequence_length=None)\n", + "text_vectorization.adapt(data=train_data.map(lambda x: x['text']))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q16bjngoVwQp" + }, + "outputs": [], + "source": [ + "def vectorize(features):\n", + " return text_vectorization(features['text']), features['label']\n", + "\n", + "train_data_vec = train_data.map(vectorize)\n", + "train_data_vec" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "atTqL9kE5wz4" + }, + "source": [ + "## Build a neural network with DTensor\n", + "\n", + "Now build a Multi-Layer Perceptron (MLP) network with `DTensor`. 
The network will use fully connected Dense and BatchNorm layers.\n", + "\n", + "`DTensor` expands TensorFlow through single-program multi-data (SPMD) expansion of regular TensorFlow Ops according to the `dtensor.Layout` attributes of their input `Tensor` and variables.\n", + "\n", + "Variables of `DTensor`-aware layers are `dtensor.DVariable`s, and the constructors of `DTensor`-aware layer objects take additional `Layout` inputs in addition to the usual layer parameters.\n", + "\n", + "Note: As of TensorFlow 2.9, Keras layers such as `tf.keras.layers.Dense` and `tf.keras.layers.BatchNormalization` accept `dtensor.Layout` arguments. Refer to the [DTensor Keras Integration Tutorial](/tutorials/distribute/dtensor_keras_tutorial) for more information on using Keras with DTensor." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PMCt-Gj3b3Jy" + }, + "source": [ + "### Dense Layer\n", + "\n", + "The following custom Dense layer defines 2 layer variables: $W_{ij}$ is the variable for weights, and $b_j$ is the variable for the biases.\n", + "\n", + "$$\n", + "y_j = \\sigma(\\sum_i x_i W_{ij} + b_j)\n", + "$$\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nYlFUJWNjl4N" + }, + "source": [ + "### Layout deduction\n", + "\n", + "The preferred layouts for these variables can be deduced from the following observations:\n", + "\n", + "- The preferred DTensor sharding for operands to a matrix dot product $t_j = \\sum_i x_i W_{ij}$ is to shard $\\mathbf{W}$ and $\\mathbf{x}$ the same way along the $i$-axis.\n", + "\n", + "- The preferred DTensor sharding for operands to a matrix sum $t_j + b_j$ is to shard $\\mathbf{t}$ and $\\mathbf{b}$ the same way along the $j$-axis.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VpKblz7Yb16G" + }, + "outputs": [], + "source": [ + "class Dense(tf.Module):\n", + "\n", + "  def __init__(self, input_size, output_size,\n", + "               init_seed, weight_layout, activation=None):\n", + "    super().__init__()\n", + "\n", + "    random_normal_initializer = tf.function(tf.random.stateless_normal)\n", + "\n", + "    self.weight = dtensor.DVariable(\n", + "        dtensor.call_with_layout(\n", + "            random_normal_initializer, weight_layout,\n", + "            shape=[input_size, output_size],\n", + "            seed=init_seed\n", + "            ))\n", + "    if activation is None:\n", + "      activation = lambda x: x\n", + "    self.activation = activation\n", + "\n", + "    # bias is sharded the same way as the last axis of weight.\n", + "    bias_layout = weight_layout.delete([0])\n", + "\n", + "    self.bias = dtensor.DVariable(\n", + "        dtensor.call_with_layout(tf.zeros, bias_layout, [output_size]))\n", + "\n", + "  def __call__(self, x):\n", + "    y = tf.matmul(x, self.weight) + self.bias\n", + "    y = self.activation(y)\n", + "\n", + "    return y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tfVY_vAKbxM0" + }, + "source": [ + "### BatchNorm\n", + "\n", + "A batch normalization layer helps avoid collapsing modes while training. In this case, adding batch normalization layers helps model training avoid producing a model that only produces zeros.\n", + "\n", + "The constructor of the custom `BatchNorm` layer below does not take a `Layout` argument. This is because `BatchNorm` has no layer variables. This still works with DTensor because 'x', the only input to the layer, is already a DTensor that represents the global batch.\n", + "\n", + "Note: With DTensor, the input Tensor 'x' always represents the global batch. Therefore `tf.nn.batch_normalization` is applied to the global batch.
This differs from training with `tf.distribute.MirroredStrategy`, where Tensor 'x' only represents the per-replica shard of the batch (the local batch)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "riBA9pfhlPFq" + }, + "outputs": [], + "source": [ + "class BatchNorm(tf.Module):\n", + "\n", + " def __init__(self):\n", + " super().__init__()\n", + "\n", + " def __call__(self, x, training=True):\n", + " if not training:\n", + " # This branch is not used in the Tutorial.\n", + " pass\n", + " mean, variance = tf.nn.moments(x, axes=[0])\n", + " return tf.nn.batch_normalization(x, mean, variance, 0.0, 1.0, 1e-5)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "q4R4MPz5prh4" + }, + "source": [ + "A full featured batch normalization layer (such as `tf.keras.layers.BatchNormalization`) will need Layout arguments for its variables." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "unFcP99zprJj" + }, + "outputs": [], + "source": [ + "def make_keras_bn(bn_layout):\n", + " return tf.keras.layers.BatchNormalization(gamma_layout=bn_layout,\n", + " beta_layout=bn_layout,\n", + " moving_mean_layout=bn_layout,\n", + " moving_variance_layout=bn_layout,\n", + " fused=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v8Dj7AJ_lPs0" + }, + "source": [ + "### Putting Layers Together\n", + "\n", + "Next, build a Multi-layer perceptron (MLP) network with the building blocks above. The diagram below shows the axis relationships between the input `x` and the weight matrices for the two `Dense` layers without any DTensor sharding or replication applied." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "udFGAO-NrZw6" + }, + "source": [ + "\"The\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8DCQ0aQ5rQtB" + }, + "source": [ + "The output of the first `Dense` layer is passed into the input of the second `Dense` layer (after the `BatchNorm`). Therefore, the preferred DTensor sharding for the output of first `Dense` layer ($\\mathbf{W_1}$) and the input of second `Dense` layer ($\\mathbf{W_2}$) is to shard $\\mathbf{W_1}$ and $\\mathbf{W_2}$ the same way along the common axis $\\hat{j}$,\n", + "\n", + "$$\n", + "\\mathsf{Layout}[{W_{1,ij}}; i, j] = \\left[\\hat{i}, \\hat{j}\\right] \\\\\n", + "\\mathsf{Layout}[{W_{2,jk}}; j, k] = \\left[\\hat{j}, \\hat{k} \\right]\n", + "$$\n", + "\n", + "Even though the layout deduction shows that the 2 layouts are not independent, for the sake of simplicity of the model interface, `MLP` will take 2 `Layout` arguments, one per Dense layer." 
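To make the shared-axis constraint concrete, here is a minimal, hedged sketch (not part of the notebook diff itself; it reuses the `dtensor`, `DEVICES`, and `UNSHARDED` names defined earlier in this tutorial) of how the two deduced layouts could be written down on a hypothetical 2-D mesh, with an inner `'model'` dimension playing the role of the shared axis $\hat{j}$:

```python
# Illustrative only: a 2-D mesh where the 'model' dimension is the shared j-axis.
# The second axis of W1 and the first axis of W2 are sharded the same way.
mesh_2d = dtensor.create_mesh([("batch", 4), ("model", 2)], devices=DEVICES)

layout_w1 = dtensor.Layout([dtensor.UNSHARDED, "model"], mesh_2d)  # [i-hat, j-hat]
layout_w2 = dtensor.Layout(["model", dtensor.UNSHARDED], mesh_2d)  # [j-hat, k-hat]

# These two layouts would then be passed to the MLP defined in the next cell:
# model = MLP([layout_w1, layout_w2])
```

The Model Parallel Training section later in this tutorial uses exactly this pairing of layouts, which is why the `MLP` constructor accepts them as a two-element list.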
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "junyS-965opl" + }, + "outputs": [], + "source": [ + "from typing import Tuple\n", + "\n", + "class MLP(tf.Module):\n", + "\n", + "  def __init__(self, dense_layouts: Tuple[dtensor.Layout, dtensor.Layout]):\n", + "    super().__init__()\n", + "\n", + "    self.dense1 = Dense(\n", + "        1200, 48, (1, 2), dense_layouts[0], activation=tf.nn.relu)\n", + "    self.bn = BatchNorm()\n", + "    self.dense2 = Dense(48, 2, (3, 4), dense_layouts[1])\n", + "\n", + "  def __call__(self, x):\n", + "    y = x\n", + "    y = self.dense1(y)\n", + "    y = self.bn(y)\n", + "    y = self.dense2(y)\n", + "    return y\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9dgLmebHhr7h" + }, + "source": [ + "The trade-off between correctness of the layout deduction constraints and simplicity of the API is a common design point of APIs that use DTensor.\n", + "It is also possible to capture the dependency between `Layout`s with a different API. For example, the `MLPStricter` class creates the `Layout` objects in the constructor." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wEZR7UlihsYX" + }, + "outputs": [], + "source": [ + "class MLPStricter(tf.Module):\n", + "\n", + "  def __init__(self, mesh, input_mesh_dim, inner_mesh_dim1, output_mesh_dim):\n", + "    super().__init__()\n", + "\n", + "    self.dense1 = Dense(\n", + "        1200, 48, (1, 2), dtensor.Layout([input_mesh_dim, inner_mesh_dim1], mesh),\n", + "        activation=tf.nn.relu)\n", + "    self.bn = BatchNorm()\n", + "    self.dense2 = Dense(48, 2, (3, 4), dtensor.Layout([inner_mesh_dim1, output_mesh_dim], mesh))\n", + "\n", + "\n", + "  def __call__(self, x):\n", + "    y = x\n", + "    y = self.dense1(y)\n", + "    y = self.bn(y)\n", + "    y = self.dense2(y)\n", + "    return y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GcQi7D5mal2L" + }, + "source": [ + "To make sure the model runs, probe your model with fully replicated layouts and a fully replicated batch of `'x'` input." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "zOPuYeQwallh" + }, + "outputs": [], + "source": [ + "WORLD = dtensor.create_mesh([(\"world\", 8)], devices=DEVICES)\n", + "\n", + "model = MLP([dtensor.Layout.replicated(WORLD, rank=2),\n", + "             dtensor.Layout.replicated(WORLD, rank=2)])\n", + "\n", + "sample_x, sample_y = train_data_vec.take(1).get_single_element()\n", + "sample_x = dtensor.copy_to_mesh(sample_x, dtensor.Layout.replicated(WORLD, rank=2))\n", + "print(model(sample_x))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "akrjDstEpDv9" + }, + "source": [ + "## Moving data to the device\n", + "\n", + "Usually, `tf.data` iterators (and other data fetching methods) yield tensor objects backed by the local host device memory. This data must be transferred to the accelerator device memory that backs DTensor's component tensors.\n", + "\n", + "`dtensor.copy_to_mesh` is unsuitable for this situation because it replicates input tensors to all devices due to DTensor's global perspective. So in this tutorial, you will use a helper function `repack_local_tensor` to facilitate the transfer of data. This helper function uses `dtensor.pack` to send (and only send) the shard of the global batch that is intended for a replica to the device backing the replica.\n", + "\n", + "This simplified function assumes a single client.
Determining the correct way to split the local tensor and the mapping between the pieces of the split and the local devices can be laborious in a multi-client application.\n", + "\n", + "Additional DTensor APIs to simplify `tf.data` integration are planned, supporting both single-client and multi-client applications. Please stay tuned." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3t5WvQR4Hvo4" + }, + "outputs": [], + "source": [ + "def repack_local_tensor(x, layout):\n", + "  \"\"\"Repacks a local Tensor-like to a DTensor with layout.\n", + "\n", + "  This function assumes a single-client application.\n", + "  \"\"\"\n", + "  x = tf.convert_to_tensor(x)\n", + "  sharded_dims = []\n", + "\n", + "  # For every sharded dimension, use tf.split to split the tensor along that dimension.\n", + "  # The result is a nested list of split-tensors in queue[0].\n", + "  queue = [x]\n", + "  for axis, dim in enumerate(layout.sharding_specs):\n", + "    if dim == dtensor.UNSHARDED:\n", + "      continue\n", + "    num_splits = layout.shape[axis]\n", + "    queue = tf.nest.map_structure(lambda x: tf.split(x, num_splits, axis=axis), queue)\n", + "    sharded_dims.append(dim)\n", + "\n", + "  # Now we can build the list of component tensors by looking up the location in\n", + "  # the nested list of split-tensors created in queue[0].\n", + "  components = []\n", + "  for locations in layout.mesh.local_device_locations():\n", + "    t = queue[0]\n", + "    for dim in sharded_dims:\n", + "      split_index = locations[dim]  # Only valid on single-client mesh.\n", + "      t = t[split_index]\n", + "    components.append(t)\n", + "\n", + "  return dtensor.pack(components, layout)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2KKCDcjG7zj2" + }, + "source": [ + "## Data parallel training\n", + "\n", + "In this section, you will train your MLP model with data parallel training. The following sections will demonstrate model parallel training and spatial parallel training.\n", + "\n", + "Data parallel training is a commonly used scheme for distributed machine learning:\n", + "\n", + " - Model variables are replicated on each of the N devices.\n", + " - A global batch is split into N per-replica batches.\n", + " - Each per-replica batch is trained on the replica device.\n", + " - The gradients are aggregated (reduced) across all replicas before the weight update is performed collectively on all replicas.\n", + "\n", + "Data parallel training provides nearly linear speedup with respect to the number of devices." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UMsLUyTGq3oL" + }, + "source": [ + "### Creating a data parallel mesh\n", + "\n", + "A typical data parallel training loop uses a DTensor `Mesh` that consists of a single `batch` dimension, where each device becomes a replica that receives a shard from the global batch.\n", + "\n", + "\"Data\n", + "\n", + "\n", + "The replicated model runs on each replica, therefore the model variables are fully replicated (unsharded)."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "C0IyOlxmeu4I" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 8)], devices=DEVICES)\n", + "\n", + "model = MLP([dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh),\n", + " dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh),])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OREKwBybo1gZ" + }, + "source": [ + "### Packing training data to DTensors\n", + "\n", + "The training data batch should be packed into DTensors sharded along the `'batch'`(first) axis, such that DTensor will evenly distribute the training data to the `'batch'` mesh dimension.\n", + "\n", + "**Note**: In DTensor, the `batch size` always refers to the global batch size. The batch size should be chosen such that it can be divided evenly by the size of the `batch` mesh dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8xMYkTpGocY8" + }, + "outputs": [], + "source": [ + "def repack_batch(x, y, mesh):\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y\n", + "\n", + "sample_x, sample_y = train_data_vec.take(1).get_single_element()\n", + "sample_x, sample_y = repack_batch(sample_x, sample_y, mesh)\n", + "\n", + "print('x', sample_x[:, 0])\n", + "print('y', sample_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "uONSiqOIkFL1" + }, + "source": [ + "### Training step\n", + "\n", + "This example uses a Stochastic Gradient Descent optimizer with the Custom Training Loop (CTL). Consult the [Custom Training Loop guide](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Walk through](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough) for more information on those topics.\n", + "\n", + "The `train_step` is encapsulated as a `tf.function` to indicate this body is to be traced as a TensorFlow Graph. The body of `train_step` consists of a forward inference pass, a backward gradient pass, and the variable update.\n", + "\n", + "Note that the body of `train_step` does not contain any special DTensor annotations. Instead, `train_step` only contains high-level TensorFlow operations that process the input `x` and `y` from the global view of the input batch and the model. All of the DTensor annotations (`Mesh`, `Layout`) are factored out of the train step." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BwUFzLGDtQT6" + }, + "outputs": [], + "source": [ + "# Refer to the CTL (custom training loop) guide.\n", + "@tf.function\n", + "def train_step(model, x, y, learning_rate=tf.constant(1e-4)):\n", + "  with tf.GradientTape() as tape:\n", + "    logits = model(x)\n", + "    # tf.reduce_sum sums the batch sharded per-example loss to a replicated\n", + "    # global loss (scalar).\n", + "    loss = tf.reduce_sum(\n", + "        tf.nn.sparse_softmax_cross_entropy_with_logits(\n", + "            logits=logits, labels=y))\n", + "  parameters = model.trainable_variables\n", + "  gradients = tape.gradient(loss, parameters)\n", + "  for parameter, parameter_gradient in zip(parameters, gradients):\n", + "    parameter.assign_sub(learning_rate * parameter_gradient)\n", + "\n", + "  # Define some metrics\n", + "  accuracy = 1.0 - tf.reduce_sum(tf.cast(tf.argmax(logits, axis=-1, output_type=tf.int64) != y, tf.float32)) / x.shape[0]\n", + "  loss_per_sample = loss / len(x)\n", + "  return {'loss': loss_per_sample, 'accuracy': accuracy}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0OYTu4j0evWT" + }, + "source": [ + "### Checkpointing\n", + "\n", + "You can checkpoint a DTensor model using `tf.train.Checkpoint` out of the box. Saving and restoring sharded DVariables will perform an efficient sharded save and restore. Currently, when using `tf.train.Checkpoint.save` and `tf.train.Checkpoint.restore`, all DVariables must be on the same host mesh, and DVariables and regular variables cannot be saved together. You can learn more about checkpointing in [this guide](../../guide/checkpoint.ipynb).\n", + "\n", + "When a DTensor checkpoint is restored, `Layout`s of variables can be different from when the checkpoint was saved. That is, saving DTensor models is layout- and mesh-agnostic, and only affects the efficiency of sharded saving. You can save a DTensor model with one mesh and layout and restore it on a different mesh and layout. This tutorial makes use of this feature to continue the training in the Model Parallel training and Spatial Parallel training sections.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rsInFFJg7x9t" + }, + "outputs": [], + "source": [ + "CHECKPOINT_DIR = tempfile.mkdtemp()\n", + "\n", + "def start_checkpoint_manager(model):\n", + "  ckpt = tf.train.Checkpoint(root=model)\n", + "  manager = tf.train.CheckpointManager(ckpt, CHECKPOINT_DIR, max_to_keep=3)\n", + "\n", + "  if manager.latest_checkpoint:\n", + "    print(\"Restoring a checkpoint\")\n", + "    ckpt.restore(manager.latest_checkpoint).assert_consumed()\n", + "  else:\n", + "    print(\"New training\")\n", + "  return manager\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9r77ky5Jgp1j" + }, + "source": [ + "### Training loop\n", + "\n", + "For the data parallel training scheme, train for a few epochs and report the progress. A few epochs are not enough to fully train the model -- an accuracy of 50% is as good as random guessing.\n", + "\n", + "Enable checkpointing so that you can pick up the training later. In the following section, you will load the checkpoint and train with a different parallel scheme."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "UaLn-vGZgqbS" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "manager = start_checkpoint_manager(model)\n", + "\n", + "for epoch in range(num_epochs):\n", + " step = 0\n", + " pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()), stateful_metrics=[])\n", + " metrics = {'epoch': epoch}\n", + " for x,y in train_data_vec:\n", + "\n", + " x, y = repack_batch(x, y, mesh)\n", + "\n", + " metrics.update(train_step(model, x, y, 1e-2))\n", + "\n", + " pbar.update(step, values=metrics.items(), finalize=False)\n", + " step += 1\n", + " manager.save()\n", + " pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YRFJEhum7EGD" + }, + "source": [ + "## Model Parallel Training\n", + "\n", + "If you switch to a 2 dimensional `Mesh`, and shard the model variables along the second mesh dimension, then the training becomes Model Parallel.\n", + "\n", + "In Model Parallel training, each model replica spans multiple devices (2 in this case):\n", + "\n", + "- There are 4 model replicas, and the training data batch is distributed to the 4 replicas.\n", + "- The 2 devices within a single model replica receive replicated training data.\n", + "\n", + "\n", + "\"Model\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "5gZE9IT5Dzwl" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 4), (\"model\", 2)], devices=DEVICES)\n", + "model = MLP([dtensor.Layout([dtensor.UNSHARDED, \"model\"], mesh), \n", + " dtensor.Layout([\"model\", dtensor.UNSHARDED], mesh)])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ihof3DkMFKnf" + }, + "source": [ + "As the training data is still sharded along the batch dimension, you can reuse the same `repack_batch` function as the Data Parallel training case. DTensor will automatically replicate the per-replica batch to all devices inside the replica along the `\"model\"` mesh dimension." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dZf56ynbE_p1" + }, + "outputs": [], + "source": [ + "def repack_batch(x, y, mesh):\n", + " x = repack_local_tensor(x, layout=dtensor.Layout(['batch', dtensor.UNSHARDED], mesh))\n", + " y = repack_local_tensor(y, layout=dtensor.Layout(['batch'], mesh))\n", + " return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UW3OXdhNFfpv" + }, + "source": [ + "Next run the training loop. The training loop reuses the same checkpoint manager as the Data Parallel training example, and the code looks identical.\n", + "\n", + "You can continue training the data parallel trained model under model parallel training." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LLC0wgii7EgA" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "manager = start_checkpoint_manager(model)\n", + "\n", + "for epoch in range(num_epochs):\n", + "  step = 0\n", + "  pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()))\n", + "  metrics = {'epoch': epoch}\n", + "  for x, y in train_data_vec:\n", + "    x, y = repack_batch(x, y, mesh)\n", + "    metrics.update(train_step(model, x, y, 1e-2))\n", + "    pbar.update(step, values=metrics.items(), finalize=False)\n", + "    step += 1\n", + "  manager.save()\n", + "  pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BZH-aMrVzi2L" + }, + "source": [ + "## Spatial Parallel Training" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "u-bK6IZ9GCS9" + }, + "source": [ + "When training with data of very high dimensionality (e.g. a very large image or a video), it may be desirable to shard along the feature dimension. This is called [Spatial Partitioning](https://cloud.google.com/blog/products/ai-machine-learning/train-ml-models-on-large-images-and-3d-volumes-with-spatial-partitioning-on-cloud-tpus), which was first introduced into TensorFlow for training models with large 3-d input samples.\n", + "\n", + "\"Spatial\n", + "\n", + "DTensor also supports this case. The only change you need to make is to create a Mesh that includes a `feature` dimension, and apply the corresponding `Layout`.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jpc9mqURGpmK" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"batch\", 2), (\"feature\", 2), (\"model\", 2)], devices=DEVICES)\n", + "model = MLP([dtensor.Layout([\"feature\", \"model\"], mesh), \n", + "             dtensor.Layout([\"model\", dtensor.UNSHARDED], mesh)])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i07Wrv-jHBc1" + }, + "source": [ + "Shard the input data along the `feature` dimension when packing the input tensors to DTensors. You do this with a slightly different repack function, `repack_batch_for_spt`, where `spt` stands for Spatial Parallel Training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "DWR8qF6BGtFL" + }, + "outputs": [], + "source": [ + "def repack_batch_for_spt(x, y, mesh):\n", + "  # Shard the data along the feature dimension, too.\n", + "  x = repack_local_tensor(x, layout=dtensor.Layout([\"batch\", 'feature'], mesh))\n", + "  y = repack_local_tensor(y, layout=dtensor.Layout([\"batch\"], mesh))\n", + "  return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ygl9dqMUHTVN" + }, + "source": [ + "Spatial parallel training can also continue from a checkpoint created with other parallel training schemes."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "p3NnpHSKo-hx" + }, + "outputs": [], + "source": [ + "num_epochs = 2\n", + "\n", + "manager = start_checkpoint_manager(model)\n", + "for epoch in range(num_epochs):\n", + " step = 0\n", + " metrics = {'epoch': epoch}\n", + " pbar = tf.keras.utils.Progbar(target=int(train_data_vec.cardinality()))\n", + "\n", + " for x, y in train_data_vec:\n", + " x, y = repack_batch_for_spt(x, y, mesh)\n", + " metrics.update(train_step(model, x, y, 1e-2))\n", + "\n", + " pbar.update(step, values=metrics.items(), finalize=False)\n", + " step += 1\n", + " manager.save()\n", + " pbar.update(step, values=metrics.items(), finalize=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vp4L59CpJjYr" + }, + "source": [ + "## SavedModel and DTensor\n", + "\n", + "The integration of DTensor and SavedModel is still under development. \n", + "\n", + "As of TensorFlow `2.11`, `tf.saved_model` can save sharded and replicated DTensor models, and saving will do an efficient sharded save on different devices of the mesh. However, after a model is saved, all DTensor annotations are lost and the saved signatures can only be used with regular Tensors, not DTensors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "49HfIq_SJZoj" + }, + "outputs": [], + "source": [ + "mesh = dtensor.create_mesh([(\"world\", 1)], devices=DEVICES[:1])\n", + "mlp = MLP([dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh), \n", + " dtensor.Layout([dtensor.UNSHARDED, dtensor.UNSHARDED], mesh)])\n", + "\n", + "manager = start_checkpoint_manager(mlp)\n", + "\n", + "model_for_saving = tf.keras.Sequential([\n", + " text_vectorization,\n", + " mlp\n", + "])\n", + "\n", + "@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])\n", + "def run(inputs):\n", + " return {'result': model_for_saving(inputs)}\n", + "\n", + "tf.saved_model.save(\n", + " model_for_saving, \"/tmp/saved_model\",\n", + " signatures=run)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h6Csim_VMGxQ" + }, + "source": [ + "As of TensorFlow 2.9.0, you can only call a loaded signature with a regular Tensor, or a fully replicated DTensor (which will be converted to a regular Tensor)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "HG_ASSzR4IWW" + }, + "outputs": [], + "source": [ + "sample_batch = train_data.take(1).get_single_element()\n", + "sample_batch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qW8yKPrhKQ5b" + }, + "outputs": [], + "source": [ + "loaded = tf.saved_model.load(\"/tmp/saved_model\")\n", + "\n", + "run_sig = loaded.signatures[\"serving_default\"]\n", + "result = run_sig(sample_batch['text'])['result']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GahGbv0ZmkJb" + }, + "outputs": [], + "source": [ + "np.mean(tf.argmax(result, axis=-1) == sample_batch['label'])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ks-Vs9qsH6jO" + }, + "source": [ + "## What's next?\n", + "\n", + "This tutorial demonstrated building and training an MLP sentiment analysis model with DTensor.\n", + "\n", + "Through the `Mesh` and `Layout` primitives, DTensor can transform a TensorFlow `tf.function` into a distributed program suitable for a variety of training schemes.\n", + "\n", + "In a real-world machine learning application, evaluation and cross-validation should be applied to avoid producing an over-fitted model. The techniques introduced in this tutorial can also be applied to introduce parallelism to evaluation.\n", + "\n", + "Composing a model with `tf.Module` from scratch is a lot of work, and reusing existing building blocks such as layers and helper functions can drastically speed up model development.\n", + "As of TensorFlow 2.9, all Keras layers under `tf.keras.layers` accept DTensor layouts as their arguments, and can be used to build DTensor models. You can even directly reuse a Keras model with DTensor without modifying the model implementation. Refer to the [DTensor Keras Integration Tutorial](https://www.tensorflow.org/tutorials/distribute/dtensor_keras_tutorial) for information on using DTensor with Keras." + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "dtensor_ml_tutorial.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/distribute/input.ipynb b/site/en/tutorials/distribute/input.ipynb index 073cdd66725..f779c4f19a6 100644 --- a/site/en/tutorials/distribute/input.ipynb +++ b/site/en/tutorials/distribute/input.ipynb @@ -73,7 +73,7 @@ "This guide will show you the different ways in which you can create distributed dataset and iterators using `tf.distribute` APIs. Additionally, the following topics will be covered:\n", "- Usage, sharding and batching options when using `tf.distribute.Strategy.experimental_distribute_dataset` and `tf.distribute.Strategy.distribute_datasets_from_function`.\n", "- Different ways in which you can iterate over the distributed dataset.\n", - "- Differences between `tf.distribute.Strategy.experimental_distribute_dataset`/`tf.distribute.Strategy.distribute_datasets_from_function` APIs and `tf.data` APIs as well any limitations that users may come across in their usage.\n", + "- Differences between `tf.distribute.Strategy.experimental_distribute_dataset`/`tf.distribute.Strategy.distribute_datasets_from_function` APIs and `tf.data` APIs as well as any limitations that users may come across in their usage.\n", "\n", "This guide does not cover usage of distributed input with Keras APIs."
] @@ -733,7 +733,7 @@ "id": "hNCYZ9L-BD2R" }, "source": [ - "### Model.fit" + "### Preprocessing with `Model.fit`" ] }, { @@ -792,7 +792,7 @@ "id": "imZLQUOYBJyW" }, "source": [ - "### Custom Training Loop" + "### Preprocessing with a custom training loop" ] }, { @@ -1096,8 +1096,7 @@ "colab": { "collapsed_sections": [], "name": "input.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/keras.ipynb b/site/en/tutorials/distribute/keras.ipynb index 31a0dd7f386..b96656d4436 100644 --- a/site/en/tutorials/distribute/keras.ipynb +++ b/site/en/tutorials/distribute/keras.ipynb @@ -280,7 +280,7 @@ "id": "4xsComp8Kz5H" }, "source": [ - "## Create the model" + "## Create the model and instantiate the optimizer" ] }, { @@ -310,10 +310,21 @@ " ])\n", "\n", " model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", - " optimizer=tf.keras.optimizers.Adam(),\n", + " optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),\n", " metrics=['accuracy'])" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "DCDKFcNJzdcd" + }, + "source": [ + "For this toy example with the MNIST dataset, you will be using the Adam optimizer's default learning rate of 0.001.\n", + "\n", + "For larger datasets, the key benefit of distributed training is to learn more in each training step, because each step processes more training data in parallel, which allows for a larger learning rate (within the limits of the model and dataset)." + ] + }, { "cell_type": "markdown", "metadata": { @@ -352,7 +363,7 @@ "# Define the checkpoint directory to store the checkpoints.\n", "checkpoint_dir = './training_checkpoints'\n", "# Define the name of the checkpoint files.\n", - "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch}\")" + "checkpoint_prefix = os.path.join(checkpoint_dir, \"ckpt_{epoch:04d}.weights.h5\")" ] }, { @@ -385,8 +396,7 @@ "# Define a callback for printing the learning rate at the end of each epoch.\n", "class PrintLR(tf.keras.callbacks.Callback):\n", " def on_epoch_end(self, epoch, logs=None):\n", - " print('\\nLearning rate for epoch {} is {}'.format(", - " epoch + 1, model.optimizer.lr.numpy()))" + " print('\\nLearning rate for epoch {} is {}'.format(epoch + 1, model.optimizer.learning_rate.numpy()))" ] }, { @@ -476,7 +486,10 @@ }, "outputs": [], "source": [ - "model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))\n", + "import pathlib\n", + "latest_checkpoint = sorted(pathlib.Path(checkpoint_dir).glob('*'))[-1]\n", + "\n", + "model.load_weights(latest_checkpoint)\n", "\n", "eval_loss, eval_acc = model.evaluate(eval_dataset)\n", "\n", @@ -529,7 +542,7 @@ "id": "kBLlogrDvMgg" }, "source": [ - "## Export to SavedModel" + "## Save the model" ] }, { @@ -538,7 +551,7 @@ "id": "Xa87y_A0vRma" }, "source": [ - "Export the graph and the variables to the platform-agnostic SavedModel format using Keras `Model.save`. After your model is saved, you can load it with or without the `Strategy.scope`." + "Save the model to a `.keras` zip archive using `Model.save`. After your model is saved, you can load it with or without the `Strategy.scope`." 
] }, { @@ -549,7 +562,7 @@ }, "outputs": [], "source": [ - "path = 'saved_model/'" + "path = 'my_model.keras'" ] }, { @@ -560,7 +573,7 @@ }, "outputs": [], "source": [ - "model.save(path, save_format='tf')" + "model.save(path)" ] }, { diff --git a/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb b/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb index 859a117d2f2..0361eea9328 100644 --- a/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_ctl.ipynb @@ -63,11 +63,9 @@ "source": [ "## Overview\n", "\n", - "This tutorial demonstrates multi-worker training with custom training loop API, distributed via MultiWorkerMirroredStrategy, so a Keras model designed to run on [single-worker](https://www.tensorflow.org/tutorials/distribute/custom_training) can seamlessly work on multiple workers with minimal code change.\n", + "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and with [custom training loops](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) using the `tf.distribute.Strategy` API. The training loop is distributed via `tf.distribute.MultiWorkerMirroredStrategy`, such that a `tf.keras` model—designed to run on [single-worker](custom_training.ipynb)—can seamlessly work on multiple workers with minimal code changes. Custom training loops provide flexibility and a greater control on training, while also making it easier to debug the model. Learn more about [writing a basic training loop](../../guide/basic_training_loops.ipynb), [writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [custom training](../customization/custom_training_walkthrough.ipynb).\n", "\n", - "We are using custom training loops to train our model because they give us flexibility and a greater control on training. Moreover, it is easier to debug the model and the training loop. More detailed information is available in [Writing a training loop from scratch](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch).\n", - "\n", - "If you are looking for how to use `MultiWorkerMirroredStrategy` with keras `model.fit`, refer to this [tutorial](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) instead.\n", + "If you are looking for how to use `MultiWorkerMirroredStrategy` with `tf.keras.Model.fit`, refer to this [tutorial](multi_worker_with_keras.ipynb) instead.\n", "\n", "[Distributed Training in TensorFlow](../../guide/distributed_training.ipynb) guide is available for an overview of the distribution strategies TensorFlow supports for those interested in a deeper understanding of `tf.distribute.Strategy` APIs." ] @@ -102,9 +100,8 @@ "id": "Zz0EY91y3mxy" }, "source": [ - "Before importing TensorFlow, make a few changes to the environment.\n", - "\n", - "Disable all GPUs. This prevents errors caused by the workers all trying to use the same GPU. For a real application each worker would be on a different machine." + "Before importing TensorFlow, make a few changes to the environment:\n", + "* Disable all GPUs. This prevents errors caused by all workers trying to use the same GPU. In a real-world application, each worker would be on a different machine." ] }, { @@ -124,7 +121,7 @@ "id": "7X1MS6385BWi" }, "source": [ - "Reset the `TF_CONFIG` environment variable, you'll see more about this later." 
+ "* Reset the `'TF_CONFIG'` environment variable (you'll see more about this later)." ] }, { @@ -144,7 +141,7 @@ "id": "Rd4L9Ii77SS8" }, "source": [ - "Be sure that the current directory is on python's path. This allows the notebook to import the files written by `%%writefile` later.\n" + "* Make sure that the current directory is on Python's path. This allows the notebook to import the files written by `%%writefile` later.\n" ] }, { @@ -194,7 +191,7 @@ "id": "fLW6D2TzvC-4" }, "source": [ - "Next create an `mnist.py` file with a simple model and dataset setup. This python file will be used by the worker-processes in this tutorial:" + "Next, create an `mnist.py` file with a simple model and dataset setup. This Python file will be used by the worker-processes in this tutorial:" ] }, { @@ -230,13 +227,18 @@ " return dataset\n", "\n", "def build_cnn_model():\n", + " regularizer = tf.keras.regularizers.L2(1e-5)\n", " return tf.keras.Sequential([\n", " tf.keras.Input(shape=(28, 28)),\n", " tf.keras.layers.Reshape(target_shape=(28, 28, 1)),\n", - " tf.keras.layers.Conv2D(32, 3, activation='relu'),\n", + " tf.keras.layers.Conv2D(32, 3,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", " tf.keras.layers.Flatten(),\n", - " tf.keras.layers.Dense(128, activation='relu'),\n", - " tf.keras.layers.Dense(10)\n", + " tf.keras.layers.Dense(128,\n", + " activation='relu',\n", + " kernel_regularizer=regularizer),\n", + " tf.keras.layers.Dense(10, kernel_regularizer=regularizer)\n", " ])" ] }, @@ -246,9 +248,9 @@ "id": "JmgZwwymxqt5" }, "source": [ - "## Multi-worker Configuration\n", + "## Multi-worker configuration\n", "\n", - "Now let's enter the world of multi-worker training. In TensorFlow, the `TF_CONFIG` environment variable is required for training on multiple machines, each of which possibly has a different role. `TF_CONFIG` used below, is a JSON string used to specify the cluster configuration on each worker that is part of the cluster. This is the default method for specifying a cluster, using `cluster_resolver.TFConfigClusterResolver`, but there are other options available in the `distribute.cluster_resolver` module." + "Now let's enter the world of multi-worker training. In TensorFlow, the `'TF_CONFIG'` environment variable is required for training on multiple machines. Each machine may have a different role. The `'TF_CONFIG'` variable used below is a JSON string that specifies the cluster configuration on each worker that is part of the cluster. This is the default method for specifying a cluster, using `cluster_resolver.TFConfigClusterResolver`, but there are other options available in the `distribute.cluster_resolver` module. Learn more about setting up the `'TF_CONFIG'` variable in the [Distributed training guide](../../guide/distributed_training.ipynb)." ] }, { @@ -283,7 +285,7 @@ "id": "JjgwJbPKZkJL" }, "source": [ - "Here is the same `TF_CONFIG` serialized as a JSON string:" + "Note that `tf_config` is just a local variable in Python. To use it for training configuration, serialize it as a JSON and place it in a `'TF_CONFIG'` environment variable. 
Here is the same `'TF_CONFIG'` serialized as a JSON string:" ] }, { @@ -303,11 +305,11 @@ "id": "AUBmYRZqxthH" }, "source": [ - "There are two components of `TF_CONFIG`: `cluster` and `task`.\n", + "There are two components of `'TF_CONFIG'`: `'cluster'` and `'task'`.\n", "\n", - "* `cluster` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs such as `worker`. In multi-worker training with `MultiWorkerMirroredStrategy`, there is usually one `worker` that takes on a little more responsibility like saving checkpoint and writing summary file for TensorBoard in addition to what a regular `worker` does. Such a worker is referred to as the `chief` worker, and it is customary that the `worker` with `index` 0 is appointed as the chief `worker` (in fact this is how `tf.distribute.Strategy` is implemented).\n", + "* `'cluster'` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs such as `'worker'`. In multi-worker training with `MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on a little more responsibility like saving checkpoints and writing summary files for TensorBoard in addition to what a regular `'worker'` does. Such a worker is referred to as the `'chief'` worker, and it is customary that the `'worker'` with `'index'` 0 is appointed as the chief `worker`.\n", "\n", - "* `task` provides information of the current task and is different on each worker. It specifies the `type` and `index` of that worker." + "* `'task'` provides information of the current task and is different on each worker. It specifies the `'type'` and `'index'` of that worker." ] }, { @@ -316,7 +318,7 @@ "id": "8YFpxrcsZ2xG" }, "source": [ - "In this example, you set the task `type` to `\"worker\"` and the task `index` to `0`. This machine is the first worker and will be appointed as the chief worker and do more work than the others. Note that other machines will need to have the `TF_CONFIG` environment variable set as well, and it should have the same `cluster` dict, but different task `type` or task `index` depending on what the roles of those machines are.\n" + "In this example, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. This machine is the first worker and will be appointed as the chief worker and do more work than the others. Note that other machines will need to have the `'TF_CONFIG'` environment variable set as well, and it should have the same `'cluster'` dict, but different task `'type'` or task `'index'` depending on what the roles of those machines are.\n" ] }, { @@ -325,18 +327,9 @@ "id": "aogb74kHxynz" }, "source": [ - "For illustration purposes, this tutorial shows how one may set a `TF_CONFIG` with 2 workers on `localhost`. In practice, users would create multiple workers on external IP addresses/ports, and set `TF_CONFIG` on each worker appropriately.\n", + "For illustration purposes, this tutorial shows how one may set a `'TF_CONFIG'` with two workers on `'localhost'`. In practice, users would create multiple workers on external IP addresses/ports, and set `'TF_CONFIG'` on each worker appropriately.\n", "\n", - "In this example you will use 2 workers, the first worker's `TF_CONFIG` is shown above. 
For the second worker you would set `tf_config['task']['index']=1`" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "f83FVYqDX3aX" - }, - "source": [ - "Above, `tf_config` is just a local variable in python. To actually use it to configure training, this dictionary needs to be serialized as JSON, and placed in the `TF_CONFIG` environment variable." + "This example uses two workers. The first worker's `'TF_CONFIG'` is shown above. For the second worker, set `tf_config['task']['index']=1`." ] }, { @@ -354,7 +347,7 @@ "id": "FcjAbuGY1ACJ" }, "source": [ - "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this `jupyter notebook` process:" + "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this Jupyter Notebook process:" ] }, { @@ -374,7 +367,7 @@ "id": "gQkIX-cg18md" }, "source": [ - "You can access the environment variable from a subprocesses:" + "you can then access the environment variable from a subprocess:" ] }, { @@ -395,7 +388,7 @@ "id": "af6BCA-Y2fpz" }, "source": [ - "In the next section, you'll use this to pass the `TF_CONFIG` to the worker subprocesses. You would never really launch your jobs this way, but it's sufficient for the purposes of this tutorial: To demonstrate a minimal multi-worker example." + "In the next section, you'll use this to pass the `'TF_CONFIG'` to the worker subprocesses. You would never really launch your jobs this way, but it's sufficient for the purposes of this tutorial: To demonstrate a minimal multi-worker example." ] }, { @@ -406,7 +399,7 @@ "source": [ "## MultiWorkerMirroredStrategy\n", "\n", - "To train the model, use an instance of `tf.distribute.MultiWorkerMirroredStrategy`, which creates copies of all variables in the model's layers on each device across all workers. The [`tf.distribute.Strategy` guide](../../guide/distributed_training.ipynb) has more details about this strategy." + "Before training the model, first create an instance of `tf.distribute.MultiWorkerMirroredStrategy`:" ] }, { @@ -426,7 +419,7 @@ "id": "N0iv7SyyAohc" }, "source": [ - "Note: `TF_CONFIG` is parsed and TensorFlow's GRPC servers are started at the time `MultiWorkerMirroredStrategy()` is called, so the `TF_CONFIG` environment variable must be set before a `tf.distribute.Strategy` instance is created. To save time in this illustrative example we have not done this so that servers do not need to start. A full example is found in the last section of this tutorial." + "Note: `'TF_CONFIG'` is parsed and TensorFlow's GRPC servers are started at the time you call `tf.distribute.MultiWorkerMirroredStrategy.` Therefore, you must set the `'TF_CONFIG'` environment variable before you instantiate a `tf.distribute.Strategy`. To save time in this illustrative example, this is not demonstrated in this tutorial, so that servers do not need to start. You can find a full example in the last section of this tutorial." ] }, { @@ -435,7 +428,7 @@ "id": "TS4S-faBHHam" }, "source": [ - "Use `tf.distribute.Strategy.scope` to specify that a strategy should be used when building your model. This puts you in the \"[cross-replica context](https://www.tensorflow.org/guide/distributed_training?hl=en#mirroredstrategy)\" for this strategy, which means the strategy is put in control of things like variable placement." + "Use `tf.distribute.Strategy.scope` to specify that a strategy should be used when building your model. 
This allows the strategy to control things like variable placement—it will create copies of all variables in the model's layers on each device across all workers." ] }, { @@ -459,9 +452,8 @@ }, "source": [ "## Auto-shard your data across workers\n", - "In multi-worker training, dataset sharding is not necessarily needed, however it gives you exactly-once semantics which makes more training more reproducible, i.e. training on multiple workers should be the same as training on one worker. Note: performance can be affected in some cases.\n", "\n", - "See: [`distribute_datasets_from_function`](https://www.tensorflow.org/api_docs/python/tf/distribute/Strategy?version=nightly#distribute_datasets_from_function)" + "In multi-worker training, _dataset sharding_ is needed to ensure convergence and reproducibility. Sharding means handing each worker a subset of the entire dataset—it helps create the experience similar to training on a single worker. In the example below, you're relying on the default autosharding policy of `tf.distribute`. You can also customize it by setting the `tf.data.experimental.AutoShardPolicy` of the `tf.data.experimental.DistributeOptions`. To learn more, refer to the _Sharding_ section of the [Distributed input tutorial](input.ipynb)." ] }, { @@ -487,8 +479,8 @@ "id": "rkNzSR3g60iP" }, "source": [ - "## Define Custom Training Loop and Train the model\n", - "Specify an optimizer" + "## Define a custom training loop and train the model\n", + "Specify an optimizer:" ] }, { @@ -500,7 +492,7 @@ "outputs": [], "source": [ "with strategy.scope():\n", - " # The creation of optimizer and train_accuracy will need to be in\n", + " # The creation of optimizer and train_accuracy needs to be in\n", " # `strategy.scope()` as well, since they create variables.\n", " optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)\n", " train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", @@ -513,7 +505,7 @@ "id": "RmrDcAii4B5O" }, "source": [ - "Define a training step with `tf.function`\n" + "Define a training step with `tf.function`:\n" ] }, { @@ -533,11 +525,13 @@ " x, y = inputs\n", " with tf.GradientTape() as tape:\n", " predictions = multi_worker_model(x, training=True)\n", - " per_batch_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", + " per_example_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)(y, predictions)\n", - " loss = tf.nn.compute_average_loss(\n", - " per_batch_loss, global_batch_size=global_batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = multi_worker_model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " grads = tape.gradient(loss, multi_worker_model.trainable_variables)\n", " optimizer.apply_gradients(\n", @@ -558,7 +552,7 @@ "source": [ "### Checkpoint saving and restoring\n", "\n", - "Checkpointing implementation in a Custom Training Loop requires the user to handle it instead of using a keras callback. It allows you to save model's weights and restore them without having to save the whole model." + "As you write a custom training loop, you need to handle [checkpoint saving](../../guide/checkpoint.ipynb) manually instead of relying on a Keras callback. Note that for `MultiWorkerMirroredStrategy`, saving a checkpoint or a complete model requires the participation of all workers, because attempting to save only on the chief worker could lead to a deadlock. 
Workers also need to write to different paths to avoid overwriting each other. Here's an example of how to configure the directories:" ] }, { @@ -593,23 +587,13 @@ " return os.path.join(dirpath, base)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "P7fabUIEW7-M" - }, - "source": [ - "Note: Checkpointing and Saving need to happen on each worker and they need to write to different paths as they would override each others.\n", - "If you chose to only checkpoint/save on the chief, this can lead to deadlock and is not recommended." - ] - }, { "cell_type": "markdown", "metadata": { "id": "nrcdPHtG4ObO" }, "source": [ - " Here, you'll create one `tf.train.Checkpoint` that tracks the model, which is managed by a `tf.train.CheckpointManager` so that only the latest checkpoint is preserved." + "Create one `tf.train.Checkpoint` that tracks the model, which is managed by a `tf.train.CheckpointManager`, so that only the latest checkpoints are preserved:" ] }, { @@ -627,9 +611,9 @@ " name='step_in_epoch')\n", "task_type, task_id = (strategy.cluster_resolver.task_type,\n", " strategy.cluster_resolver.task_id)\n", - "# We normally don't need to manually instantiate a ClusterSpec, but in this \n", - "# illustrative example we did not set TF_CONFIG before initializing the\n", - "# strategy. See the next section for \"real-world\" usage.\n", + "# Normally, you don't need to manually instantiate a `ClusterSpec`, but in this\n", + "# illustrative example you did not set `'TF_CONFIG'` before initializing the\n", + "# strategy. Check out the next section for \"real-world\" usage.\n", "cluster_spec = tf.train.ClusterSpec(tf_config['cluster'])\n", "\n", "checkpoint = tf.train.Checkpoint(\n", @@ -647,7 +631,7 @@ "id": "RO7cbN40XD5v" }, "source": [ - "Now, when you need to restore, you can find the latest checkpoint saved using the convenient `tf.train.latest_checkpoint` function." + "Now, when you need to restore a checkpoint, you can find the latest checkpoint saved using the convenient `tf.train.latest_checkpoint` function (or by calling `tf.train.CheckpointManager.restore_or_initialize`)." ] }, { @@ -715,7 +699,7 @@ "id": "0W1Osks466DE" }, "source": [ - "## Full code setup on workers" + "## Complete code at a glance" ] }, { @@ -724,10 +708,11 @@ "id": "jfYpmIxO6Jck" }, "source": [ - "To actually run with `MultiWorkerMirroredStrategy` you'll need to run worker processes and pass a `TF_CONFIG` to them.\n", + "To sum up all the procedures discussed so far:\n", "\n", - "Like the `mnist.py` file written earlier, here is the `main.py` that \n", - "contain the same code we walked through step by step previously in this colab, we're just writing it to a file so each of the workers will run it:" + "1. You create worker processes.\n", + "2. Pass `'TF_CONFIG'`s to the worker processes.\n", + "3. Let each work process run the script below that contains the training code." 
] }, { @@ -761,7 +746,7 @@ " or (task_type == 'worker'\n", " and task_id == 0\n", " and 'chief' not in cluster_spec.as_dict()))\n", - " \n", + "\n", "def _get_temp_dir(dirpath, task_id):\n", " base_dirpath = 'workertemp_' + str(task_id)\n", " temp_dir = os.path.join(dirpath, base_dirpath)\n", @@ -781,11 +766,11 @@ "strategy = tf.distribute.MultiWorkerMirroredStrategy()\n", "\n", "with strategy.scope():\n", - " # Model building/compiling need to be within `strategy.scope()`.\n", + " # Model building/compiling need to be within `tf.distribute.Strategy.scope`.\n", " multi_worker_model = mnist.build_cnn_model()\n", "\n", " multi_worker_dataset = strategy.distribute_datasets_from_function(\n", - " lambda input_context: mnist.dataset_fn(global_batch_size, input_context)) \n", + " lambda input_context: mnist.dataset_fn(global_batch_size, input_context))\n", " optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)\n", " train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(\n", " name='train_accuracy')\n", @@ -799,11 +784,13 @@ " x, y = inputs\n", " with tf.GradientTape() as tape:\n", " predictions = multi_worker_model(x, training=True)\n", - " per_batch_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", + " per_example_loss = tf.keras.losses.SparseCategoricalCrossentropy(\n", " from_logits=True,\n", " reduction=tf.keras.losses.Reduction.NONE)(y, predictions)\n", - " loss = tf.nn.compute_average_loss(\n", - " per_batch_loss, global_batch_size=global_batch_size)\n", + " loss = tf.nn.compute_average_loss(per_example_loss)\n", + " model_losses = multi_worker_model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", "\n", " grads = tape.gradient(loss, multi_worker_model.trainable_variables)\n", " optimizer.apply_gradients(\n", @@ -853,7 +840,7 @@ " train_loss = total_loss / num_batches\n", " print('Epoch: %d, accuracy: %f, train_loss: %f.'\n", " %(epoch.numpy(), train_accuracy.result(), train_loss))\n", - " \n", + "\n", " train_accuracy.reset_states()\n", "\n", " checkpoint_manager.save()\n", @@ -870,7 +857,6 @@ "id": "ItVOvPN1qnZ6" }, "source": [ - "## Train and Evaluate\n", "The current directory now contains both Python files:" ] }, @@ -892,7 +878,7 @@ "id": "qmEEStPS6vR_" }, "source": [ - "So json-serialize the `TF_CONFIG` and add it to the environment variables:" + "So JSON-serialize the `'TF_CONFIG'` and add it to the environment variables:" ] }, { @@ -912,7 +898,7 @@ "id": "MsY3dQLK7jdf" }, "source": [ - "Now, you can launch a worker process that will run the `main.py` and use the `TF_CONFIG`:" + "Now, you can launch a worker process that will run the `main.py` and use the `'TF_CONFIG'`:" ] }, { @@ -950,9 +936,9 @@ "1. It uses the `%%bash` which is a [notebook \"magic\"](https://ipython.readthedocs.io/en/stable/interactive/magics.html) to run some bash commands.\n", "2. It uses the `--bg` flag to run the `bash` process in the background, because this worker will not terminate. It waits for all the workers before it starts.\n", "\n", - "The backgrounded worker process won't print output to this notebook, so the `&>` redirects its output to a file, so you can see what happened.\n", + "The backgrounded worker process won't print the output to this notebook. 
The `&>` redirects its output to a file, so that you can inspect what happened.\n", "\n", - "So, wait a few seconds for the process to start up:" + "Wait a few seconds for the process to start up:" ] }, { @@ -973,7 +959,7 @@ "id": "ZFPoNxg_9_Mx" }, "source": [ - "Now look what's been output to the worker's logfile so far:" + "Now, check what has been written to the worker's log file so far:" ] }, { @@ -1003,7 +989,7 @@ "id": "Pi8vPNNA_l4a" }, "source": [ - "So update the `tf_config` for the second worker's process to pick up:" + "Update the `tf_config` for the second worker's process to pick up:" ] }, { @@ -1045,7 +1031,7 @@ "id": "hX4FA2O2AuAn" }, "source": [ - "Now if you recheck the logs written by the first worker you'll see that it participated in training that model:" + "If you recheck the logs written by the first worker, you'll notice that it participated in training that model:" ] }, { @@ -1068,7 +1054,7 @@ }, "outputs": [], "source": [ - "# Delete the `TF_CONFIG`, and kill any background tasks so they don't affect the next section.\n", + "# Delete the `'TF_CONFIG'`, and kill any background tasks so they don't affect the next section.\n", "os.environ.pop('TF_CONFIG', None)\n", "%killbgscripts" ] @@ -1079,9 +1065,9 @@ "id": "bhxMXa0AaZkK" }, "source": [ - "## Multi worker training in depth\n", + "## Multi-worker training in depth\n", "\n", - "This tutorial has demonstrated a `Custom Training Loop` workflow of the multi-worker setup. A detailed description of other topics is available in the [`model.fit's guide`](https://colab.sandbox.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/distribute/multi_worker_with_keras.ipynb) of the multi-worker setup and applicable to CTLs." + "This tutorial has demonstrated a custom training loop workflow of the multi-worker setup. Detailed descriptions of other topics are available in the [Multi-worker training with Keras (`tf.keras.Model.fit`)](multi_worker_with_keras.ipynb) tutorial, much of which also applies to custom training loops." ] }, { @@ -1090,10 +1076,11 @@ "id": "ega2hdOQEmy_" }, "source": [ - "## See also\n", - "1. [Distributed Training in TensorFlow](https://www.tensorflow.org/guide/distributed_training) guide provides an overview of the available distribution strategies.\n", + "## Learn more\n", + "\n", + "1. The [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide provides an overview of the available distribution strategies.\n", "2. [Official models](https://github.com/tensorflow/models/tree/master/official), many of which can be configured to run multiple distribution strategies.\n", - "3. The [Performance section](../../guide/function.ipynb) in the guide provides information about other strategies and [tools](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models.\n" + "3. 
The [Performance section](../../guide/function.ipynb) in the `tf.function` guide provides information about other strategies and [tools](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models.\n" ] } ], @@ -1101,8 +1088,7 @@ "colab": { "collapsed_sections": [], "name": "multi_worker_with_ctl.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb b/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb index b4fffa60fb4..fcee0618854 100644 --- a/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_estimator.ipynb @@ -186,7 +186,7 @@ "\n", "There are two components of `TF_CONFIG`: `cluster` and `task`. `cluster` provides information about the entire cluster, namely the workers and parameter servers in the cluster. `task` provides information about the current task. The first component `cluster` is the same for all workers and parameter servers in the cluster, and the second component `task` is different on each worker and parameter server and specifies its own `type` and `index`. In this example, the task `type` is `worker` and the task `index` is `0`.\n", "\n", - "For illustration purposes, this tutorial shows how to set a `TF_CONFIG` with 2 workers on `localhost`. In practice, you would create multiple workers on an external IP address and port, and set `TF_CONFIG` on each worker appropriately, i.e. modify the task `index`.\n", + "For illustration purposes, this tutorial shows how to set a `TF_CONFIG` with 2 workers on `localhost`. In practice, you would create multiple workers on an external IP address and port, and set `TF_CONFIG` on each worker appropriately, i.e., modify the task `index`.\n", "\n", "Warning: *Do not execute the following code in Colab.* TensorFlow's runtime will attempt to create a gRPC server at the specified IP address and port, which will likely fail. See the [keras version](multi_worker_with_keras.ipynb) of this tutorial for an example of how you can test run multiple workers on a single machine.\n", "\n", @@ -351,8 +351,7 @@ "Tce3stUlHN0L" ], "name": "multi_worker_with_estimator.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/multi_worker_with_keras.ipynb b/site/en/tutorials/distribute/multi_worker_with_keras.ipynb index 0699e1e451d..c972e8b7fb6 100644 --- a/site/en/tutorials/distribute/multi_worker_with_keras.ipynb +++ b/site/en/tutorials/distribute/multi_worker_with_keras.ipynb @@ -63,13 +63,36 @@ "source": [ "## Overview\n", "\n", - "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and the `Model.fit` API using the `tf.distribute.Strategy` API—specifically the `tf.distribute.MultiWorkerMirroredStrategy` class. 
With the help of this strategy, a Keras model that was designed to run on a single-worker can seamlessly work on multiple workers with minimal code changes.\n", - "\n", - "For those interested in a deeper understanding of `tf.distribute.Strategy` APIs, the [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide is available for an overview of the distribution strategies TensorFlow supports.\n", + "This tutorial demonstrates how to perform multi-worker distributed training with a Keras model and the `Model.fit` API using the `tf.distribute.MultiWorkerMirroredStrategy` API. With the help of this strategy, a Keras model that was designed to run on a single worker can seamlessly work on multiple workers with minimal code changes.\n", "\n", "To learn how to use the `MultiWorkerMirroredStrategy` with Keras and a custom training loop, refer to [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb).\n", "\n", - "Note that the purpose of this tutorial is to demonstrate a minimal multi-worker example with two workers." + "This tutorial contains a minimal multi-worker example with two workers for demonstration purposes." ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "JUdRerXg6yz3" + }, + "source": [ + "### Choose the right strategy" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YAiCV_oL63GM" + }, + "source": [ + "Before you dive in, make sure that `tf.distribute.MultiWorkerMirroredStrategy` is the right choice for your accelerator(s) and training. There are two common ways of distributing training with data parallelism:\n", + "\n", + "* _Synchronous training_, where the steps of training are synced across the workers and replicas, such as `tf.distribute.MirroredStrategy`, `tf.distribute.TPUStrategy`, and `tf.distribute.MultiWorkerMirroredStrategy`. All workers train over different slices of input data in sync, aggregating gradients at each step.\n", + "* _Asynchronous training_, where the training steps are not strictly synced, such as `tf.distribute.experimental.ParameterServerStrategy`. All workers are independently training over the input data and updating variables asynchronously.\n", + "\n", + "If you are looking for multi-worker synchronous training without TPUs, then `tf.distribute.MultiWorkerMirroredStrategy` is your choice. It creates copies of all variables in the model's layers on each device across all workers. It uses `CollectiveOps`, a TensorFlow op for collective communication, to aggregate gradients and keeps the variables in sync. For those interested, check out the `tf.distribute.experimental.CommunicationOptions` parameter for the collective implementation options.\n", + "\n", + "For an overview of `tf.distribute.Strategy` APIs, refer to [Distributed training in TensorFlow](../../guide/distributed_training.ipynb)." ] }, { @@ -104,14 +127,14 @@ "source": [ "Before importing TensorFlow, make a few changes to the environment:\n", "\n", - "1. Disable all GPUs. This prevents errors caused by the workers all trying to use the same GPU. In a real-world application, each worker would be on a different machine." + "* In a real-world application, each worker would be on a different machine. For the purposes of this tutorial, all the workers will run on **this** machine. Therefore, disable all GPUs to prevent errors caused by all workers trying to use the same GPU."
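As a minimal sketch of one common way to do this (assuming no other process on the machine needs the GPUs), hide them by setting `CUDA_VISIBLE_DEVICES` before TensorFlow is first imported:

```python
# A minimal sketch: make the GPUs invisible to TensorFlow by setting
# CUDA_VISIBLE_DEVICES before the first `import tensorflow`, so that every
# worker in this tutorial falls back to the CPU.
import os

os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
```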
] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "685pbYEY3jGC" + "id": "rpEIVI5upIzM" }, "outputs": [], "source": [ @@ -124,7 +147,7 @@ "id": "7X1MS6385BWi" }, "source": [ - "2. Reset the `TF_CONFIG` environment variable (you'll learn more about this later):" + "* Reset the `TF_CONFIG` environment variable (you'll learn more about this later):" ] }, { @@ -144,7 +167,7 @@ "id": "Rd4L9Ii77SS8" }, "source": [ - "3. Make sure that the current directory is on Python's path—this allows the notebook to import the files written by `%%writefile` later:\n" + "* Make sure that the current directory is on Python's path—this allows the notebook to import the files written by `%%writefile` later:\n" ] }, { @@ -162,10 +185,30 @@ { "cell_type": "markdown", "metadata": { - "id": "pDhHuMjb7bfU" + "id": "9hLpDZhAz2q-" }, "source": [ - "Now import TensorFlow:" + "Install `tf-nightly`, as the frequency of checkpoint saving at a particular step with the `save_freq` argument in `tf.keras.callbacks.BackupAndRestore` is introduced from TensorFlow 2.10:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "-XqozLfzz30N" + }, + "outputs": [], + "source": [ + "!pip install tf-nightly" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "524e38dab658" + }, + "source": [ + "Finally, import TensorFlow:" ] }, { @@ -276,7 +319,7 @@ "\n", "### A cluster with jobs and tasks\n", "\n", - "In TensorFlow, distributed training involves: a `'cluster'`\n", + "In TensorFlow, distributed training involves a `'cluster'`\n", "with several jobs, and each of the jobs may have one or more `'task'`s.\n", "\n", "You will need the `TF_CONFIG` configuration environment variable for training on multiple machines, each of which possibly has a different role. `TF_CONFIG` is a JSON string used to specify the cluster configuration for each worker that is part of the cluster.\n", @@ -284,10 +327,10 @@ "There are two components of a `TF_CONFIG` variable: `'cluster'` and `'task'`.\n", "\n", "* A `'cluster'` is the same for all workers and provides information about the training cluster, which is a dict consisting of different types of jobs, such as `'worker'` or `'chief'`.\n", - " - In multi-worker training with `tf.distribute.MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on responsibilities, such as saving a checkpoint and writing a summary file for TensorBoard, in addition to what a regular `'worker'` does. Such `'worker'` is referred to as the chief worker (with a job name `'chief'`).\n", - " - It is customary for the `'chief'` to have `'index'` `0` be appointed to (in fact, this is how `tf.distribute.Strategy` is implemented).\n", + " - In multi-worker training with `tf.distribute.MultiWorkerMirroredStrategy`, there is usually one `'worker'` that takes on more responsibilities, such as saving a checkpoint and writing a summary file for TensorBoard, in addition to what a regular `'worker'` does. Such `'worker'` is referred to as the chief worker (with a job name `'chief'`).\n", + " - It is customary for the worker with `'index'` `0` to be the `'chief'`.\n", "\n", - "* A `'task'` provides information of the current task and is different for each worker. It specifies the `'type'` and `'index'` of that worker.\n", + "* A `'task'` provides information on the current task and is different for each worker. 
It specifies the `'type'` and `'index'` of that worker.\n", "\n", "Below is an example configuration:" ] @@ -314,7 +357,7 @@ "id": "JjgwJbPKZkJL" }, "source": [ - "Here is the same `TF_CONFIG` serialized as a JSON string:" + "Note that `tf_config` is just a local variable in Python. To use it for training configuration, serialize it as a JSON and place it in a `TF_CONFIG` environment variable." ] }, { @@ -328,22 +371,13 @@ "json.dumps(tf_config)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "f83FVYqDX3aX" - }, - "source": [ - "Note that`tf_config` is just a local variable in Python. To be able to use it for a training configuration, this dict needs to be serialized as a JSON and placed in a `TF_CONFIG` environment variable." - ] - }, { "cell_type": "markdown", "metadata": { "id": "8YFpxrcsZ2xG" }, "source": [ - "In the example configuration above, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. Therefore, this machine is the _first_ worker. It will be appointed as the `'chief'` worker and do more work than the others.\n", + "In the example configuration above, you set the task `'type'` to `'worker'` and the task `'index'` to `0`. Therefore, this machine is the _first_ worker. It will be appointed as the `'chief'` worker.\n", "\n", "Note: Other machines will need to have the `TF_CONFIG` environment variable set as well, and it should have the same `'cluster'` dict, but different task `'type'`s or task `'index'`es, depending on the roles of those machines." ] @@ -354,12 +388,8 @@ "id": "aogb74kHxynz" }, "source": [ - "For illustration purposes, this tutorial shows how you may set up a `TF_CONFIG` variable with two workers on a `localhost`.\n", - "\n", - "In practice, you would create multiple workers on external IP addresses/ports and set a `TF_CONFIG` variable on each worker accordingly.\n", - "\n", - "In this tutorial, you will use two workers:\n", - "- The first (`'chief'`) worker's `TF_CONFIG` is shown above.\n", + "In practice, you would create multiple workers on external IP addresses/ports and set a `TF_CONFIG` variable on each worker accordingly. For illustration purposes, this tutorial shows how you may set up a `TF_CONFIG` variable with two workers on a `localhost`:\n", + "- The first (`'chief'`) worker's `TF_CONFIG` as shown above.\n", "- For the second worker, you will set `tf_config['task']['index']=1`" ] }, @@ -378,9 +408,7 @@ "id": "FcjAbuGY1ACJ" }, "source": [ - "Subprocesses inherit environment variables from their parent.\n", - "\n", - "For example, you can set an environment variable in this Jupyter Notebook process as follows:" + "Subprocesses inherit environment variables from their parent. So if you set an environment variable in this Jupyter Notebook process:" ] }, { @@ -400,7 +428,7 @@ "id": "gQkIX-cg18md" }, "source": [ - "Then, you can access the environment variable from a subprocesses:" + "... then you can access the environment variable from the subprocesses:" ] }, { @@ -421,7 +449,16 @@ "id": "af6BCA-Y2fpz" }, "source": [ - "In the next section, you'll use a similar method to pass the `TF_CONFIG` to the worker subprocesses. In a real-world scenario, you wouldn't launch your jobs this way, but it's sufficient in this example." + "In the next section, you'll use this method to pass the `TF_CONFIG` to the worker subprocesses. You would never really launch your jobs this way in a real-world scenario—this tutorial is just showing how to do it with a minimal multi-worker example." 
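As a small, hedged illustration of that inheritance (the variable name below is made up for the example):

```python
# The parent process sets an environment variable...
import os
import subprocess

os.environ['GREETINGS'] = 'Hello TensorFlow!'  # hypothetical variable name

# ...and a child process spawned afterwards can read it without extra plumbing.
result = subprocess.run(['bash', '-c', 'echo "$GREETINGS"'],
                        capture_output=True, text=True)
print(result.stdout)  # prints: Hello TensorFlow!
```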
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dnDJmaRA9qnf" + }, + "source": [ + "## Train the model" ] }, { @@ -430,16 +467,7 @@ "id": "UhNtHfuxCGVy" }, "source": [ - "## Choose the right strategy\n", - "\n", - "In TensorFlow, there are two main forms of distributed training:\n", - "\n", - "* _Synchronous training_, where the steps of training are synced across the workers and replicas, and\n", - "* _Asynchronous training_, where the training steps are not strictly synced (for example, [parameter server training](parameter_server_training.ipynb)).\n", - "\n", - "This tutorial demonstrates how to perform synchronous multi-worker training using an instance of `tf.distribute.MultiWorkerMirroredStrategy`.\n", - "\n", - "`MultiWorkerMirroredStrategy` creates copies of all variables in the model's layers on each device across all workers. It uses `CollectiveOps`, a TensorFlow op for collective communication, to aggregate gradients and keep the variables in sync. The `tf.distribute.Strategy` [guide](../../guide/distributed_training.ipynb) has more details about this strategy." + "To train the model, firstly create an instance of the `tf.distribute.MultiWorkerMirroredStrategy`:" ] }, { @@ -462,23 +490,12 @@ "Note: `TF_CONFIG` is parsed and TensorFlow's GRPC servers are started at the time `MultiWorkerMirroredStrategy` is called, so the `TF_CONFIG` environment variable must be set before a `tf.distribute.Strategy` instance is created. Since `TF_CONFIG` is not set yet, the above strategy is effectively single-worker training." ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "FMy2VM4Akzpr" - }, - "source": [ - "`MultiWorkerMirroredStrategy` provides multiple implementations via the `tf.distribute.experimental.CommunicationOptions` parameter: 1) `RING` implements ring-based collectives using gRPC as the cross-host communication layer; 2) `NCCL` uses the [NVIDIA Collective Communication Library](https://developer.nvidia.com/nccl) to implement collectives; and 3) `AUTO` defers the choice to the runtime. The best choice of collective implementation depends upon the number and kind of GPUs, and the network interconnect in the cluster." - ] - }, { "cell_type": "markdown", "metadata": { "id": "H47DDcOgfzm7" }, "source": [ - "## Train the model\n", - "\n", "With the integration of `tf.distribute.Strategy` API into `tf.keras`, the only change you will make to distribute the training to multiple-workers is enclosing the model building and `model.compile()` call inside `strategy.scope()`. The distribution strategy's scope dictates how and where the variables are created, and in the case of `MultiWorkerMirroredStrategy`, the variables created are `MirroredVariable`s, and they are replicated on each of the workers.\n" ] }, @@ -584,7 +601,7 @@ "id": "qmEEStPS6vR_" }, "source": [ - "So json-serialize the `TF_CONFIG` and add it to the environment variables:" + "Serialize the `TF_CONFIG` to JSON and add it to the environment variables:" ] }, { @@ -686,7 +703,7 @@ "id": "RqZhVF7L_KOy" }, "source": [ - "The last line of the log file should say: `Started server with target: grpc://localhost:12345`. The first worker is now ready, and is waiting for all the other worker(s) to be ready to proceed." + "The last line of the log file should say: `Started server with target: grpc://localhost:12345`. The first worker is now ready and is waiting for all the other worker(s) to be ready to proceed." 
] }, { @@ -758,11 +775,7 @@ "id": "zL79ak5PMzEg" }, "source": [ - "Unsurprisingly, this ran _slower_ than the test run at the beginning of this tutorial.\n", - "\n", - "Running multiple workers on a single machine only adds overhead.\n", - "\n", - "The goal here was not to improve the training time, but only to give an example of multi-worker training." + "Note: This may run slower than the test run at the beginning of this tutorial because running multiple workers on a single machine only adds overhead. The goal here is not to improve the training time but to give an example of multi-worker training.\n" ] }, { @@ -784,11 +797,16 @@ "id": "9j2FJVHoUIrE" }, "source": [ - "## Multi-worker training in depth\n", - "\n", - "So far, you have learned how to perform a basic multi-worker setup.\n", - "\n", - "During the rest of the tutorial, you will learn about other factors, which may be useful or important for real use cases, in detail." + "## Multi-worker training in depth\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C1hBks_dAZmT" + }, + "source": [ + "So far, you have learned how to perform a basic multi-worker setup. The rest of the tutorial goes over other factors, which may be useful or important for real use cases, in detail." ] }, { @@ -824,21 +842,37 @@ "dataset_no_auto_shard = multi_worker_dataset.with_options(options)" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "z85hElxsBQsT" + }, + "source": [ + "### Evaluation" + ] + }, { "cell_type": "markdown", "metadata": { "id": "gmqvlh5LhAoU" }, "source": [ - "### Evaluation\n", - "\n", - "If you pass the `validation_data` into `Model.fit`, it will alternate between training and evaluation for each epoch. The evaluation taking the `validation_data` is distributed across the same set of workers and the evaluation results are aggregated and available for all workers.\n", + "If you pass the `validation_data` into `Model.fit` as well, it will alternate between training and evaluation for each epoch. The evaluation work is distributed across the same set of workers, and its results are aggregated and available to all workers.\n", "\n", "Similar to training, the validation dataset is automatically sharded at the file level. You need to set a global batch size in the validation dataset and set the `validation_steps`.\n", "\n", - "A repeated dataset is also recommended for evaluation.\n", + "A repeated dataset (by calling `tf.data.Dataset.repeat`) is recommended for evaluation.\n", "\n", - "Alternatively, you can also create another task that periodically reads checkpoints and runs the evaluation. This is what Estimator does. But this is not a recommended way to perform evaluation and thus its details are omitted." + "Alternatively, you can also create another task that periodically reads checkpoints and runs the evaluation. This is what an Estimator does. But this is not a recommended way to perform evaluation and thus its details are omitted." 
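For reference, here is a hedged sketch of what passing `validation_data` can look like. It assumes `multi_worker_model` and `multi_worker_dataset` are defined as earlier in this tutorial; the validation tensors are random placeholders that only illustrate the shape of the call:

```python
# A sketch only: stand-in validation data, batched with the global batch size
# and repeated so that `validation_steps` can be honored on every epoch.
import tensorflow as tf

global_batch_size = 64
x_val = tf.random.uniform((512, 28, 28))
y_val = tf.random.uniform((512,), maxval=10, dtype=tf.int64)

validation_dataset = (tf.data.Dataset.from_tensor_slices((x_val, y_val))
                      .batch(global_batch_size)
                      .repeat())

multi_worker_model.fit(multi_worker_dataset,
                       epochs=3,
                       steps_per_epoch=70,
                       validation_data=validation_dataset,
                       validation_steps=4)
```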
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FNkoxUPJBNTb" + }, + "source": [ + "### Performance" ] }, { @@ -847,25 +881,21 @@ "id": "XVk4ftYx6JAO" }, "source": [ - "### Performance\n", - "\n", - "You now have a Keras model that is all set up to run in multiple workers with the `MultiWorkerMirroredStrategy`.\n", - "\n", - "To tweak performance of multi-worker training, you can try the following:\n", + "To tweak the performance of multi-worker training, you can try the following:\n", "\n", "- `tf.distribute.MultiWorkerMirroredStrategy` provides multiple [collective communication implementations](https://www.tensorflow.org/api_docs/python/tf/distribute/experimental/CommunicationImplementation):\n", " - `RING` implements ring-based collectives using gRPC as the cross-host communication layer.\n", " - `NCCL` uses the [NVIDIA Collective Communication Library](https://developer.nvidia.com/nccl) to implement collectives.\n", " - `AUTO` defers the choice to the runtime.\n", " \n", - " The best choice of collective implementation depends upon the number of GPUs, the type of GPUs, and the network interconnect in the cluster. To override the automatic choice, specify the `communication_options` parameter of `MultiWorkerMirroredStrategy`'s constructor. For example:\n", + " The best choice of collective implementation depends upon the number of GPUs, the type of GPUs, and the network interconnects in the cluster. To override the automatic choice, specify the `communication_options` parameter of `MultiWorkerMirroredStrategy`'s constructor. For example:\n", " \n", " ```python\n", - " communication_options=tf.distribute.experimental.CommunicationOptions(implementation=tf.distribute.experimental.CollectiveCommunication.NCCL)\n", + " communication_options=tf.distribute.experimental.CommunicationOptions(implementation=tf.distribute.experimental.CommunicationImplementation.NCCL)\n", " ```\n", "\n", "- Cast the variables to `tf.float` if possible:\n", - " - The official ResNet model includes [an example](https://github.com/tensorflow/models/blob/8367cf6dabe11adf7628541706b660821f397dce/official/resnet/resnet_model.py#L466) of how this can be done." + " - The official ResNet model includes [an example](https://github.com/tensorflow/models/blob/8367cf6dabe11adf7628541706b660821f397dce/official/resnet/resnet_model.py#L466) of how to do this." ] }, { @@ -882,7 +912,7 @@ "\n", "When a worker becomes unavailable, other workers will fail (possibly after a timeout). In such cases, the unavailable worker needs to be restarted, as well as other workers that have failed.\n", "\n", - "Note: Previously, the `ModelCheckpoint` callback provided a mechanism to restore the training state upon a restart from a job failure for multi-worker training. The TensorFlow team are introducing a new [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback, which also adds the support to single-worker training for a consistent experience, and removed the fault tolerance functionality from existing `ModelCheckpoint` callback. From now on, applications that rely on this behavior should migrate to the new `BackupAndRestore` callback." + "Note: Previously, the `ModelCheckpoint` callback provided a mechanism to restore the training state upon a restart from a job failure for multi-worker training. 
The TensorFlow team is introducing a new [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback, which also adds the support to single-worker training for a consistent experience, and removed the fault tolerance functionality from existing `ModelCheckpoint` callback. From now on, applications that rely on this behavior should migrate to the new `BackupAndRestore` callback." ] }, { @@ -891,13 +921,13 @@ "id": "KvHPjGlyyFt6" }, "source": [ - "#### ModelCheckpoint callback\n", + "#### The `ModelCheckpoint` callback\n", "\n", "`ModelCheckpoint` callback no longer provides fault tolerance functionality, please use [`BackupAndRestore`](#scrollTo=kmH8uCUhfn4w) callback instead.\n", "\n", "The `ModelCheckpoint` callback can still be used to save checkpoints. But with this, if training was interrupted or successfully finished, in order to continue training from the checkpoint, the user is responsible to load the model manually.\n", "\n", - "Optionally the user can choose to save and restore model/weights outside `ModelCheckpoint` callback." + "Optionally, users can choose to save and restore model/weights outside `ModelCheckpoint` callback." ] }, { @@ -919,14 +949,14 @@ "\n", "You should have some cleanup logic that deletes the temporary directories created by the workers once your training has completed.\n", "\n", - "The reason for saving on the chief and workers at the same time is because you might be aggregating variables during checkpointing which requires both the chief and workers to participate in the allreduce communication protocol. On the other hand, letting chief and workers save to the same model directory will result in errors due to contention.\n", + "The reason for saving on the chief and workers at the same time is because you might be aggregating variables during checkpointing, which requires both the chief and workers to participate in the allreduce communication protocol. On the other hand, letting chief and workers save to the same model directory will result in errors due to contention.\n", "\n", - "Using the `MultiWorkerMirroredStrategy`, the program is run on every worker, and in order to know whether the current worker is chief, it takes advantage of the cluster resolver object that has attributes `task_type` and `task_id`:\n", - "- `task_type` tells you what the current job is (e.g. `'worker'`).\n", + "Using the `MultiWorkerMirroredStrategy`, the program is run on every worker, and in order to know whether the current worker is the chief, it takes advantage of the cluster resolver object that has attributes `task_type` and `task_id`:\n", + "- `task_type` tells you what the current job is (for example, `'worker'`).\n", "- `task_id` tells you the identifier of the worker.\n", "- The worker with `task_id == 0` is designated as the chief worker.\n", "\n", - "In the code snippet below, the `write_filepath` function provides the file path to write, which depends on the the worker's `task_id`:\n", + "In the code snippet below, the `write_filepath` function provides the file path to write, which depends on the worker's `task_id`:\n", "\n", "- For the chief worker (with `task_id == 0`), it writes to the original file path. 
\n", "- For other workers, it creates a temporary directory—`temp_dir`—with the `task_id` in the directory path to write in:" @@ -943,14 +973,14 @@ "model_path = '/tmp/keras-model'\n", "\n", "def _is_chief(task_type, task_id):\n", - " # Note: there are two possible `TF_CONFIG` configuration.\n", + " # Note: there are two possible `TF_CONFIG` configurations.\n", " # 1) In addition to `worker` tasks, a `chief` task type is use;\n", " # in this case, this function should be modified to\n", " # `return task_type == 'chief'`.\n", " # 2) Only `worker` task type is used; in this case, worker 0 is\n", " # regarded as the chief. The implementation demonstrated here\n", " # is for this case.\n", - " # For the purpose of this Colab section, the `task_type is None` case\n", + " # For the purpose of this Colab section, the `task_type` is `None` case\n", " # is added because it is effectively run with only a single worker.\n", " return (task_type == 'worker' and task_id == 0) or task_type is None\n", "\n", @@ -981,6 +1011,15 @@ "With that, you're now ready to save:" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "XnToxeIcg_6O" + }, + "source": [ + "Deprecated: For Keras objects, it's recommended to use the new high-level `.keras` format and `tf.keras.Model.export`, as demonstrated in the guide [here](https://www.tensorflow.org/guide/keras/save_and_serialize). The low-level SavedModel format continues to be supported for existing code." + ] + }, { "cell_type": "code", "execution_count": null, @@ -998,7 +1037,7 @@ "id": "8LXUVVl9_v5x" }, "source": [ - "As described above, later on the model should only be loaded from the path chief saved to, so let's remove the temporary ones the non-chief workers saved:" + "As described above, later on the model should only be loaded from the file path the chief worker saved to. Therefore, remove the temporary ones the non-chief workers have saved:" ] }, { @@ -1019,7 +1058,7 @@ "id": "Nr-2PKlHAPBT" }, "source": [ - "Now, when it's time to load, let's use convenient `tf.keras.models.load_model` API, and continue with further work.\n", + "Now, when it's time to load, use the convenient `tf.keras.models.load_model` API, and continue with further work.\n", "\n", "Here, assume only using single worker to load and continue training, in which case you do not call `tf.keras.models.load_model` within another `strategy.scope()` (note that `strategy = tf.distribute.MultiWorkerMirroredStrategy()`, as defined earlier):" ] @@ -1117,15 +1156,17 @@ "id": "kmH8uCUhfn4w" }, "source": [ - "#### BackupAndRestore callback\n", + "#### The `BackupAndRestore` callback\n", + "\n", + "The `tf.keras.callbacks.BackupAndRestore` callback provides the fault tolerance functionality by backing up the model and current training state in a temporary checkpoint file under `backup_dir` argument to `BackupAndRestore`. \n", "\n", - "The `tf.keras.callbacks.BackupAndRestore` callback provides the fault tolerance functionality by backing up the model and current epoch number in a temporary checkpoint file under `backup_dir` argument to `BackupAndRestore`. This is done at the end of each epoch.\n", + "Note: In Tensorflow 2.9, the current model and the training state is backed up at epoch boundaries. In the `tf-nightly` version and from TensorFlow 2.10, the `BackupAndRestore` callback can back up the model and the training state at epoch or step boundaries. `BackupAndRestore` accepts an optional `save_freq` argument. `save_freq` accepts either `'epoch'` or an `int` value. 
If `save_freq` is set to `'epoch'` the model is backed up after every epoch. If `save_freq` is set to an integer value greater than `0`, the model is backed up after every `save_freq` number of batches.\n", "\n", - "Once jobs get interrupted and restart, the callback restores the last checkpoint, and training continues from the beginning of the interrupted epoch. Any partial training already done in the unfinished epoch before interruption will be thrown away, so that it doesn't affect the final model state.\n", + "Once the jobs get interrupted and restarted, the `BackupAndRestore` callback restores the last checkpoint, and you can continue training from the beginning of the epoch and step at which the training state was last saved.\n", "\n", "To use it, provide an instance of `tf.keras.callbacks.BackupAndRestore` at the `Model.fit` call.\n", "\n", - "With `MultiWorkerMirroredStrategy`, if a worker gets interrupted, the whole cluster pauses until the interrupted worker is restarted. Other workers will also restart, and the interrupted worker rejoins the cluster. Then, every worker reads the checkpoint file that was previously saved and picks up its former state, thereby allowing the cluster to get back in sync. Then, the training continues.\n", + "With `MultiWorkerMirroredStrategy`, if a worker gets interrupted, the whole cluster will pause until the interrupted worker is restarted. Other workers will also restart, and the interrupted worker will rejoin the cluster. Then, every worker will read the checkpoint file that was previously saved and pick up its former state, thereby allowing the cluster to get back in sync. Then, the training will continue. The distributed dataset iterator state will be re-initialized and not restored.\n", "\n", "The `BackupAndRestore` callback uses the `CheckpointManager` to save and restore the training state, which generates a file called checkpoint that tracks existing checkpoints together with the latest one. For this reason, `backup_dir` should not be re-used to store other checkpoints in order to avoid name collision.\n", "\n", @@ -1143,7 +1184,8 @@ "outputs": [], "source": [ "# Multi-worker training with `MultiWorkerMirroredStrategy`\n", - "# and the `BackupAndRestore` callback.\n", + "# and the `BackupAndRestore` callback. The training state \n", + "# is backed up at epoch boundaries by default.\n", "\n", "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup')]\n", "with strategy.scope():\n", @@ -1154,6 +1196,66 @@ " callbacks=callbacks)" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "f8e86TAp0Rsl" + }, + "source": [ + "If the `save_freq` argument in the `BackupAndRestore` callback is set to `'epoch'`, the model is backed up after every epoch." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rZjQGPsF0aEI" + }, + "outputs": [], + "source": [ + "# The training state is backed up at epoch boundaries because `save_freq` is\n", + "# set to `epoch`.\n", + "\n", + "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup')]\n", + "with strategy.scope():\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", + "multi_worker_model.fit(multi_worker_dataset,\n", + " epochs=3,\n", + " steps_per_epoch=70,\n", + " callbacks=callbacks)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "p-r44kCM0jc6" + }, + "source": [ + "Note: The next code block uses features that are only available in `tf-nightly` until Tensorflow 2.10 is released.\n", + "\n", + "If the `save_freq` argument in the `BackupAndRestore` callback is set to an integer value greater than `0`, the model is backed up after every `save_freq` number of batches." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bSJUyLSF0moC" + }, + "outputs": [], + "source": [ + "# The training state is backed up at every 30 steps because `save_freq` is set\n", + "# to an integer value of `30`.\n", + "\n", + "callbacks = [tf.keras.callbacks.BackupAndRestore(backup_dir='/tmp/backup', save_freq=30)]\n", + "with strategy.scope():\n", + " multi_worker_model = mnist_setup.build_and_compile_cnn_model()\n", + "multi_worker_model.fit(multi_worker_dataset,\n", + " epochs=3,\n", + " steps_per_epoch=70,\n", + " callbacks=callbacks)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1162,7 +1264,7 @@ "source": [ "If you inspect the directory of `backup_dir` you specified in `BackupAndRestore`, you may notice some temporarily generated checkpoint files. Those files are needed for recovering the previously lost instances, and they will be removed by the library at the end of `Model.fit` upon successful exiting of your training.\n", "\n", - "Note: Currently the `BackupAndRestore` callback only supports eager mode. In graph mode, consider using [Save/Restore Model](#model_saving_and_loading) mentioned above, and by providing `initial_epoch` in `Model.fit`." + "Note: Currently the `BackupAndRestore` callback only supports eager mode. In graph mode, consider using `Model.save`/`tf.saved_model.save` and `tf.keras.models.load_model` for saving and restoring models, respectively, as described in the _Model saving and loading_ section above, and by providing `initial_epoch` in `Model.fit` during training." ] }, { @@ -1173,7 +1275,7 @@ "source": [ "## Additional resources\n", "\n", - "1. The [Distributed training in TensorFlow](https://www.tensorflow.org/guide/distributed_training) guide provides an overview of the available distribution strategies.\n", + "1. The [Distributed training in TensorFlow](../../guide/distributed_training.ipynb) guide provides an overview of the available distribution strategies.\n", "1. The [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb) tutorial shows how to use the `MultiWorkerMirroredStrategy` with Keras and a custom training loop.\n", "1. Check out the [official models](https://github.com/tensorflow/models/tree/master/official), many of which can be configured to run multiple distribution strategies.\n", "1. 
The [Better performance with tf.function](../../guide/function.ipynb) guide provides information about other strategies and tools, such as the [TensorFlow Profiler](../../guide/profiler.md) you can use to optimize the performance of your TensorFlow models." @@ -1182,9 +1284,8 @@ ], "metadata": { "colab": { - "collapsed_sections": [], "name": "multi_worker_with_keras.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/parameter_server_training.ipynb b/site/en/tutorials/distribute/parameter_server_training.ipynb index 0edbd218744..2e6bb0cfce2 100644 --- a/site/en/tutorials/distribute/parameter_server_training.ipynb +++ b/site/en/tutorials/distribute/parameter_server_training.ipynb @@ -74,7 +74,7 @@ "\n", "A parameter server training cluster consists of _workers_ and _parameter servers_. Variables are created on parameter servers and they are read and updated by workers in each step. By default, workers read and update these variables independently without synchronizing with each other. This is why sometimes parameter server-style training is called _asynchronous training_.\n", "\n", - "In TensorFlow 2, parameter server training is powered by the `tf.distribute.experimental.ParameterServerStrategy` class, which distributes the training steps to a cluster that scales up to thousands of workers (accompanied by parameter servers)." + "In TensorFlow 2, parameter server training is powered by the `tf.distribute.ParameterServerStrategy` class, which distributes the training steps to a cluster that scales up to thousands of workers (accompanied by parameter servers)." ] }, { @@ -87,9 +87,9 @@ "\n", "There are two main supported training methods:\n", "\n", - "- The Keras `Model.fit` API, which is recommended when you prefer a high-level abstraction and handling of training.\n", - "- A custom training loop (you can refer to [Custom training](https://www.tensorflow.org/tutorials/customization/custom_training_walkthrough#train_the_model), [Writing a training loop from scratch\n", - "](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Custom training loop with Keras and MultiWorkerMirroredStrategy](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_ctl) for more details.) Custom loop training is recommended when you prefer to define the details of their training loop." + "- The Keras `Model.fit` API: if you prefer a high-level abstraction and handling of training. This is generally recommended if you are training a `tf.keras.Model`.\n", + "- A custom training loop: if you prefer to define the details of your training loop (you can refer to guides on [Custom training](../customization/custom_training_walkthrough.ipynb), [Writing a training loop from scratch\n", + "](https://www.tensorflow.org/guide/keras/writing_a_training_loop_from_scratch) and [Custom training loop with Keras and MultiWorkerMirroredStrategy](multi_worker_with_ctl.ipynb) for more details)." 
] }, { @@ -100,12 +100,12 @@ "source": [ "### A cluster with jobs and tasks\n", "\n", - "Regardless of the API of choice (`Model.fit` or a custom training loop), distributed training in TensorFlow 2 involves: a `'cluster'` with several `'jobs'`, and each of the jobs may have one or more `'tasks'`.\n", + "Regardless of the API of choice (`Model.fit` or a custom training loop), distributed training in TensorFlow 2 involves a `'cluster'` with several `'jobs'`, and each of the jobs may have one or more `'tasks'`.\n", "\n", "When using parameter server training, it is recommended to have:\n", "\n", "- One _coordinator_ job (which has the job name `chief`)\n", - "- Multiple _worker_ jobs (job name `worker`); and\n", + "- Multiple _worker_ jobs (job name `worker`)\n", "- Multiple _parameter server_ jobs (job name `ps`)\n", "\n", "The _coordinator_ creates resources, dispatches training tasks, writes checkpoints, and deals with task failures. The _workers_ and _parameter servers_ run `tf.distribute.Server` instances that listen for requests from the coordinator." @@ -117,10 +117,9 @@ "id": "oLV1FbpLtqtB" }, "source": [ - "### Parameter server training with `Model.fit` API\n", + "### Parameter server training with the `Model.fit` API\n", "\n", - "Parameter server training with the `Model.fit` API requires the coordinator to use a `tf.distribute.experimental.ParameterServerStrategy` object, and a `tf.keras.utils.experimental.DatasetCreator` as the input. Similar to `Model.fit` usage with no strategy, or with other strategies, the workflow involves creating and compiling the model, preparing the callbacks, followed by\n", - "a `Model.fit` call." + "Parameter server training with the `Model.fit` API requires the coordinator to use a `tf.distribute.ParameterServerStrategy` object. Similar to `Model.fit` usage with no strategy, or with other strategies, the workflow involves creating and compiling the model, preparing the callbacks, and calling `Model.fit`." 
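As an outline, here is a compact, hedged sketch of that workflow; it assumes `cluster_resolver` describes a cluster that is already running, and the full runnable version is developed step by step later in this tutorial:

```python
# A compact sketch, not the tutorial's full code: `cluster_resolver` is assumed
# to point at running worker and parameter server tasks.
import tensorflow as tf

strategy = tf.distribute.ParameterServerStrategy(cluster_resolver)

with strategy.scope():
  # Create and compile the model under the strategy scope.
  model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])
  model.compile(tf.keras.optimizers.legacy.SGD(), loss='mse',
                steps_per_execution=10)

# A shuffled, repeated dataset plus `steps_per_epoch` defines epoch boundaries.
dataset = tf.data.Dataset.from_tensor_slices(
    (tf.random.uniform((10, 10)), tf.random.uniform((10,)))
).shuffle(10).repeat().batch(64)

model.fit(dataset, epochs=5, steps_per_epoch=20)
```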
] }, { @@ -131,12 +130,11 @@ "source": [ "### Parameter server training with a custom training loop\n", "\n", - "With custom training loops, the `tf.distribute.experimental.coordinator.ClusterCoordinator` class is the key component used for the coordinator.\n", + "With custom training loops, the `tf.distribute.coordinator.ClusterCoordinator` class is the key component used for the coordinator.\n", "\n", - "- The `ClusterCoordinator` class needs to work in conjunction with a `tf.distribute.Strategy` object.\n", - "- This `tf.distribute.Strategy` object is needed to provide the information of the cluster and is used to define a training step, as demonstrated in [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training#training_loop).\n", + "- The `ClusterCoordinator` class needs to work in conjunction with a `tf.distribute.ParameterServerStrategy` object.\n", + "- This `tf.distribute.Strategy` object is needed to provide the information of the cluster and is used to define a training step, as demonstrated in [Custom training with tf.distribute.Strategy](custom_training.ipynb).\n", "- The `ClusterCoordinator` object then dispatches the execution of these training steps to remote workers.\n", - "- For parameter server training, the `ClusterCoordinator` needs to work with a `tf.distribute.experimental.ParameterServerStrategy`.\n", "\n", "The most important API provided by the `ClusterCoordinator` object is `schedule`:\n", "\n", @@ -144,7 +142,7 @@ "- The queued functions will be dispatched to remote workers in background threads and their `RemoteValue`s will be filled asynchronously.\n", "- Since `schedule` doesn’t require worker assignment, the `tf.function` passed in can be executed on any available worker.\n", "- If the worker it is executed on becomes unavailable before its completion, the function will be retried on another available worker.\n", - "- Because of this fact and the fact that function execution is not atomic, a function may be executed more than once.\n", + "- Because of this fact and the fact that function execution is not atomic, a single function call may be executed more than once.\n", "\n", "In addition to dispatching remote functions, the `ClusterCoordinator` also helps\n", "to create datasets on all the workers and rebuild these datasets when a worker recovers from failure." @@ -196,9 +194,9 @@ "source": [ "## Cluster setup\n", "\n", - "As mentioned above, a parameter server training cluster requires a coordinator task that runs your training program, one or several workers and parameter server tasks that run TensorFlow servers—`tf.distribute.Server`—and possibly an additional evaluation task that runs side-car evaluation (see the side-car evaluation section below). The requirements to set them up are:\n", + "As mentioned above, a parameter server training cluster requires a coordinator task that runs your training program, one or several workers and parameter server tasks that run TensorFlow servers—`tf.distribute.Server`—and possibly an additional evaluation task that runs sidecar evaluation (refer to the [sidecar evaluation section](#sidecar_evaluation) below). 
The requirements to set them up are:\n", "\n", - "- The coordinator task needs to know the addresses and ports of all other TensorFlow servers except the evaluator.\n", + "- The coordinator task needs to know the addresses and ports of all other TensorFlow servers, except the evaluator.\n", "- The workers and parameter servers need to know which port they need to listen to. For the sake of simplicity, you can usually pass in the complete cluster information when creating TensorFlow servers on these tasks.\n", "- The evaluator task doesn’t have to know the setup of the training cluster. If it does, it should not attempt to connect to the training cluster.\n", "- Workers and parameter servers should have task types as `\"worker\"` and `\"ps\"`, respectively. The coordinator should use `\"chief\"` as the task type for legacy reasons.\n", @@ -214,7 +212,7 @@ "source": [ "### In-process cluster\n", "\n", - "You will start by creating several TensorFlow servers in advance and connect to them later. Note that this is only for the purpose of this tutorial's demonstration, and in real training the servers will be started on `\"worker\"` and `\"ps\"` machines." + "You will start by creating several TensorFlow servers in advance and you will connect to them later. Note that this is only for the purpose of this tutorial's demonstration, and in real training the servers will be started on `\"worker\"` and `\"ps\"` machines." ] }, { @@ -276,9 +274,9 @@ "id": "pX_91OByt0J2" }, "source": [ - "The in-process cluster setup is frequently used in unit testing, such as [here](https://github.com/tensorflow/tensorflow/blob/7621d31921c2ed979f212da066631ddfda37adf5/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py#L437).\n", + "The in-process cluster setup is frequently used in unit testing, such as [here](https://github.com/tensorflow/tensorflow/blob/eb4c40fc91da260199fa2aed6fe67d36ad49fafd/tensorflow/python/distribute/coordinator/cluster_coordinator_test.py#L447).\n", "\n", - "Another option for local testing is to launch processes on the local machine—check out [Multi-worker training with Keras](https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras) for an example of this approach." + "Another option for local testing is to launch processes on the local machine—check out [Multi-worker training with Keras](multi_worker_with_keras.ipynb) for an example of this approach." ] }, { @@ -289,7 +287,7 @@ "source": [ "## Instantiate a ParameterServerStrategy\n", "\n", - "Before you dive into the training code, let's instantiate a `ParameterServerStrategy` object. Note that this is needed regardless of whether you are proceeding with `Model.fit` or a custom training loop. The `variable_partitioner` argument will be explained in the [Variable sharding section](#variable-sharding)." + "Before you dive into the training code, let's instantiate a `tf.distribute.ParameterServerStrategy` object. Note that this is needed regardless of whether you are proceeding with `Model.fit` or a custom training loop. The `variable_partitioner` argument will be explained in the [Variable sharding section](#variable_sharding)." 
] }, { @@ -305,7 +303,7 @@ " min_shard_bytes=(256 << 10),\n", " max_shards=NUM_PS))\n", "\n", - "strategy = tf.distribute.experimental.ParameterServerStrategy(\n", + "strategy = tf.distribute.ParameterServerStrategy(\n", " cluster_resolver,\n", " variable_partitioner=variable_partitioner)" ] @@ -328,7 +326,8 @@ "### Variable sharding\n", "\n", "Variable sharding refers to splitting a variable into multiple smaller\n", - "variables, which are called _shards_. Variable sharding may be useful to distribute the network load when accessing these shards. It is also useful to distribute computation and storage of a normal variable across multiple parameter servers.\n", + "variables, which are called _shards_. Variable sharding may be useful to distribute the network load when accessing these shards. It is also useful to distribute computation and storage of a normal variable across multiple parameter servers, for example, when using very large embeddings\n", + "that may not fit in a single machine's memory.\n", "\n", "To enable variable sharding, you can pass in a `variable_partitioner` when\n", "constructing a `ParameterServerStrategy` object. The `variable_partitioner` will\n", @@ -337,7 +336,7 @@ "`variable_partitioner`s are provided such as\n", "`tf.distribute.experimental.partitioners.MinSizePartitioner`. It is recommended to use size-based partitioners like\n", "`tf.distribute.experimental.partitioners.MinSizePartitioner` to avoid\n", - "partitioning small variables, which could have negative impact on model training\n", + "partitioning small variables, which could have a negative impact on model training\n", "speed." ] }, @@ -347,8 +346,8 @@ "id": "1--SxlxtsOb7" }, "source": [ - "When a `variable_partitioner` is passed in and if you create a variable directly\n", - "under `Strategy.scope`, it will become a container type with a `variables`\n", + "When a `variable_partitioner` is passed in, and you create a variable directly\n", + "under `Strategy.scope`, the variable will become a container type with a `variables`\n", "property, which provides access to the list of shards. In most cases, this\n", "container will be automatically converted to a Tensor by concatenating all the\n", "shards. As a result, it can be used as a normal variable. On the other hand,\n", @@ -356,7 +355,7 @@ "implementation for this container type and in these methods automatic\n", "concatenation will be avoided.\n", "\n", - "Refer to the API docs of `tf.distribute.experimental.ParameterServerStrategy` for more details." + "Refer to the API docs of `tf.distribute.ParameterServerStrategy` for more details." ] }, { @@ -368,7 +367,7 @@ "## Training with `Model.fit`\n", "\n", "\n", - "Keras provides an easy-to-use training API via `Model.fit` that handles the training loop under the hood, with the flexibility of overridable `train_step`, and callbacks, which provide functionalities such as checkpoint saving or summary saving for TensorBoard. With `Model.fit`, the same training code can be used for other strategies with a simple swap of the strategy object." + "Keras provides an easy-to-use training API via `Model.fit` that handles the training loop under the hood, with the flexibility of an overridable `train_step`, and callbacks which provide functionalities such as checkpoint saving or summary saving for TensorBoard. With `Model.fit`, the same training code can be used with other strategies with a simple swap of the strategy object." 
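To make the "simple swap" concrete, here is a short hedged sketch (the helper name below is made up for illustration):

```python
# Swapping the strategy object: the model-building code stays identical,
# only the strategy changes. `build_and_compile_model` is a made-up helper
# standing in for whatever model code you use.
import tensorflow as tf

def build_and_compile_model():
  model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])
  model.compile(tf.keras.optimizers.legacy.SGD(), loss='mse')
  return model

# For example, a single machine with one or more GPUs:
strategy = tf.distribute.MirroredStrategy()
# Parameter server training would swap only the line above:
# strategy = tf.distribute.ParameterServerStrategy(cluster_resolver)

with strategy.scope():
  model = build_and_compile_model()
```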
] }, { @@ -379,12 +378,14 @@ "source": [ "### Input data\n", "\n", - "`Model.fit` with parameter server training requires that the input data be\n", - "provided in a callable that takes a single argument of type `tf.distribute.InputContext`, and returns a `tf.data.Dataset`. Then, create a `tf.keras.utils.experimental.DatasetCreator` object that takes such `callable`, and an optional `tf.distribute.InputOptions` object via `input_options` argument.\n", + "Keras `Model.fit` with `tf.distribute.ParameterServerStrategy` can take input data in the form of a `tf.data.Dataset`, `tf.distribute.DistributedDataset`, or a `tf.keras.utils.experimental.DatasetCreator`, with `Dataset` being the recommended option for ease of use. If you encounter memory issues using `Dataset`, however, you may need to use `DatasetCreator` with a callable `dataset_fn` argument (refer to the `tf.keras.utils.experimental.DatasetCreator` API documentation for details).\n", "\n", - "Note that it is recommended to shuffle and repeat the data with parameter server training, and specify `steps_per_epoch` in `fit` call so the library knows the epoch boundaries.\n", + "If you transform your dataset into a `tf.data.Dataset`, you should use `Dataset.shuffle` and `Dataset.repeat`, as demonstrated in the code example below.\n", "\n", - "Refer to the [Distributed input](https://www.tensorflow.org/tutorials/distribute/input#usage_2) tutorial for more information about the `InputContext` argument." + "- Keras `Model.fit` with parameter server training assumes that each worker receives the same dataset, except when it is shuffled differently. Therefore, by calling `Dataset.shuffle`, you ensure more even iterations over the data.\n", + "- Because workers do not synchronize, they may finish processing their datasets at different times. Therefore, the easiest way to define epochs with parameter server training is to use `Dataset.repeat`—which repeats a dataset indefinitely when called without an argument—and specify the `steps_per_epoch` argument in the `Model.fit` call.\n", + "\n", + "Refer to the \"Training workflows\" section of the [tf.data guide](../../guide/data.ipynb) for more details on `shuffle` and `repeat`." 
] }, { @@ -395,23 +396,14 @@ }, "outputs": [], "source": [ - "def dataset_fn(input_context):\n", - " global_batch_size = 64\n", - " batch_size = input_context.get_per_replica_batch_size(global_batch_size)\n", - "\n", - " x = tf.random.uniform((10, 10))\n", - " y = tf.random.uniform((10,))\n", - "\n", - " dataset = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).repeat()\n", - " dataset = dataset.shard(\n", - " input_context.num_input_pipelines,\n", - " input_context.input_pipeline_id)\n", - " dataset = dataset.batch(batch_size)\n", - " dataset = dataset.prefetch(2)\n", + "global_batch_size = 64\n", "\n", - " return dataset\n", + "x = tf.random.uniform((10, 10))\n", + "y = tf.random.uniform((10,))\n", "\n", - "dc = tf.keras.utils.experimental.DatasetCreator(dataset_fn)" + "dataset = tf.data.Dataset.from_tensor_slices((x, y)).shuffle(10).repeat()\n", + "dataset = dataset.batch(global_batch_size)\n", + "dataset = dataset.prefetch(2)" ] }, { @@ -420,7 +412,7 @@ "id": "v_jhF70K7zON" }, "source": [ - "The code in `dataset_fn` will be invoked on the input device, which is usually the CPU, on each of the worker machines.\n" + "If you instead create your dataset with `tf.keras.utils.experimental.DatasetCreator`, the code in `dataset_fn` will be invoked on the input device, which is usually the CPU, on each of the worker machines.\n" ] }, { @@ -431,7 +423,7 @@ "source": [ "### Model construction and compiling\n", "\n", - "Now, you will create a `tf.keras.Model`—a trivial `tf.keras.models.Sequential` model for demonstration purposes—followed by a `Model.compile` call to incorporate components, such as an optimizer, metrics, or parameters such as `steps_per_execution`:" + "Now, you will create a `tf.keras.Model`—a trivial `tf.keras.models.Sequential` model for demonstration purposes—followed by a `Model.compile` call to incorporate components, such as an optimizer, metrics, and other parameters such as `steps_per_execution`:" ] }, { @@ -445,7 +437,7 @@ "with strategy.scope():\n", " model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])\n", "\n", - " model.compile(tf.keras.optimizers.SGD(), loss='mse', steps_per_execution=10)" + " model.compile(tf.keras.optimizers.legacy.SGD(), loss=\"mse\", steps_per_execution=10)" ] }, { @@ -458,13 +450,13 @@ "\n", " \n", "\n", - "Before you call `model.fit` for the actual training, let's prepare the needed callbacks for common tasks, such as:\n", + "Before you call Keras `Model.fit` for the actual training, prepare any needed [callbacks](https://www.tensorflow.org/guide/keras/train_and_evaluate) for common tasks, such as:\n", "\n", - "- `ModelCheckpoint`: to save the model weights.\n", - "- `BackupAndRestore`: to make sure the training progress is automatically backed up, and recovered if the cluster experiences unavailability (such as abort or preemption); or\n", - "- `TensorBoard`: to save the progress reports into summary files, which can be visualized in the TensorBoard tool.\n", + "- `tf.keras.callbacks.ModelCheckpoint`: saves the model at a certain frequency, such as after every epoch.\n", + "- `tf.keras.callbacks.BackupAndRestore`: provides fault tolerance by backing up the model and current epoch number, if the cluster experiences unavailability (such as abort or preemption). 
You can then restore the training state upon a restart from a job failure, and continue training from the beginning of the interrupted epoch.\n", + "- `tf.keras.callbacks.TensorBoard`: periodically writes model logs in summary files that can be visualized in the TensorBoard tool.\n", "\n", - "Note: Due to performance consideration, custom callbacks cannot have batch level callbacks overridden when used with `ParameterServerStrategy`. Please modify your custom callbacks to make them epoch level calls, and adjust `steps_per_epoch` to a suitable value. In addition, `steps_per_epoch` is a required argument for `Model.fit` when used with `ParameterServerStrategy`." + "Note: Due to performance considerations, custom callbacks cannot have batch level callbacks overridden when used with `ParameterServerStrategy`. Please modify your custom callbacks to make them epoch level calls, and adjust `steps_per_epoch` to a suitable value. In addition, `steps_per_epoch` is a required argument for `Model.fit` when used with `ParameterServerStrategy`." ] }, { @@ -475,10 +467,10 @@ }, "outputs": [], "source": [ - "working_dir = '/tmp/my_working_dir'\n", - "log_dir = os.path.join(working_dir, 'log')\n", - "ckpt_filepath = os.path.join(working_dir, 'ckpt')\n", - "backup_dir = os.path.join(working_dir, 'backup')\n", + "working_dir = \"/tmp/my_working_dir\"\n", + "log_dir = os.path.join(working_dir, \"log\")\n", + "ckpt_filepath = os.path.join(working_dir, \"ckpt\")\n", + "backup_dir = os.path.join(working_dir, \"backup\")\n", "\n", "callbacks = [\n", " tf.keras.callbacks.TensorBoard(log_dir=log_dir),\n", @@ -486,7 +478,7 @@ " tf.keras.callbacks.BackupAndRestore(backup_dir=backup_dir),\n", "]\n", "\n", - "model.fit(dc, epochs=5, steps_per_epoch=20, callbacks=callbacks)" + "model.fit(dataset, epochs=5, steps_per_epoch=20, callbacks=callbacks)" ] }, { @@ -497,7 +489,7 @@ "source": [ "### Direct usage with `ClusterCoordinator` (optional)\n", "\n", - "Even if you choose the `Model.fit` training path, you can optionally instantiate a `tf.distribute.experimental.coordinator.ClusterCoordinator` object to schedule other functions you would like to be executed on the workers. See the [Training with a custom training loop](#training_with_custom_training_loop) section for more details and examples." + "Even if you choose the `Model.fit` training path, you can optionally instantiate a `tf.distribute.coordinator.ClusterCoordinator` object to schedule other functions you would like to be executed on the workers. Refer to the [Training with a custom training loop](#training_with_custom_training_loop) section for more details and examples." ] }, { @@ -510,11 +502,11 @@ "\n", " \n", "\n", - "Using custom training loops with `tf.distribute.Strategy` provides great flexibility to define training loops. With the `ParameterServerStrategy` defined above (as `strategy`), you will use a `tf.distribute.experimental.coordinator.ClusterCoordinator` to dispatch the execution of training steps to remote workers.\n", + "Using custom training loops with `tf.distribute.Strategy` provides great flexibility to define training loops. With the `ParameterServerStrategy` defined above (as `strategy`), you will use a `tf.distribute.coordinator.ClusterCoordinator` to dispatch the execution of training steps to remote workers.\n", "\n", - "Then, you will create a model, define a dataset and a step function, as you have done in the training loop with other `tf.distribute.Strategy`s. 
You can find more details in the [Custom training with tf.distribute.Strategy](https://www.tensorflow.org/tutorials/distribute/custom_training) tutorial.\n", + "Then, you will create a model, define a dataset, and define a step function, as you have done in the training loop with other `tf.distribute.Strategy`s. You can find more details in the [Custom training with tf.distribute.Strategy](custom_training.ipynb) tutorial.\n", "\n", - "To ensure efficient dataset prefetching, use the recommended distributed dataset creation APIs mentioned in the [Dispatch training steps to remote workers](https://www.tensorflow.org/tutorials/distribute/parameter_server_training#dispatch_training_steps_to_remote_workers) section below. Also, make sure to call `Strategy.run` inside `worker_fn` to take full advantage of GPUs allocated to workers. The rest of the steps are the same for training with or without GPUs.\n", + "To ensure efficient dataset prefetching, use the recommended distributed dataset creation APIs mentioned in the [Dispatch training steps to remote workers](#dispatch_training_steps_to_remote_workers) section below. Also, make sure to call `Strategy.run` inside `worker_fn` to take full advantage of GPUs allocated to workers. The rest of the steps are the same for training with or without GPUs.\n", "\n", "Let’s create these components in the following steps:\n" ] @@ -529,11 +521,11 @@ "\n", "First, write a function that creates a dataset.\n", "\n", - "If you would like to preprocess the data with [Keras preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) or [Tensorflow Transform layers](https://www.tensorflow.org/tfx/tutorials/transform/simple), create these layers **outside the `dataset_fn`** and **under `Strategy.scope`** like you would do for any other Keras layers. This is because the `dataset_fn` will be wrapped into a `tf.function` and then executed on each worker to generate the data pipeline.\n", + "If you would like to preprocess the data with [Keras preprocessing layers](https://www.tensorflow.org/guide/keras/preprocessing_layers) or [Tensorflow Transform layers](https://www.tensorflow.org/tfx/tutorials/transform/simple), create these layers **outside the `dataset_fn`** and **under `Strategy.scope`**, like you would do for any other Keras layers. This is because the `dataset_fn` will be wrapped into a `tf.function` and then executed on each worker to generate the data pipeline.\n", "\n", "If you don't follow the above procedure, creating the layers might create Tensorflow states which will be lifted out of the `tf.function` to the coordinator. Thus, accessing them on workers would incur repetitive RPC calls between coordinator and workers, and cause significant slowdown.\n", "\n", - "Placing the layers under `Strategy.scope` will instead create them on all workers. Then, you will apply the transformation inside the `dataset_fn` via `tf.data.Dataset.map`. Refer to _Data preprocessing_ in the [Distributed input](https://www.tensorflow.org/tutorials/distribute/input) tutorial for more information on data preprocessing with distributed input." + "Placing the layers under `Strategy.scope` will instead create them on all workers. Then, you will apply the transformation inside the `dataset_fn` via `tf.data.Dataset.map`. Refer to _Data preprocessing_ in the [Distributed input](input.ipynb) tutorial for more information on data preprocessing with distributed input." 
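As a rough sketch of that pattern (the layer, vocabulary, and dataset below are illustrative, not the ones used later in this tutorial): create the preprocessing state under `Strategy.scope`, and only *apply* it inside the `dataset_fn`:

```python
# Sketch only: the lookup layer (and its vocabulary table) is created once,
# under the strategy scope, so its resources live on the workers.
with strategy.scope():
  lookup_layer = tf.keras.layers.StringLookup(vocabulary=["a", "b", "c"])

def dataset_fn(input_context):
  raw = tf.data.Dataset.from_tensor_slices(["a", "b", "c", "a"]).repeat()
  # The transformation itself runs inside the dataset pipeline on each worker.
  return raw.map(lookup_layer).batch(8)
```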
] }, { @@ -664,10 +656,13 @@ " emb_layer = tf.keras.layers.Embedding(\n", " input_dim=len(feature_lookup_layer.get_vocabulary()), output_dim=16384)\n", " emb_output = tf.reduce_mean(emb_layer(model_input), axis=1)\n", - " dense_output = tf.keras.layers.Dense(units=1, activation=\"sigmoid\")(emb_output)\n", + " dense_output = tf.keras.layers.Dense(\n", + " units=1, activation=\"sigmoid\",\n", + " kernel_regularizer=tf.keras.regularizers.L2(1e-4),\n", + " )(emb_output)\n", " model = tf.keras.Model({\"features\": model_input}, dense_output)\n", "\n", - " optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.1)\n", + " optimizer = tf.keras.optimizers.legacy.RMSprop(learning_rate=0.1)\n", " accuracy = tf.keras.metrics.Accuracy()" ] }, @@ -677,7 +672,7 @@ "id": "iyuxiqCQU50m" }, "source": [ - "Let's confirm that the use of `FixedShardsPartitioner` split all variables into two shards and each shard was assigned to different parameter servers:" + "Let's confirm that the use of `FixedShardsPartitioner` split all variables into two shards and that each shard was assigned to a different parameter server:" ] }, { @@ -691,8 +686,9 @@ "assert len(emb_layer.weights) == 2\n", "assert emb_layer.weights[0].shape == (4, 16384)\n", "assert emb_layer.weights[1].shape == (4, 16384)\n", - "assert emb_layer.weights[0].device == \"/job:ps/replica:0/task:0/device:CPU:0\"\n", - "assert emb_layer.weights[1].device == \"/job:ps/replica:0/task:1/device:CPU:0\"" + "\n", + "print(emb_layer.weights[0].device)\n", + "print(emb_layer.weights[1].device)\n" ] }, { @@ -720,9 +716,12 @@ " with tf.GradientTape() as tape:\n", " pred = model(batch_data, training=True)\n", " per_example_loss = tf.keras.losses.BinaryCrossentropy(\n", - " reduction=tf.keras.losses.Reduction.NONE)(labels, pred)\n", + " reduction=tf.keras.losses.Reduction.NONE)(labels, pred)\n", " loss = tf.nn.compute_average_loss(per_example_loss)\n", - " gradients = tape.gradient(loss, model.trainable_variables)\n", + " model_losses = model.losses\n", + " if model_losses:\n", + " loss += tf.nn.scale_regularization_loss(tf.add_n(model_losses))\n", + " gradients = tape.gradient(loss, model.trainable_variables)\n", "\n", " optimizer.apply_gradients(zip(gradients, model.trainable_variables))\n", "\n", @@ -741,7 +740,7 @@ "id": "rvrYQUeYiLNy" }, "source": [ - "In the above training step function, calling `Strategy.run` and `Strategy.reduce` in the `step_fn` can support multiple GPUs per worker. If the workers have GPUs allocated, `Strategy.run` will distribute the datasets on multiple replicas.\n" + "In the above training step function, calling `Strategy.run` and `Strategy.reduce` in the `step_fn` can support multiple GPUs per worker. If the workers have GPUs allocated, `Strategy.run` will distribute the datasets on multiple replicas (GPUs). Their parallel calls to `tf.nn.compute_average_loss()` compute the average of the loss across the replicas (GPUs) of one worker, independent of the total number of workers." 
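For instance, here is a tiny standalone illustration (numbers made up) of how `tf.nn.compute_average_loss` divides by the *global* batch size rather than the per-replica one, so summing the per-replica results with `Strategy.reduce` yields the correct average:

```python
# Two per-example losses on one replica, global batch size of 4:
loss = tf.nn.compute_average_loss([1.0, 2.0], global_batch_size=4)
print(float(loss))  # (1.0 + 2.0) / 4 = 0.75
```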
] }, { @@ -753,7 +752,7 @@ "### Dispatch training steps to remote workers\n", " \n", "\n", - "After all the computations are defined by `ParameterServerStrategy`, you will use the `tf.distribute.experimental.coordinator.ClusterCoordinator` class to create resources and distribute the training steps to remote workers.\n", + "After all the computations are defined by `ParameterServerStrategy`, you will use the `tf.distribute.coordinator.ClusterCoordinator` class to create resources and distribute the training steps to remote workers.\n", "\n", "Let’s first create a `ClusterCoordinator` object and pass in the strategy object:" ] @@ -766,7 +765,7 @@ }, "outputs": [], "source": [ - "coordinator = tf.distribute.experimental.coordinator.ClusterCoordinator(strategy)" + "coordinator = tf.distribute.coordinator.ClusterCoordinator(strategy)" ] }, { @@ -775,7 +774,7 @@ "id": "-xRIgKxciOSe" }, "source": [ - "Then, create a per-worker dataset and an iterator. In the `per_worker_dataset_fn` below, wrapping the `dataset_fn` into `strategy.distribute_datasets_from_function` is recommended to allow efficient prefetching to GPUs seamlessly." + "Then, create a per-worker dataset and an iterator using the `ClusterCoordinator.create_per_worker_dataset` API, which replicates the dataset to all workers. In the `per_worker_dataset_fn` below, wrapping the `dataset_fn` into `strategy.distribute_datasets_from_function` is recommended to allow efficient prefetching to GPUs seamlessly." ] }, { @@ -814,15 +813,15 @@ }, "outputs": [], "source": [ - "num_epoches = 4\n", + "num_epochs = 4\n", "steps_per_epoch = 5\n", - "for i in range(num_epoches):\n", + "for i in range(num_epochs):\n", " accuracy.reset_states()\n", " for _ in range(steps_per_epoch):\n", " coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", " # Wait at epoch boundaries.\n", " coordinator.join()\n", - " print (\"Finished epoch %d, accuracy is %f.\" % (i, accuracy.result().numpy()))" + " print(\"Finished epoch %d, accuracy is %f.\" % (i, accuracy.result().numpy()))" ] }, { @@ -843,7 +842,7 @@ "outputs": [], "source": [ "loss = coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", - "print (\"Final loss is %f\" % loss.fetch())" + "print(\"Final loss is %f\" % loss.fetch())" ] }, { @@ -863,7 +862,7 @@ " # Do something like logging metrics or writing checkpoints.\n", "```\n", "\n", - "For the complete training and serving workflow for this particular example, please check out this [test](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/keras/distribute/parameter_server_training_test.py).\n" + "For the complete training and serving workflow for this particular example, please check out this [test](https://github.com/keras-team/keras/blob/master/keras/integration_test/parameter_server_keras_preprocessing_test.py).\n" ] }, { @@ -876,9 +875,9 @@ "\n", "The dataset in the above code is created using the `ClusterCoordinator.create_per_worker_dataset` API. It creates one dataset per worker and returns a container object. You can call the `iter` method on it to create a per-worker iterator. 
The per-worker iterator contains one iterator per worker and the corresponding slice of a worker will be substituted in the input argument of the function passed to the `ClusterCoordinator.schedule` method before the function is executed on a particular worker.\n", "\n", - "The `ClusterCoordinator.schedule` method assumes workers are equivalent and thus assumes the datasets on different workers are the same (except that they may be shuffled differently). Because of this, it is also recommended to repeat datasets, and schedule a finite number of steps instead of relying on the `OutOfRangeError` from a dataset.\n", + "The `ClusterCoordinator.schedule` method assumes workers are equivalent and thus assumes the datasets on different workers are the same (except that they may be shuffled differently). Because of this, it is also recommended to repeat datasets, and schedule a finite number of steps instead of relying on receiving an `OutOfRangeError` from a dataset.\n", "\n", - "Another important note is that `tf.data` datasets don’t support implicit serialization and deserialization across task boundaries. So it is important to create the whole dataset inside the function passed to `ClusterCoordinator.create_per_worker_dataset`." + "Another important note is that `tf.data` datasets don’t support implicit serialization and deserialization across task boundaries. So it is important to create the whole dataset inside the function passed to `ClusterCoordinator.create_per_worker_dataset`. The `create_per_worker_dataset` API can also directly take a `tf.data.Dataset` or `tf.distribute.DistributedDataset` as input." ] }, { @@ -889,7 +888,7 @@ "source": [ "## Evaluation\n", "\n", - "There is more than one way to define and run an evaluation loop in distributed training. Each has its own pros and cons as described below. The inline evaluation method is recommended if you don't have a preference." + "The two main approaches to performing evaluation with `tf.distribute.ParameterServerStrategy` training are inline evaluation and sidecar evaluation. Each has its own pros and cons as described below. The inline evaluation method is recommended if you don't have a preference. For users using `Model.fit`, `Model.evaluate` uses inline (distributed) evaluation under the hood." ] }, { @@ -905,7 +904,7 @@ "There are several benefits of inline evaluation. 
For example:\n", "\n", "- It can support large evaluation models and evaluation datasets that a single task cannot hold.\n", - "- The evaluation results can be used to make decisions for training the next epoch.\n", + "- The evaluation results can be used to make decisions for training the next epoch, for example, whether to stop training early.\n", "\n", "There are two ways to implement inline evaluation: direct evaluation and distributed evaluation.\n", "\n", @@ -921,7 +920,7 @@ "outputs": [], "source": [ "eval_dataset = tf.data.Dataset.from_tensor_slices(\n", - " feature_and_label_gen(num_examples=16)).map(\n", + " feature_and_label_gen(num_examples=16)).map(\n", " lambda x: (\n", " {\"features\": feature_preprocess_stage(x[\"features\"])},\n", " label_preprocess_stage(x[\"label\"])\n", @@ -934,7 +933,7 @@ " actual_pred = tf.cast(tf.greater(pred, 0.5), tf.int64)\n", " eval_accuracy.update_state(labels, actual_pred)\n", "\n", - "print (\"Evaluation accuracy: %f\" % eval_accuracy.result())" + "print(\"Evaluation accuracy: %f\" % eval_accuracy.result())" ] }, { @@ -982,7 +981,7 @@ "for _ in range(eval_steps_per_epoch):\n", " coordinator.schedule(eval_step, args=(per_worker_eval_iterator,))\n", "coordinator.join()\n", - "print (\"Evaluation accuracy: %f\" % eval_accuracy.result())" + "print(\"Evaluation accuracy: %f\" % eval_accuracy.result())" ] }, { @@ -991,7 +990,23 @@ "id": "cKrQktZX5z7a" }, "source": [ - "Note: Currently, the `schedule` and `join` methods of `tf.distribute.experimental.coordinator.ClusterCoordinator` don’t support visitation guarantee or exactly-once semantics. In other words, there is no guarantee that all evaluation examples in a dataset will be evaluated exactly once; some may not be visited and some may be evaluated multiple times. Visitation guarantee on evaluation dataset is being worked on." + "#### Enabling exactly-once evaluation\n", + "\n", + "\n", + "The `schedule` and `join` methods of `tf.distribute.coordinator.ClusterCoordinator` don’t support visitation guarantees or exactly-once semantics by default. In other words, in the above example there is no guarantee that all evaluation examples in a dataset will be evaluated exactly once; some may not be visited and some may be evaluated multiple times.\n", + "\n", + "Exactly-once evaluation may be preferred to reduce the variance of evaluation across epochs, and improve model selection done via early stopping, hyperparameter tuning, or other methods. There are different ways to enable exactly-once evaluation:\n", + "\n", + "- With a `Model.fit/.evaluate` workflow, it can be enabled by adding an argument to `Model.compile`. Refer to docs for the `pss_evaluation_shards` argument.\n", + "- The `tf.data` service API can be used to provide exactly-once visitation for evaluation when using `ParameterServerStrategy` (refer to the _Dynamic Sharding_ section of the `tf.data.experimental.service` API documentation).\n", + "- [Sidecar evaluation](#sidecar_evaluation) provides exactly-once evaluation by default, since the evaluation happens on a single machine. However this can be much slower than performing evaluation distributed across many workers.\n", + "\n", + "The first option, using `Model.compile`, is the suggested solution for most users.\n", + "\n", + "Exactly-once evaluation has some limitations:\n", + "\n", + "- It is not supported to write a custom distributed evaluation loop with an exactly-once visitation guarantee. 
File a GitHub issue if you need support for this.\n", + "- It cannot automatically handle computation of metrics that use the `Layer.add_metric` API. These should be excluded from evaluation, or reworked into `Metric` objects." ] }, { @@ -1000,9 +1015,69 @@ "id": "H40X-9Gs3i7_" }, "source": [ - "### Side-car evaluation\n", + "### Sidecar evaluation\n", + "\n", + "\n", + "Another method for defining and running an evaluation loop in `tf.distribute.ParameterServerStrategy` training is called _sidecar evaluation_, in which you create a dedicated evaluator task that repeatedly reads checkpoints and runs evaluation on the latest checkpoint (refer to [this guide](../../guide/checkpoint.ipynb) for more details on checkpointing). The coordinator and worker tasks do not spend any time on evaluation, so for a fixed number of iterations the overall training time should be shorter than using other evaluation methods. However, it requires an additional evaluator task and periodic checkpointing to trigger evaluation." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HonyjnXK9-ys" + }, + "source": [ + "To write an evaluation loop for sidecar evaluation, you have two\n", + "options:\n", + "\n", + "1. Use the `tf.keras.utils.SidecarEvaluator` API.\n", + "2. Create a custom evaluation loop.\n", + "\n", + "Refer to the `tf.keras.utils.SidecarEvaluator` API documentation for more details on option 1." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "U_c0EiwB88OG" + }, + "source": [ + "Sidecar evaluation is supported only with a single task. This means:\n", + "\n", + "* It is guaranteed that each example is evaluated once. In the event the\n", + " evaluator is preempted or restarted, it simply restarts the\n", + " evaluation loop from the latest checkpoint, and the partial evaluation\n", + " progress made before the restart is discarded.\n", "\n", - "Another method is called _side-car evaluation_ where you create a dedicated evaluator task that repeatedly reads checkpoints and runs evaluation on a latest checkpoint. It allows your training program to finish early if you don't need to change your training loop based on evaluation results. However, it requires an additional evaluator task and periodic checkpointing to trigger evaluation. Following is a possible side-car evaluation loop:\n", + "* However, running evaluation on a single task implies that a full evaluation\n", + " can possibly take a long time.\n", + "\n", + "* If the size of the model is too large to fit into an evaluator's memory,\n", + " single sidecar evaluation is not applicable." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "VNJoWVc797B1" + }, + "source": [ + "Another caveat is that the `tf.keras.utils.SidecarEvaluator` implementation, and the custom\n", + "evaluation loop below, may skip some checkpoints because it always picks up the\n", + "latest checkpoint available, and during an evaluation epoch, multiple\n", + "checkpoints can be produced from the training cluster. You can write a custom\n", + "evaluation loop that evaluates every checkpoint, but it is not covered in this\n", + "tutorial. On the other hand, it may sit idle if checkpoints are produced less\n", + "frequently than how long it takes to run evaluation." 
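For option 1, a minimal sketch of `tf.keras.utils.SidecarEvaluator` usage might look like this (the model, data, and paths are illustrative; check the API documentation for the exact arguments):

```python
# Sketch only: run this in the dedicated "evaluator" task.
eval_model = tf.keras.Sequential([tf.keras.layers.Dense(10)])  # same architecture as training
eval_model.compile(metrics=["accuracy"])

eval_data = tf.data.Dataset.from_tensor_slices(
    (tf.random.uniform((16, 10)), tf.random.uniform((16,)))).batch(8)

tf.keras.utils.SidecarEvaluator(
    model=eval_model,
    data=eval_data,
    checkpoint_dir="/tmp/checkpoint_dir",  # where the training cluster writes checkpoints
    steps=None,             # evaluate the full dataset each time
    max_evaluations=None,   # keep picking up new checkpoints indefinitely
).start()
```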
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G5jopxBd85Ji" + }, + "source": [ + "A custom evaluation loop provides more control over the details, such as choosing which checkpoint to evaluate, or providing any additional logic to run along with evaluation. The following is a possible custom sidecar evaluation loop:\n", "\n", "```python\n", "checkpoint_dir = ...\n", @@ -1022,7 +1097,7 @@ " eval_model.evaluate(eval_data)\n", "\n", " # Evaluation finishes when it has evaluated the last epoch.\n", - " if latest_checkpoint.endswith('-{}'.format(train_epoches)):\n", + " if latest_checkpoint.endswith('-{}'.format(train_epochs)):\n", " break\n", "```" ] @@ -1040,7 +1115,7 @@ "\n", "In a real production environment, you will run all tasks in different processes on different machines. The simplest way to configure cluster information on each task is to set `\"TF_CONFIG\"` environment variables and use a `tf.distribute.cluster_resolver.TFConfigClusterResolver` to parse `\"TF_CONFIG\"`.\n", "\n", - "For a general description of `\"TF_CONFIG\"` environment variables, refer to the [Distributed training](https://www.tensorflow.org/guide/distributed_training#setting_up_tf_config_environment_variable) guide.\n", + "For a general description of `\"TF_CONFIG\"` environment variables, refer to \"Setting up the `TF_CONFIG` environment variable\" in the [Distributed training](../../guide/distributed_training.ipynb) guide.\n", "\n", "If you start your training tasks using Kubernetes or other configuration templates, likely, these templates have already set `“TF_CONFIG\"` for you." ] @@ -1053,7 +1128,7 @@ "source": [ "### Set the `\"TF_CONFIG\"` environment variable\n", "\n", - "Suppose you have 3 workers and 2 parameter servers, the `\"TF_CONFIG\"` of worker 1 can be:\n", + "Suppose you have 3 workers and 2 parameter servers. Then the `\"TF_CONFIG\"` of worker 1 can be:\n", "\n", "```python\n", "os.environ[\"TF_CONFIG\"] = json.dumps({\n", @@ -1095,12 +1170,12 @@ "if cluster_resolver.task_type in (\"worker\", \"ps\"):\n", " # Start a TensorFlow server and wait.\n", "elif cluster_resolver.task_type == \"evaluator\":\n", - " # Run side-car evaluation\n", + " # Run sidecar evaluation\n", "else:\n", " # Run the coordinator.\n", "```\n", "\n", - "The following code starts a TensorFlow server and waits:\n", + "The following code starts a TensorFlow server and waits, useful for the `\"worker\"` and `\"ps\"` roles:\n", "\n", "```python\n", "# Set the environment variable to allow reporting worker and ps failure to the\n", @@ -1134,7 +1209,7 @@ "source": [ "### Worker failure\n", "\n", - "`tf.distribute.experimental.coordinator.ClusterCoordinator` or `Model.fit` provide built-in fault tolerance for worker failure. Upon worker recovery, the previously provided dataset function (either to `ClusterCoordinator.create_per_worker_dataset` for a custom training loop, or `tf.keras.utils.experimental.DatasetCreator` for `Model.fit`) will be invoked on the workers to re-create the datasets." + "Both the `tf.distribute.coordinator.ClusterCoordinator` custom training loop and `Model.fit` approaches provide built-in fault tolerance for worker failure. Upon worker recovery, the `ClusterCoordinator` invokes dataset re-creation on the workers." 
] }, { @@ -1178,7 +1253,7 @@ "global_steps = int(optimizer.iterations.numpy())\n", "starting_epoch = global_steps // steps_per_epoch\n", "\n", - "for _ in range(starting_epoch, num_epoches):\n", + "for _ in range(starting_epoch, num_epochs):\n", " for _ in range(steps_per_epoch):\n", " coordinator.schedule(step_fn, args=(per_worker_iterator,))\n", " coordinator.join()\n", @@ -1218,12 +1293,16 @@ "source": [ "## Performance improvement\n", "\n", - "There are several possible reasons if you see performance issues when you train with `ParameterServerStrategy` and `ClusterResolver`.\n", + "There are several possible reasons you may experience performance issues when you train with `tf.distribute.ParameterServerStrategy` and `tf.distribute.coordinator.ClusterCoordinator`.\n", "\n", - "One common reason is parameter servers have unbalanced load and some heavily-loaded parameter servers have reached capacity. There can also be multiple root causes. Some simple methods to mitigate this issue are to:\n", + "One common reason is that the parameter servers have unbalanced load and some heavily-loaded parameter servers have reached capacity. There can also be multiple root causes. Some simple methods to mitigate this issue are to:\n", "\n", "1. Shard your large model variables via specifying a `variable_partitioner` when constructing a `ParameterServerStrategy`.\n", - "2. Avoid creating a hotspot variable that is required by all parameter servers in a single step if possible. For example, use a constant learning rate or subclass `tf.keras.optimizers.schedules.LearningRateSchedule` in optimizers since the default behavior is that the learning rate will become a variable placed on a particular parameter server and requested by all other parameter servers in each step.\n", + "2. Avoid creating a hotspot variable that is required by all parameter servers in a single step, by both:\n", + "\n", + " 1) Using a constant learning rate or subclass `tf.keras.optimizers.schedules.LearningRateSchedule` in optimizers. This is because the default behavior is that the learning rate will become a variable placed on a particular parameter server, and requested by all other parameter servers in each step); and\n", + "\n", + " 2) Using a `tf.keras.optimizers.legacy.Optimizer` (the standard `tf.keras.optimizers.Optimizer`s could still lead to hotspot variables).\n", "3. Shuffle your large vocabularies before passing them to Keras preprocessing layers.\n", "\n", "Another possible reason for performance issues is the coordinator. The implementation of `schedule`/`join` is Python-based and thus may have threading overhead. Also, the latency between the coordinator and the workers can be large. If this is the case:\n", @@ -1247,7 +1326,7 @@ "\n", "As the library is optimized further, hopefully most users won't have to manually pack steps in the future.\n", "\n", - "In addition, a small trick for performance improvement is to schedule functions without a return value as explained in the handling task failure section above." + "In addition, a small trick for performance improvement is to schedule functions without a return value as explained in the [handling task failure section](#handling_task_failure) above." 
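As a rough sketch of the step-packing idea mentioned above (with a hypothetical `steps_per_invocation` constant), the function you schedule simply loops over several training steps internally, which amortizes the coordinator's per-call overhead:

```python
steps_per_invocation = 10  # hypothetical; tune for your workload

@tf.function
def packed_step_fn(iterator):
  # Reuses the step_fn defined earlier in this tutorial.
  for _ in range(steps_per_invocation):
    step_fn(iterator)

coordinator.schedule(packed_step_fn, args=(per_worker_iterator,))
```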
] }, { @@ -1267,8 +1346,7 @@ "- `os.environment[\"grpc_fail_fast\"]=\"use_caller\"` is needed on every task including the coordinator, to make fault tolerance work properly.\n", "- Synchronous parameter server training is not supported.\n", "- It is usually necessary to pack multiple steps into a single function to achieve optimal performance.\n", - "- It is not supported to load a saved_model via `tf.saved_model.load` containing sharded variables. Note loading such a saved_model using TensorFlow Serving is expected to work.\n", - "- It is not supported to load a checkpoint containing sharded optimizer slot variables into a different number of shards.\n", + "- It is not supported to load a saved_model via `tf.saved_model.load` containing sharded variables. Note loading such a saved_model using TensorFlow Serving is expected to work (refer to the [serving tutorial](https://www.tensorflow.org/tfx/tutorials/serving/rest_simple) for details).\n", "- It is not supported to recover from parameter server failure without restarting the coordinator task.\n", "- Creation of `tf.lookup.StaticHashTable`, commonly employed by some Keras preprocessing layers, such as `tf.keras.layers.IntegerLookup`, `tf.keras.layers.StringLookup`, and `tf.keras.layers.TextVectorization`, should be placed under `Strategy.scope`. Otherwise, resources will be placed on the coordinator, and lookup RPCs from workers to the coordinator incur performance implications.\n" ] @@ -1295,8 +1373,8 @@ "source": [ "### Custom training loop specifics\n", "\n", - "- `ClusterCoordinator.schedule` doesn't support visitation guarantees for a dataset.\n", - "- When `ClusterCoordinator.create_per_worker_dataset` is used, the whole dataset must be created inside the function passed to it.\n", + "- `ClusterCoordinator.schedule` doesn't support visitation guarantees for a dataset in general, although a visitation guarantee for evaluation is possible through `Model.fit/.evaluate`. See [Enabling exactly-once evaluation](#exactly_once_evaluation).\n", + "- When `ClusterCoordinator.create_per_worker_dataset` is used with a callable as input, the whole dataset must be created inside the function passed to it.\n", "- `tf.data.Options` is ignored in a dataset created by `ClusterCoordinator.create_per_worker_dataset`." ] } @@ -1304,9 +1382,8 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], "name": "parameter_server_training.ipynb", - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/distribute/save_and_load.ipynb b/site/en/tutorials/distribute/save_and_load.ipynb index 79e09b961b6..c53a9b8bf0b 100644 --- a/site/en/tutorials/distribute/save_and_load.ipynb +++ b/site/en/tutorials/distribute/save_and_load.ipynb @@ -73,7 +73,10 @@ "\n", "This tutorial demonstrates how you can save and load models in a SavedModel format with `tf.distribute.Strategy` during or after training. There are two kinds of APIs for saving and loading a Keras model: high-level (`tf.keras.Model.save` and `tf.keras.models.load_model`) and low-level (`tf.saved_model.save` and `tf.saved_model.load`).\n", "\n", - "To learn about SavedModel and serialization in general, please read the [saved model guide](../../guide/saved_model.ipynb), and the [Keras model serialization guide](https://www.tensorflow.org/guide/keras/save_and_serialize). 
Let's start with a simple example: " + "To learn about SavedModel and serialization in general, please read the [saved model guide](../../guide/saved_model.ipynb), and the [Keras model serialization guide](https://www.tensorflow.org/guide/keras/save_and_serialize). Let's start with a simple example.\n", + "\n", + "Caution: TensorFlow models are code and it is important to be careful with untrusted code. Learn more in [Using TensorFlow securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md).\n", + "\n" ] }, { @@ -186,8 +189,8 @@ "Now that you have a simple model to work with, let's explore the saving/loading APIs. \n", "There are two kinds of APIs available:\n", "\n", - "* High-level (Keras): `Model.save` and `tf.keras.models.load_model`\n", - "* Low-level: `tf.saved_model.save` and `tf.saved_model.load`\n" + "* High-level (Keras): `Model.save` and `tf.keras.models.load_model` (`.keras` zip archive format)\n", + "* Low-level: `tf.saved_model.save` and `tf.saved_model.load` (TF SavedModel format)\n" ] }, { @@ -216,7 +219,7 @@ }, "outputs": [], "source": [ - "keras_model_path = '/tmp/keras_save'\n", + "keras_model_path = '/tmp/keras_save.keras'\n", "model.save(keras_model_path)" ] }, @@ -247,7 +250,7 @@ "id": "gYAnskzorda-" }, "source": [ - "After restoring the model, you can continue training on it, even without needing to call `Model.compile` again, since it was already compiled before saving. The model is saved in TensorFlow's standard `SavedModel` proto format. For more information, please refer to [the guide to `SavedModel` format](../../guide/saved_model.ipynb).\n", + "After restoring the model, you can continue training on it, even without needing to call `Model.compile` again, since it was already compiled before saving. The model is saved a Keras zip archive format, marked by the `.keras` extension. For more information, please refer to [the guide on Keras saving](https://www.tensorflow.org/guide/keras/save_and_serialize).\n", "\n", "Now, restore the model and train it using a `tf.distribute.Strategy`:" ] @@ -461,12 +464,12 @@ "model = get_model()\n", "\n", "# Saving the model using Keras `Model.save`\n", - "model.save(keras_model_path)\n", + "model.save(saved_model_path)\n", "\n", "another_strategy = tf.distribute.MirroredStrategy()\n", "# Loading the model using the lower-level API\n", "with another_strategy.scope():\n", - " loaded = tf.saved_model.load(keras_model_path)" + " loaded = tf.saved_model.load(saved_model_path)" ] }, { @@ -550,7 +553,7 @@ "\n", "my_model = SubclassedModel()\n", "try:\n", - " my_model.save(keras_model_path)\n", + " my_model.save(saved_model_path)\n", "except ValueError as e:\n", " print(f'{type(e).__name__}: ', *e.args)" ] @@ -631,7 +634,7 @@ "my_model.fit(dataset, epochs=2)\n", "\n", "print(my_model.save_spec() is None)\n", - "my_model.save(keras_model_path)" + "my_model.save(saved_model_path)" ] } ], diff --git a/site/en/tutorials/estimator/keras_model_to_estimator.ipynb b/site/en/tutorials/estimator/keras_model_to_estimator.ipynb index e41380908f5..be97a38b6eb 100644 --- a/site/en/tutorials/estimator/keras_model_to_estimator.ipynb +++ b/site/en/tutorials/estimator/keras_model_to_estimator.ipynb @@ -68,7 +68,7 @@ "id": "Dhcq8Ds4mCtm" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. 
Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { @@ -272,8 +272,7 @@ "colab": { "collapsed_sections": [], "name": "keras_model_to_estimator.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/estimator/linear.ipynb b/site/en/tutorials/estimator/linear.ipynb index ea46d41ede1..a26ffe2df4f 100644 --- a/site/en/tutorials/estimator/linear.ipynb +++ b/site/en/tutorials/estimator/linear.ipynb @@ -61,7 +61,7 @@ "id": "JOccPOFMm5Tc" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { @@ -293,14 +293,31 @@ "pd.concat([dftrain, y_train], axis=1).groupby('sex').survived.mean().plot(kind='barh').set_xlabel('% survive')" ] }, + { + "cell_type": "markdown", + "metadata": { + "id": "qCHvgeorEsHa" + }, + "source": [ + "## Feature Engineering for the Model" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dhcq8Ds4mCtm" + }, + "source": [ + "> Warning: The tf.feature_columns module described in this tutorial is not recommended for new code. Keras preprocessing layers cover this functionality, for migration instructions see the [Migrating feature columns guide](https://www.tensorflow.org/guide/migrate/migrating_feature_columns). The tf.feature_columns module was designed for use with TF1 Estimators. It does fall under our [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities." + ] + }, { "cell_type": "markdown", "metadata": { "id": "VqDKQLZn8L-B" }, "source": [ - "## Feature Engineering for the Model\n", - "Estimators use a system called [feature columns](https://www.tensorflow.org/guide/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and *feature columns* describe how the model should convert each feature.\n", + "Estimators use a system called [feature columns](https://www.tensorflow.org/tutorials/structured_data/feature_columns) to describe how the model should interpret each of the raw input features. An Estimator expects a vector of numeric inputs, and *feature columns* describe how the model should convert each feature.\n", "\n", "Selecting and crafting the right set of feature columns is key to learning an effective model. 
A feature column can be either one of the raw inputs in the original features `dict` (a *base feature column*), or any new columns created using transformations defined over one or multiple base columns (a *derived feature columns*).\n", "\n", @@ -583,8 +600,7 @@ "colab": { "collapsed_sections": [], "name": "linear.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/estimator/premade.ipynb b/site/en/tutorials/estimator/premade.ipynb index a34096ea2b8..dc81847c7cd 100644 --- a/site/en/tutorials/estimator/premade.ipynb +++ b/site/en/tutorials/estimator/premade.ipynb @@ -68,7 +68,7 @@ "id": "stQiPWL6ni6_" }, "source": [ - "> Warning: Estimators are not recommended for new code. Estimators run `v1.Session`-style code which is more difficult to write correctly, and can behave unexpectedly, especially when combined with TF 2 code. Estimators do fall under [compatibility guarantees](https://tensorflow.org/guide/versions), but will receive no fixes other than security vulnerabilities. See the [migration guide](https://tensorflow.org/guide/migrate) for details." + "> Warning: TensorFlow 2.15 included the final release of the `tf-estimator` package. Estimators will not be available in TensorFlow 2.16 or after. See the [migration guide](https://tensorflow.org/guide/migrate/migrating_estimator) for more information about how to convert off of Estimators." ] }, { diff --git a/site/en/tutorials/generative/autoencoder.ipynb b/site/en/tutorials/generative/autoencoder.ipynb index b68b8e934a7..1b2a6fcd2a8 100644 --- a/site/en/tutorials/generative/autoencoder.ipynb +++ b/site/en/tutorials/generative/autoencoder.ipynb @@ -6,9 +6,16 @@ "id": "Ndo4ERqnwQOU" }, "source": [ - "##### Copyright 2020 The TensorFlow Authors." + "##### Copyright 2024 The TensorFlow Authors." ] }, + { + "metadata": { + "id": "13rwRG5Jec7n" + }, + "cell_type": "markdown", + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -76,7 +83,7 @@ "source": [ "This tutorial introduces autoencoders with three examples: the basics, image denoising, and anomaly detection.\n", "\n", - "An autoencoder is a special type of neural network that is trained to copy its input to its output. For example, given an image of a handwritten digit, an autoencoder first encodes the image into a lower dimensional latent representation, then decodes the latent representation back to an image. An autoencoder learns to compress the data while minimizing the reconstruction error. \n", + "An autoencoder is a special type of neural network that is trained to copy its input to its output. For example, given an image of a handwritten digit, an autoencoder first encodes the image into a lower dimensional latent representation, then decodes the latent representation back to an image. An autoencoder learns to compress the data while minimizing the reconstruction error.\n", "\n", "To learn more about autoencoders, please consider reading chapter 14 from [Deep Learning](https://www.deeplearningbook.org/) by Ian Goodfellow, Yoshua Bengio, and Aaron Courville." ] @@ -117,7 +124,7 @@ }, "source": [ "## Load the dataset\n", - "To start, you will train the basic autoencoder using the Fashion MNIST dataset. Each image in this dataset is 28x28 pixels. " + "To start, you will train the basic autoencoder using the Fashion MNIST dataset. Each image in this dataset is 28x28 pixels." 
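Loading and normalizing the data is standard Keras; for example:

```python
(x_train, _), (x_test, _) = tf.keras.datasets.fashion_mnist.load_data()

# Scale pixel values to [0, 1] so the sigmoid output of the decoder can match them.
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

print(x_train.shape)  # (60000, 28, 28)
print(x_test.shape)   # (10000, 28, 28)
```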
] }, { @@ -159,27 +166,29 @@ }, "outputs": [], "source": [ - "latent_dim = 64 \n", - "\n", "class Autoencoder(Model):\n", - " def __init__(self, latent_dim):\n", + " def __init__(self, latent_dim, shape):\n", " super(Autoencoder, self).__init__()\n", - " self.latent_dim = latent_dim \n", + " self.latent_dim = latent_dim\n", + " self.shape = shape\n", " self.encoder = tf.keras.Sequential([\n", " layers.Flatten(),\n", " layers.Dense(latent_dim, activation='relu'),\n", " ])\n", " self.decoder = tf.keras.Sequential([\n", - " layers.Dense(784, activation='sigmoid'),\n", - " layers.Reshape((28, 28))\n", + " layers.Dense(tf.math.reduce_prod(shape).numpy(), activation='sigmoid'),\n", + " layers.Reshape(shape)\n", " ])\n", "\n", " def call(self, x):\n", " encoded = self.encoder(x)\n", " decoded = self.decoder(encoded)\n", " return decoded\n", - " \n", - "autoencoder = Autoencoder(latent_dim) " + "\n", + "\n", + "shape = x_test.shape[1:]\n", + "latent_dim = 64\n", + "autoencoder = Autoencoder(latent_dim, shape)\n" ] }, { @@ -329,8 +338,8 @@ "outputs": [], "source": [ "noise_factor = 0.2\n", - "x_train_noisy = x_train + noise_factor * tf.random.normal(shape=x_train.shape) \n", - "x_test_noisy = x_test + noise_factor * tf.random.normal(shape=x_test.shape) \n", + "x_train_noisy = x_train + noise_factor * tf.random.normal(shape=x_train.shape)\n", + "x_test_noisy = x_test + noise_factor * tf.random.normal(shape=x_test.shape)\n", "\n", "x_train_noisy = tf.clip_by_value(x_train_noisy, clip_value_min=0., clip_value_max=1.)\n", "x_test_noisy = tf.clip_by_value(x_test_noisy, clip_value_min=0., clip_value_max=1.)" @@ -655,7 +664,7 @@ "id": "wVcTBDo-CqFS" }, "source": [ - "Plot a normal ECG. " + "Plot a normal ECG." ] }, { @@ -719,12 +728,12 @@ " layers.Dense(32, activation=\"relu\"),\n", " layers.Dense(16, activation=\"relu\"),\n", " layers.Dense(8, activation=\"relu\")])\n", - " \n", + "\n", " self.decoder = tf.keras.Sequential([\n", " layers.Dense(16, activation=\"relu\"),\n", " layers.Dense(32, activation=\"relu\"),\n", " layers.Dense(140, activation=\"sigmoid\")])\n", - " \n", + "\n", " def call(self, x):\n", " encoded = self.encoder(x)\n", " decoded = self.decoder(encoded)\n", @@ -761,8 +770,8 @@ }, "outputs": [], "source": [ - "history = autoencoder.fit(normal_train_data, normal_train_data, \n", - " epochs=20, \n", + "history = autoencoder.fit(normal_train_data, normal_train_data,\n", + " epochs=20,\n", " batch_size=512,\n", " validation_data=(test_data, test_data),\n", " shuffle=True)" @@ -906,7 +915,7 @@ "id": "uEGlA1Be50Nj" }, "source": [ - "Note: There are other strategies you could use to select a threshold value above which test examples should be classified as anomalous, the correct approach will depend on your dataset. You can learn more with the links at the end of this tutorial. " + "Note: There are other strategies you could use to select a threshold value above which test examples should be classified as anomalous, the correct approach will depend on your dataset. You can learn more with the links at the end of this tutorial." ] }, { @@ -915,7 +924,7 @@ "id": "zpLSDAeb51D_" }, "source": [ - "If you examine the reconstruction error for the anomalous examples in the test set, you'll notice most have greater reconstruction error than the threshold. By varing the threshold, you can adjust the [precision](https://developers.google.com/machine-learning/glossary#precision) and [recall](https://developers.google.com/machine-learning/glossary#recall) of your classifier. 
" + "If you examine the reconstruction error for the anomalous examples in the test set, you'll notice most have greater reconstruction error than the threshold. By varing the threshold, you can adjust the [precision](https://developers.google.com/machine-learning/glossary#precision) and [recall](https://developers.google.com/machine-learning/glossary#recall) of your classifier." ] }, { @@ -990,8 +999,18 @@ "metadata": { "accelerator": "GPU", "colab": { - "collapsed_sections": [], - "name": "autoencoder.ipynb", + "gpuType": "T4", + "private_outputs": true, + "provenance": [ + { + "file_id": "17gKB2bKebV2DzoYIMFzyEXA5uDnwWOvT", + "timestamp": 1712793165979 + }, + { + "file_id": "https://github.com/tensorflow/docs/blob/master/site/en/tutorials/generative/autoencoder.ipynb", + "timestamp": 1712792176273 + } + ], "toc_visible": true }, "kernelspec": { diff --git a/site/en/tutorials/generative/cyclegan.ipynb b/site/en/tutorials/generative/cyclegan.ipynb index 7136dd143ef..313be519591 100644 --- a/site/en/tutorials/generative/cyclegan.ipynb +++ b/site/en/tutorials/generative/cyclegan.ipynb @@ -154,7 +154,7 @@ "This is similar to what was done in [pix2pix](https://www.tensorflow.org/tutorials/generative/pix2pix#load_the_dataset)\n", "\n", "* In random jittering, the image is resized to `286 x 286` and then randomly cropped to `256 x 256`.\n", - "* In random mirroring, the image is randomly flipped horizontally i.e left to right." + "* In random mirroring, the image is randomly flipped horizontally i.e., left to right." ] }, { @@ -634,7 +634,7 @@ "source": [ "## Training\n", "\n", - "Note: This example model is trained for fewer epochs (40) than the paper (200) to keep training time reasonable for this tutorial. Predictions may be less accurate. " + "Note: This example model is trained for fewer epochs (10) than the paper (200) to keep training time reasonable for this tutorial. The generated images will have much lower quality." ] }, { @@ -645,7 +645,7 @@ }, "outputs": [], "source": [ - "EPOCHS = 40" + "EPOCHS = 10" ] }, { @@ -830,8 +830,7 @@ "colab": { "collapsed_sections": [], "name": "cyclegan.ipynb", - "provenance": [], - "toc_visible": true + "toc_visible": true }, "kernelspec": { "display_name": "Python 3", diff --git a/site/en/tutorials/generative/data_compression.ipynb b/site/en/tutorials/generative/data_compression.ipynb new file mode 100644 index 00000000000..f756f088acd --- /dev/null +++ b/site/en/tutorials/generative/data_compression.ipynb @@ -0,0 +1,901 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Tce3stUlHN0L" + }, + "source": [ + "##### Copyright 2022 The TensorFlow Compression Authors." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "tuOe1ymfHZPu" + }, + "outputs": [], + "source": [ + "#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qFdPvlXBOdUN" + }, + "source": [ + "# Learned data compression" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MfBg1C5NB3X0" + }, + "source": [ + "\n", + " \n", + " \n", + " \n", + " \n", + "
    \n", + " \n", + " \n", + " View on TensorFlow.org\n", + " \n", + " \n", + " \n", + " Run in Google Colab\n", + " \n", + " \n", + " \n", + " View source on GitHub\n", + " \n", + " Download notebook\n", + "
    " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xHxb-dlhMIzW" + }, + "source": [ + "## Overview\n", + "\n", + "This notebook shows how to do lossy data compression using neural networks and [TensorFlow Compression](https://github.com/tensorflow/compression).\n", + "\n", + "Lossy compression involves making a trade-off between **rate**, the expected number of bits needed to encode a sample, and **distortion**, the expected error in the reconstruction of the sample.\n", + "\n", + "The examples below use an autoencoder-like model to compress images from the MNIST dataset. The method is based on the paper [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704).\n", + "\n", + "More background on learned data compression can be found in [this paper](https://arxiv.org/abs/2007.03034) targeted at people familiar with classical data compression, or [this survey](https://arxiv.org/abs/2202.06533) targeted at a machine learning audience.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MUXex9ctTuDB" + }, + "source": [ + "## Setup\n", + "\n", + "Install Tensorflow Compression via `pip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "K489KsEgxuLI" + }, + "outputs": [], + "source": [ + "%%bash\n", + "# Installs the latest version of TFC compatible with the installed TF version.\n", + "\n", + "read MAJOR MINOR <<< \"$(pip show tensorflow | perl -p -0777 -e 's/.*Version: (\\d+)\\.(\\d+).*/\\1 \\2/sg')\"\n", + "pip install \"tensorflow-compression<$MAJOR.$(($MINOR+1))\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WfVAmHCVxpTS" + }, + "source": [ + "Import library dependencies." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "IqR2PQG4ZaZ0" + }, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import tensorflow as tf\n", + "import tensorflow_compression as tfc\n", + "import tensorflow_datasets as tfds\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wsncKT2iymgQ" + }, + "source": [ + "## Define the trainer model.\n", + "\n", + "Because the model resembles an autoencoder, and we need to perform a different set of functions during training and inference, the setup is a little different from, say, a classifier.\n", + "\n", + "The training model consists of three parts:\n", + "- the **analysis** (or encoder) transform, converting from the image into a latent space,\n", + "- the **synthesis** (or decoder) transform, converting from the latent space back into image space, and\n", + "- a **prior** and entropy model, modeling the marginal probabilities of the latents.\n", + "\n", + "First, define the transforms:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "8yZESLgW-vp1" + }, + "outputs": [], + "source": [ + "def make_analysis_transform(latent_dims):\n", + " \"\"\"Creates the analysis (encoder) transform.\"\"\"\n", + " return tf.keras.Sequential([\n", + " tf.keras.layers.Conv2D(\n", + " 20, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_1\"),\n", + " tf.keras.layers.Conv2D(\n", + " 50, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_2\"),\n", + " tf.keras.layers.Flatten(),\n", + " tf.keras.layers.Dense(\n", + " 500, use_bias=True, activation=\"leaky_relu\", name=\"fc_1\"),\n", + " tf.keras.layers.Dense(\n", + " latent_dims, use_bias=True, activation=None, name=\"fc_2\"),\n", + 
" ], name=\"analysis_transform\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2sHdYBzF2xcu" + }, + "outputs": [], + "source": [ + "def make_synthesis_transform():\n", + " \"\"\"Creates the synthesis (decoder) transform.\"\"\"\n", + " return tf.keras.Sequential([\n", + " tf.keras.layers.Dense(\n", + " 500, use_bias=True, activation=\"leaky_relu\", name=\"fc_1\"),\n", + " tf.keras.layers.Dense(\n", + " 2450, use_bias=True, activation=\"leaky_relu\", name=\"fc_2\"),\n", + " tf.keras.layers.Reshape((7, 7, 50)),\n", + " tf.keras.layers.Conv2DTranspose(\n", + " 20, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_1\"),\n", + " tf.keras.layers.Conv2DTranspose(\n", + " 1, 5, use_bias=True, strides=2, padding=\"same\",\n", + " activation=\"leaky_relu\", name=\"conv_2\"),\n", + " ], name=\"synthesis_transform\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lYC8tHhkxTlK" + }, + "source": [ + "The trainer holds an instance of both transforms, as well as the parameters of the prior.\n", + "\n", + "Its `call` method is set up to compute:\n", + "- **rate**, an estimate of the number of bits needed to represent the batch of digits, and\n", + "- **distortion**, the mean absolute difference between the pixels of the original digits and their reconstructions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ROn2DbzsBirI" + }, + "outputs": [], + "source": [ + "class MNISTCompressionTrainer(tf.keras.Model):\n", + " \"\"\"Model that trains a compressor/decompressor for MNIST.\"\"\"\n", + "\n", + " def __init__(self, latent_dims):\n", + " super().__init__()\n", + " self.analysis_transform = make_analysis_transform(latent_dims)\n", + " self.synthesis_transform = make_synthesis_transform()\n", + " self.prior_log_scales = tf.Variable(tf.zeros((latent_dims,)))\n", + "\n", + " @property\n", + " def prior(self):\n", + " return tfc.NoisyLogistic(loc=0., scale=tf.exp(self.prior_log_scales))\n", + "\n", + " def call(self, x, training):\n", + " \"\"\"Computes rate and distortion losses.\"\"\"\n", + " # Ensure inputs are floats in the range (0, 1).\n", + " x = tf.cast(x, self.compute_dtype) / 255.\n", + " x = tf.reshape(x, (-1, 28, 28, 1))\n", + "\n", + " # Compute latent space representation y, perturb it and model its entropy,\n", + " # then compute the reconstructed pixel-level representation x_hat.\n", + " y = self.analysis_transform(x)\n", + " entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " self.prior, coding_rank=1, compression=False)\n", + " y_tilde, rate = entropy_model(y, training=training)\n", + " x_tilde = self.synthesis_transform(y_tilde)\n", + "\n", + " # Average number of bits per MNIST digit.\n", + " rate = tf.reduce_mean(rate)\n", + "\n", + " # Mean absolute difference across pixels.\n", + " distortion = tf.reduce_mean(abs(x - x_tilde))\n", + "\n", + " return dict(rate=rate, distortion=distortion)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vEXbp9RV3kRX" + }, + "source": [ + "### Compute rate and distortion.\n", + "\n", + "Let's walk through this step by step, using one image from the training set. 
Load the MNIST dataset for training and validation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7FV99WTrIBen" + }, + "outputs": [], + "source": [ + "training_dataset, validation_dataset = tfds.load(\n", + " \"mnist\",\n", + " split=[\"train\", \"test\"],\n", + " shuffle_files=True,\n", + " as_supervised=True,\n", + " with_info=False,\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SwKgNTg_QfjH" + }, + "source": [ + "And extract one image $x$:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "O-BSdeHcPBBf" + }, + "outputs": [], + "source": [ + "(x, _), = validation_dataset.take(1)\n", + "\n", + "plt.imshow(tf.squeeze(x))\n", + "print(f\"Data type: {x.dtype}\")\n", + "print(f\"Shape: {x.shape}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "V8IvuFkrRJIa" + }, + "source": [ + "To get the latent representation $y$, we need to cast it to `float32`, add a batch dimension, and pass it through the analysis transform." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "jA0DOWq23lEq" + }, + "outputs": [], + "source": [ + "x = tf.cast(x, tf.float32) / 255.\n", + "x = tf.reshape(x, (-1, 28, 28, 1))\n", + "y = make_analysis_transform(10)(x)\n", + "\n", + "print(\"y:\", y)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "rTojJQvZT8SX" + }, + "source": [ + "The latents will be quantized at test time. To model this in a differentiable way during training, we add uniform noise in the interval $(-.5, .5)$ and call the result $\\tilde y$. This is the same terminology as used in the paper [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Spr3503OUOFQ" + }, + "outputs": [], + "source": [ + "y_tilde = y + tf.random.uniform(y.shape, -.5, .5)\n", + "\n", + "print(\"y_tilde:\", y_tilde)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "7hRN89R7SA3U" + }, + "source": [ + "The \"prior\" is a probability density that we train to model the marginal distribution of the noisy latents. For example, it could be a set of independent [logistic distributions](https://en.wikipedia.org/wiki/Logistic_distribution) with different scales for each latent dimension. `tfc.NoisyLogistic` accounts for the fact that the latents have additive noise. As the scale approaches zero, a logistic distribution approaches a dirac delta (spike), but the added noise causes the \"noisy\" distribution to approach the uniform distribution instead." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2tmA1Bw7ReMY" + }, + "outputs": [], + "source": [ + "prior = tfc.NoisyLogistic(loc=0., scale=tf.linspace(.01, 2., 10))\n", + "\n", + "_ = tf.linspace(-6., 6., 501)[:, None]\n", + "plt.plot(_, prior.prob(_));\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2NSWtBZmUvVY" + }, + "source": [ + "During training, `tfc.ContinuousBatchedEntropyModel` adds uniform noise, and uses the noise and the prior to compute a (differentiable) upper bound on the rate (the average number of bits necessary to encode the latent representation). That bound can be minimized as a loss." 
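For intuition, here is a conceptual sketch only (not the library's internal implementation, which also handles quantization offsets and tail probabilities): that rate estimate is essentially the information content of the noisy latents under the prior, summed over the coding dimensions and measured in bits. It reuses the `prior` and `y_tilde` defined above.

```python
# Conceptual sketch: the rate is roughly the negative log-probability (in bits)
# of the noisy latents under the prior. Values from the entropy model in the
# next cell will differ somewhat, since it handles extra implementation details.
bits_per_dim = -tf.math.log(prior.prob(y_tilde)) / tf.math.log(2.)
approx_rate = tf.reduce_sum(bits_per_dim, axis=-1)  # sum over the coding (last) axis
print("approximate rate:", approx_rate)
```

The entropy model call in the next cell returns a comparable quantity directly.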
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "hFuGlyJuThBC" + }, + "outputs": [], + "source": [ + "entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " prior, coding_rank=1, compression=False)\n", + "y_tilde, rate = entropy_model(y, training=True)\n", + "\n", + "print(\"rate:\", rate)\n", + "print(\"y_tilde:\", y_tilde)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Cyr8DGgmWd32" + }, + "source": [ + "Lastly, the noisy latents are passed back through the synthesis transform to produce an image reconstruction $\\tilde x$. Distortion is the error between original image and reconstruction. Obviously, with the transforms untrained, the reconstruction is not very useful." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "gtmI0xGEVym0" + }, + "outputs": [], + "source": [ + "x_tilde = make_synthesis_transform()(y_tilde)\n", + "\n", + "# Mean absolute difference across pixels.\n", + "distortion = tf.reduce_mean(abs(x - x_tilde))\n", + "print(\"distortion:\", distortion)\n", + "\n", + "x_tilde = tf.saturate_cast(x_tilde[0] * 255, tf.uint8)\n", + "plt.imshow(tf.squeeze(x_tilde))\n", + "print(f\"Data type: {x_tilde.dtype}\")\n", + "print(f\"Shape: {x_tilde.shape}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UVz3I7E8ecij" + }, + "source": [ + "For every batch of digits, calling the `MNISTCompressionTrainer` produces the rate and distortion as an average over that batch:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ICJnjj1LeB8L" + }, + "outputs": [], + "source": [ + "(example_batch, _), = validation_dataset.batch(32).take(1)\n", + "trainer = MNISTCompressionTrainer(10)\n", + "example_output = trainer(example_batch)\n", + "\n", + "print(\"rate: \", example_output[\"rate\"])\n", + "print(\"distortion: \", example_output[\"distortion\"])\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lgdfRtmee5Mn" + }, + "source": [ + "In the next section, we set up the model to do gradient descent on these two losses." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fKGVwv5MAq6w" + }, + "source": [ + "## Train the model.\n", + "\n", + "We compile the trainer in a way that it optimizes the rate–distortion Lagrangian, that is, a sum of rate and distortion, where one of the terms is weighted by Lagrange parameter $\\lambda$.\n", + "\n", + "This loss function affects the different parts of the model differently:\n", + "- The analysis transform is trained to produce a latent representation that achieves the desired trade-off between rate and distortion.\n", + "- The synthesis transform is trained to minimize distortion, given the latent representation.\n", + "- The parameters of the prior are trained to minimize the rate given the latent representation. This is identical to fitting the prior to the marginal distribution of latents in a maximum likelihood sense." 
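Concretely, with the `loss_weights` used in the next cell (`rate=1., distortion=lmbda`), the quantity being minimized is the weighted sum

$$
L \;=\; \underbrace{\mathbb{E}\big[\mathrm{rate}(\tilde y)\big]}_{\text{bits per digit}} \;+\; \lambda\,\underbrace{\mathbb{E}\big[\,\lvert x - \tilde x\rvert\,\big]}_{\text{mean absolute error}},
$$

so larger values of $\lambda$ favor reconstruction quality over bit rate.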
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "k5mm1aDkcgAf" + }, + "outputs": [], + "source": [ + "def pass_through_loss(_, x):\n", + " # Since rate and distortion are unsupervised, the loss doesn't need a target.\n", + " return x\n", + "\n", + "def make_mnist_compression_trainer(lmbda, latent_dims=50):\n", + " trainer = MNISTCompressionTrainer(latent_dims)\n", + " trainer.compile(\n", + " optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),\n", + " # Just pass through rate and distortion as losses/metrics.\n", + " loss=dict(rate=pass_through_loss, distortion=pass_through_loss),\n", + " metrics=dict(rate=pass_through_loss, distortion=pass_through_loss),\n", + " loss_weights=dict(rate=1., distortion=lmbda),\n", + " )\n", + " return trainer\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DPwd4DTs3Mfr" + }, + "source": [ + "Next, train the model. The human annotations are not necessary here, since we just want to compress the images, so we drop them using a `map` and instead add \"dummy\" targets for rate and distortion." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QNBpCTgzAV7M" + }, + "outputs": [], + "source": [ + "def add_rd_targets(image, label):\n", + " # Training is unsupervised, so labels aren't necessary here. However, we\n", + " # need to add \"dummy\" targets for rate and distortion.\n", + " return image, dict(rate=0., distortion=0.)\n", + "\n", + "def train_mnist_model(lmbda):\n", + " trainer = make_mnist_compression_trainer(lmbda)\n", + " trainer.fit(\n", + " training_dataset.map(add_rd_targets).batch(128).prefetch(8),\n", + " epochs=15,\n", + " validation_data=validation_dataset.map(add_rd_targets).batch(128).cache(),\n", + " validation_freq=1,\n", + " verbose=1,\n", + " )\n", + " return trainer\n", + "\n", + "trainer = train_mnist_model(lmbda=2000)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Td4xuttmCd7T" + }, + "source": [ + "## Compress some MNIST images.\n", + "\n", + "For compression and decompression at test time, we split the trained model into two parts:\n", + "\n", + "- The encoder side consists of the analysis transform and the entropy model.\n", + "- The decoder side consists of the synthesis transform and the same entropy model.\n", + "\n", + "At test time, the latents will not have additive noise, but they will be quantized and then losslessly compressed, so we give them new names. We call them $\hat y$ and the image reconstruction $\hat x$, respectively (following [End-to-end Optimized Image Compression](https://arxiv.org/abs/1611.01704))."
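Put differently, where training used additive uniform noise, the codec applies hard quantization (up to an optional quantization offset handled internally by the entropy model, an implementation detail not shown in this notebook):

$$
\tilde y = y + u,\quad u \sim \mathcal{U}\!\left(-\tfrac{1}{2}, \tfrac{1}{2}\right) \;\;\text{(training)}, \qquad \hat y \approx \operatorname{round}(y) \;\;\text{(test time)}.
$$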
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sBRAPa5jksss" + }, + "outputs": [], + "source": [ + "class MNISTCompressor(tf.keras.Model):\n", + " \"\"\"Compresses MNIST images to strings.\"\"\"\n", + "\n", + " def __init__(self, analysis_transform, entropy_model):\n", + " super().__init__()\n", + " self.analysis_transform = analysis_transform\n", + " self.entropy_model = entropy_model\n", + "\n", + " def call(self, x):\n", + " # Ensure inputs are floats in the range (0, 1).\n", + " x = tf.cast(x, self.compute_dtype) / 255.\n", + " y = self.analysis_transform(x)\n", + " # Also return the exact information content of each digit.\n", + " _, bits = self.entropy_model(y, training=False)\n", + " return self.entropy_model.compress(y), bits\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sSZ0X2xPnkN-" + }, + "outputs": [], + "source": [ + "class MNISTDecompressor(tf.keras.Model):\n", + " \"\"\"Decompresses MNIST images from strings.\"\"\"\n", + "\n", + " def __init__(self, entropy_model, synthesis_transform):\n", + " super().__init__()\n", + " self.entropy_model = entropy_model\n", + " self.synthesis_transform = synthesis_transform\n", + "\n", + " def call(self, string):\n", + " y_hat = self.entropy_model.decompress(string, ())\n", + " x_hat = self.synthesis_transform(y_hat)\n", + " # Scale and cast back to 8-bit integer.\n", + " return tf.saturate_cast(tf.round(x_hat * 255.), tf.uint8)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "GI7rxeOUDnaC" + }, + "source": [ + "When instantiated with `compression=True`, the entropy model converts the learned prior into tables for a range coding algorithm. When calling `compress()`, this algorithm is invoked to convert the latent space vector into bit sequences. The length of each binary string approximates the information content of the latent (the negative log likelihood of the latent under the prior).\n", + "\n", + "The entropy model for compression and decompression must be the same instance, because the range coding tables need to be exactly identical on both sides. Otherwise, decoding errors can occur." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Dnm_p7mbnigo" + }, + "outputs": [], + "source": [ + "def make_mnist_codec(trainer, **kwargs):\n", + " # The entropy model must be created with `compression=True` and the same\n", + " # instance must be shared between compressor and decompressor.\n", + " entropy_model = tfc.ContinuousBatchedEntropyModel(\n", + " trainer.prior, coding_rank=1, compression=True, **kwargs)\n", + " compressor = MNISTCompressor(trainer.analysis_transform, entropy_model)\n", + " decompressor = MNISTDecompressor(entropy_model, trainer.synthesis_transform)\n", + " return compressor, decompressor\n", + "\n", + "compressor, decompressor = make_mnist_codec(trainer)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SYu5sVVH3YMv" + }, + "source": [ + "Grab 16 images from the validation dataset. You can select a different subset by changing the argument to `skip`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qAxArlU728K5" + }, + "outputs": [], + "source": [ + "(originals, _), = validation_dataset.batch(16).skip(3).take(1)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CHeN_ny929YS" + }, + "source": [ + "Compress them to strings, and keep track of each of their information content in bits." 
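As an optional aside before inspecting individual digits, here is a small self-contained check, a sketch using only the `compressor` and `originals` defined above (exact numbers depend on the trained model), of how much space the compressed strings take compared with the raw 8-bit pixels:

```python
# Sketch: total compressed size of the 16 digits versus their raw uint8 size.
check_strings, _ = compressor(originals)
compressed_bits = int(tf.reduce_sum(tf.strings.length(check_strings))) * 8
raw_bits = int(tf.size(originals)) * 8  # one uint8 byte per pixel
print(f"compressed: {compressed_bits} bits, raw: {raw_bits} bits")
```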
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "smOk42gQ3IXv" + }, + "outputs": [], + "source": [ + "strings, entropies = compressor(originals)\n", + "\n", + "print(f\"String representation of first digit in hexadecimal: 0x{strings[0].numpy().hex()}\")\n", + "print(f\"Number of bits actually needed to represent it: {entropies[0]:0.2f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5j9R4bTT3Qhl" + }, + "source": [ + "Decompress the images back from the strings." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yOP6pEqU3P0w" + }, + "outputs": [], + "source": [ + "reconstructions = decompressor(strings)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JWo0Q-vy23tt" + }, + "source": [ + "Display each of the 16 original digits together with its compressed binary representation, and the reconstructed digit." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "jU5IqzZzeEpf" + }, + "outputs": [], + "source": [ + "#@title\n", + "\n", + "def display_digits(originals, strings, entropies, reconstructions):\n", + " \"\"\"Visualizes 16 digits together with their reconstructions.\"\"\"\n", + " fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(12.5, 5))\n", + " axes = axes.ravel()\n", + " for i in range(len(axes)):\n", + " image = tf.concat([\n", + " tf.squeeze(originals[i]),\n", + " tf.zeros((28, 14), tf.uint8),\n", + " tf.squeeze(reconstructions[i]),\n", + " ], 1)\n", + " axes[i].imshow(image)\n", + " axes[i].text(\n", + " .5, .5, f\"→ 0x{strings[i].numpy().hex()} →\\n{entropies[i]:0.2f} bits\",\n", + " ha=\"center\", va=\"top\", color=\"white\", fontsize=\"small\",\n", + " transform=axes[i].transAxes)\n", + " axes[i].axis(\"off\")\n", + " plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "km9PqVEtPJPc" + }, + "outputs": [], + "source": [ + "display_digits(originals, strings, entropies, reconstructions)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EzlrIOiYOzJc" + }, + "source": [ + "Note that the length of the encoded string differs from the information content of each digit.\n", + "\n", + "This is because the range coding process works with discrete probabilities, and has a small amount of overhead. So, especially for short strings, the correspondence is only approximate. However, range coding is **asymptotically optimal**: in the limit, the expected bit count will approach the cross entropy (the expected information content), for which the rate term in the training model is an upper bound." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "78qIG8t8FvJW" + }, + "source": [ + "## The rate–distortion trade-off\n", + "\n", + "Above, the model was trained for a specific trade-off (given by `lmbda=2000`) between the average number of bits used to represent each digit and the incurred error in the reconstruction.\n", + "\n", + "What happens when we repeat the experiment with different values?\n", + "\n", + "Let's start by reducing $\\lambda$ to 500." 
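Before doing that, as an optional extension, here is a sketch (slow to run, since every point retrains the model for 15 epochs) of how one could sweep several values of $\lambda$ with the helpers defined above and trace out a rate–distortion curve. The `rd_point` helper is hypothetical and not part of the tutorial; it uses the training-time rate/distortion estimates on the `example_batch` defined earlier.

```python
# Sketch: collect (rate, distortion) points for a few lambda values, using the
# differentiable estimates returned by the trainer on a validation batch.
def rd_point(lmbda):
  trainer = train_mnist_model(lmbda=lmbda)
  metrics = trainer(example_batch)  # averaged over the batch
  return float(metrics["rate"]), float(metrics["distortion"])

# Uncomment to run (slow):
# points = [rd_point(l) for l in (300, 500, 2000)]
# rates, distortions = zip(*points)
# plt.plot(rates, distortions, "o-")
# plt.xlabel("rate (bits per digit)"); plt.ylabel("distortion (mean abs. error)")
```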
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1iFcAD0WF78p" + }, + "outputs": [], + "source": [ + "def train_and_visualize_model(lmbda):\n", + " trainer = train_mnist_model(lmbda=lmbda)\n", + " compressor, decompressor = make_mnist_codec(trainer)\n", + " strings, entropies = compressor(originals)\n", + " reconstructions = decompressor(strings)\n", + " display_digits(originals, strings, entropies, reconstructions)\n", + "\n", + "train_and_visualize_model(lmbda=500)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Uy5OkgJMObMc" + }, + "source": [ + "The bit rate of our code goes down, as does the fidelity of the digits. However, most of the digits remain recognizable.\n", + "\n", + "Let's reduce $\\lambda$ further." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NQp9_9_5GcxH" + }, + "outputs": [], + "source": [ + "train_and_visualize_model(lmbda=300)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "3ELLMANN1OwMQ" + }, + "source": [ + "The strings begin to get much shorter now, on the order of one byte per digit. However, this comes at a cost. More digits are becoming unrecognizable.\n", + "\n", + "This demonstrates that this model is agnostic to human perceptions of error, it just measures the absolute deviation in terms of pixel values. To achieve a better perceived image quality, we would need to replace the pixel loss with a perceptual loss." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "v9cWHtH0LP_r" + }, + "source": [ + "## Use the decoder as a generative model.\n", + "\n", + "If we feed the decoder random bits, this will effectively sample from the distribution that the model learned to represent digits.\n", + "\n", + "First, re-instantiate the compressor/decompressor without a sanity check that would detect if the input string isn't completely decoded." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qnic8YsM0_ke" + }, + "outputs": [], + "source": [ + "compressor, decompressor = make_mnist_codec(trainer, decode_sanity_check=False)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "86uc9_Is1eeo" + }, + "source": [ + "Now, feed long enough random strings into the decompressor so that it can decode/sample digits from them." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "o4fP7BkqKCHY" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "strings = tf.constant([os.urandom(8) for _ in range(16)])\n", + "samples = decompressor(strings)\n", + "\n", + "fig, axes = plt.subplots(4, 4, sharex=True, sharey=True, figsize=(5, 5))\n", + "axes = axes.ravel()\n", + "for i in range(len(axes)):\n", + " axes[i].imshow(tf.squeeze(samples[i]))\n", + " axes[i].axis(\"off\")\n", + "plt.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)\n" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "data_compression.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/site/en/tutorials/generative/pix2pix.ipynb b/site/en/tutorials/generative/pix2pix.ipynb index 5c97053c50b..e380924d04d 100644 --- a/site/en/tutorials/generative/pix2pix.ipynb +++ b/site/en/tutorials/generative/pix2pix.ipynb @@ -72,13 +72,13 @@ "source": [ "This tutorial demonstrates how to build and train a conditional generative adversarial network (cGAN) called pix2pix that learns a mapping from input images to output images, as described in [Image-to-image translation with conditional adversarial networks](https://arxiv.org/abs/1611.07004) by Isola et al. (2017). pix2pix is not application specific—it can be applied to a wide range of tasks, including synthesizing photos from label maps, generating colorized photos from black and white images, turning Google Maps photos into aerial images, and even transforming sketches into photos.\n", "\n", - "In this example, your network will generate images of building facades using the [CMP Facade Database](http://cmp.felk.cvut.cz/~tylecr1/facade/) provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/) at the [Czech Technical University in Prague](https://www.cvut.cz/). To keep it short, you will use a [preprocessed copy]((https://people.eecs.berkeley.edu/~tinghuiz/projects/pix2pix/datasets/)) of this dataset created by the pix2pix authors.\n", + "In this example, your network will generate images of building facades using the [CMP Facade Database](http://cmp.felk.cvut.cz/~tylecr1/facade/) provided by the [Center for Machine Perception](http://cmp.felk.cvut.cz/) at the [Czech Technical University in Prague](https://www.cvut.cz/). To keep it short, you will use a [preprocessed copy](https://efrosgans.eecs.berkeley.edu/pix2pix/datasets/) of this dataset created by the pix2pix authors.\n", "\n", "In the pix2pix cGAN, you condition on input images and generate corresponding output images. cGANs were first proposed in [Conditional Generative Adversarial Nets](https://arxiv.org/abs/1411.1784) (Mirza and Osindero, 2014)\n", "\n", "The architecture of your network will contain:\n", "\n", - "- A generator with a [U-Net]([U-Net](https://arxiv.org/abs/1505.04597))-based architecture.\n", + "- A generator with a [U-Net](https://arxiv.org/abs/1505.04597)-based architecture.\n", "- A discriminator represented by a convolutional PatchGAN classifier (proposed in the [pix2pix paper](https://arxiv.org/abs/1611.07004)).\n", "\n", "Note that each epoch can take around 15 seconds on a single V100 GPU.\n", @@ -125,7 +125,7 @@ "source": [ "## Load the dataset\n", "\n", - "Download the CMP Facade Database data (30MB). 
Additional datasets are available in the same format [here](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/). In Colab you can select other datasets from the drop-down menu. Note that some of the other datasets are significantly larger (`edges2handbags` is 8GB). " + "Download the CMP Facade Database data (30MB). Additional datasets are available in the same format [here](http://efrosgans.eecs.berkeley.edu/pix2pix/datasets/). In Colab you can select other datasets from the drop-down menu. Note that some of the other datasets are significantly larger (`edges2handbags` is 8GB in size). " ] }, { @@ -156,7 +156,9 @@ "\n", "path_to_zip = pathlib.Path(path_to_zip)\n", "\n", - "PATH = path_to_zip.parent/dataset_name" + "extraction_dir = f'{dataset_name}_extracted/{dataset_name}'\n", + "\n", + "PATH = path_to_zip.parent/extraction_dir" ] }, { @@ -280,7 +282,7 @@ "\n", "1. Resize each `256 x 256` image to a larger height and width—`286 x 286`.\n", "2. Randomly crop it back to `256 x 256`.\n", - "3. Randomly flip the image horizontally i.e. left to right (random mirroring).\n", + "3. Randomly flip the image horizontally i.e., left to right (random mirroring).\n", "4. Normalize the images to the `[-1, 1]` range." ] }, @@ -490,7 +492,7 @@ "source": [ "## Build the generator\n", "\n", - "The generator of your pix2pix cGAN is a _modified_ [U-Net](https://arxiv.org/abs/1505.04597). A U-Net consists of an encoder (downsampler) and decoder (upsampler). (You can find out more about it in the [Image segmentation](https://www.tensorflow.org/tutorials/images/segmentation) tutorial and on the [U-Net project website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/).)\n", + "The generator of your pix2pix cGAN is a _modified_ [U-Net](https://arxiv.org/abs/1505.04597). A U-Net consists of an encoder (downsampler) and decoder (upsampler). (You can find out more about it in the [Image segmentation](../images/segmentation.ipynb) tutorial and on the [U-Net project website](https://lmb.informatik.uni-freiburg.de/people/ronneber/u-net/).)\n", "\n", "- Each block in the encoder is: Convolution -> Batch normalization -> Leaky ReLU\n", "- Each block in the decoder is: Transposed convolution -> Batch normalization -> Dropout (applied to the first 3 blocks) -> ReLU\n", @@ -1007,8 +1009,7 @@ "id": "Rb0QQFHF-JfS" }, "source": [ - "Note: The `training=True` is intentional here since\n", - "you want the batch statistics, while running the model on the test dataset. If you use `training=False`, you get the accumulated statistics learned from the training dataset (which you don't want)." + "Note: The `training=True` is intentional here since you want the batch statistics, while running the model on the test dataset. If you use `training=False`, you get the accumulated statistics learned from the training dataset (which you don't want)." ] }, { @@ -1181,7 +1182,8 @@ "\n", "If you work on a local machine, you would launch a separate TensorBoard process. 
When working in a notebook, launch the viewer before starting the training to monitor with TensorBoard.\n", "\n", - "To launch the viewer paste the following into a code-cell:" + "Launch the TensorBoard viewer (Sorry, this doesn't\n", + "display on tensorflow.org):" ] }, { @@ -1199,72 +1201,30 @@ { "cell_type": "markdown", "metadata": { - "id": "Pe0-8Bzg22ox" - }, - "source": [ - "Finally, run the training loop:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "a1zZmKmvOH85" - }, - "outputs": [], - "source": [ - "fit(train_dataset, test_dataset, steps=40000)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "oeq9sByu86-B" - }, - "source": [ - "If you want to share the TensorBoard results _publicly_, you can upload the logs to [TensorBoard.dev](https://tensorboard.dev/) by copying the following into a code-cell.\n", - "\n", - "Note: This requires a Google account.\n", - "\n", - "```\n", - "!tensorboard dev upload --logdir {log_dir}\n", - "```" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "l-kT7WHRKz-E" + "id": "fyjixlMlBybN" }, "source": [ - "Caution: This command does not terminate. It's designed to continuously upload the results of long-running experiments. Once your data is uploaded you need to stop it using the \"interrupt execution\" option in your notebook tool." + "You can view the [results of a previous run](https://tensorboard.dev/experiment/lZ0C6FONROaUMfjYkVyJqw) of this notebook on [TensorBoard.dev](https://tensorboard.dev/)." ] }, { "cell_type": "markdown", "metadata": { - "id": "-lGhS_LfwQoL" + "id": "Pe0-8Bzg22ox" }, "source": [ - "You can view the [results of a previous run](https://tensorboard.dev/experiment/lZ0C6FONROaUMfjYkVyJqw) of this notebook on [TensorBoard.dev](https://tensorboard.dev/).\n", - "\n", - "TensorBoard.dev is a managed experience for hosting, tracking, and sharing ML experiments with everyone.\n", - "\n", - "It can also included inline using an `