diff --git a/.gitignore b/.gitignore index bcb6b89f6ff..80cf8846a58 100644 --- a/.gitignore +++ b/.gitignore @@ -30,4 +30,8 @@ env/ .idea .env* **/venv -**/noxfile.py \ No newline at end of file +**/noxfile.py + +# Auth Local secrets file +auth/custom-credentials/okta/custom-credentials-okta-secrets.json +auth/custom-credentials/aws/custom-credentials-aws-secrets.json diff --git a/.kokoro/docker/Dockerfile b/.kokoro/docker/Dockerfile index ba9af12a933..c37e7f091e2 100644 --- a/.kokoro/docker/Dockerfile +++ b/.kokoro/docker/Dockerfile @@ -110,33 +110,68 @@ RUN curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - \ && rm -rf /var/lib/apt/lists/* \ && rm -f /var/cache/apt/archives/*.deb -COPY fetch_gpg_keys.sh /tmp -# Install the desired versions of Python. -RUN set -ex \ - && export GNUPGHOME="$(mktemp -d)" \ - && echo "disable-ipv6" >> "${GNUPGHOME}/dirmngr.conf" \ - && /tmp/fetch_gpg_keys.sh \ - && for PYTHON_VERSION in 2.7.18 3.7.17 3.8.20 3.9.20 3.10.15 3.11.10 3.12.7 3.13.0; do \ - wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz "https://www.python.org/ftp/python/$%7BPYTHON_VERSION%%[a-z]*%7D/Python-$PYTHON_VERSION.tar.xz" \ - && wget --no-check-certificate -O python-${PYTHON_VERSION}.tar.xz.asc "https://www.python.org/ftp/python/$%7BPYTHON_VERSION%%[a-z]*%7D/Python-$PYTHON_VERSION.tar.xz.asc" \ - && gpg --batch --verify python-${PYTHON_VERSION}.tar.xz.asc python-${PYTHON_VERSION}.tar.xz \ - && rm -r python-${PYTHON_VERSION}.tar.xz.asc \ - && mkdir -p /usr/src/python-${PYTHON_VERSION} \ - && tar -xJC /usr/src/python-${PYTHON_VERSION} --strip-components=1 -f python-${PYTHON_VERSION}.tar.xz \ - && rm python-${PYTHON_VERSION}.tar.xz \ - && cd /usr/src/python-${PYTHON_VERSION} \ - && ./configure \ - --enable-shared \ - # This works only on Python 2.7 and throws a warning on every other - # version, but seems otherwise harmless. - --enable-unicode=ucs4 \ - --with-system-ffi \ - --without-ensurepip \ - && make -j$(nproc) \ - && make install \ - && ldconfig \ +# From https://www.python.org/downloads/metadata/sigstore/ +# Starting with Python 3.14, Sigstore is the only method of signing and verification of release artifacts. +RUN LATEST_VERSION="2.6.1" && \ + wget "https://github.com/sigstore/cosign/releases/download/v$%7BLATEST_VERSION%7D/cosign_$%7BLATEST_VERSION%7D_amd64.deb" && \ + dpkg -i cosign_${LATEST_VERSION}_amd64.deb && \ + rm cosign_${LATEST_VERSION}_amd64.deb + +ARG PYTHON_VERSIONS="3.7.17 3.8.20 3.9.23 3.10.18 3.11.13 3.12.11 3.13.8 3.14.0" + +SHELL ["/bin/bash", "-c"] + +RUN set -eux; \ + # Define the required associative arrays completely. + declare -A PYTHON_IDENTITIES; \ + PYTHON_IDENTITIES=(\ + [3.7]="nad@python.org" \ + [3.8]="lukasz@langa.pl" \ + [3.9]="lukasz@langa.pl" \ + [3.10]="pablogsal@python.org" \ + [3.11]="pablogsal@python.org" \ + [3.12]="thomas@python.org" \ + [3.13]="thomas@python.org" \ + [3.14]="hugo@python.org" \ + ); \ + declare -A PYTHON_ISSUERS; \ + PYTHON_ISSUERS=(\ + [3.7]="https://github.com/login/oauth" \ + [3.8]="https://github.com/login/oauth" \ + [3.9]="https://github.com/login/oauth" \ + [3.10]="https://accounts.google.com/" \ + [3.11]="https://accounts.google.com/" \ + [3.12]="https://accounts.google.com/" \ + [3.13]="https://accounts.google.com/" \ + [3.14]="https://github.com/login/oauth" \ + ); \ + \ + for VERSION in $PYTHON_VERSIONS; do \ + # 1. 
Define VERSION_GROUP (e.g., 3.14 from 3.14.0) + VERSION_GROUP="$(echo "${VERSION}" | cut -d . -f 1,2)"; \ + \ + # 2. Look up IDENTITY and ISSUER using the defined VERSION_GROUP + IDENTITY="${PYTHON_IDENTITIES[$VERSION_GROUP]}"; \ + ISSUER="${PYTHON_ISSUERS[$VERSION_GROUP]}"; \ + \ + wget --quiet -O python-${VERSION}.tar.xz "https://www.python.org/ftp/python/$%7BVERSION%7D/Python-$VERSION.tar.xz" \ + && wget --quiet -O python-${VERSION}.tar.xz.sigstore "https://www.python.org/ftp/python/$%7BVERSION%7D/Python-$VERSION.tar.xz.sigstore" \ + # Verify the Python tarball signature with cosign. + && cosign verify-blob python-${VERSION}.tar.xz \ + --certificate-oidc-issuer "${ISSUER}" \ + --certificate-identity "${IDENTITY}" \ + --bundle python-${VERSION}.tar.xz.sigstore \ + && mkdir -p /usr/src/python-${VERSION} \ + && tar -xJC /usr/src/python-${VERSION} --strip-components=1 -f python-${VERSION}.tar.xz \ + && rm python-${VERSION}.tar.xz \ + && cd /usr/src/python-${VERSION} \ + && ./configure \ + --enable-shared \ + --with-system-ffi \ + && make -j$(nproc) \ + && make install \ + && ldconfig \ ; done \ - && rm -rf "${GNUPGHOME}" \ && rm -rf /usr/src/python* \ && rm -rf ~/.cache/ @@ -158,6 +193,7 @@ RUN wget --no-check-certificate -O /tmp/get-pip-3-7.py 'https://bootstrap.pypa.i && [ "$(pip list |tac|tac| awk -F '[ ()]+' '$1 == "pip" { print $2; exit }')" = "$PYTHON_PIP_VERSION" ] # Ensure Pip for all python3 versions +RUN python3.14 /tmp/get-pip.py RUN python3.13 /tmp/get-pip.py RUN python3.12 /tmp/get-pip.py RUN python3.11 /tmp/get-pip.py @@ -175,6 +211,7 @@ RUN python3.10 -m pip RUN python3.11 -m pip RUN python3.12 -m pip RUN python3.13 -m pip +RUN python3.14 -m pip # Install "setuptools" for Python 3.12+ (see https://docs.python.org/3/whatsnew/3.12.html#distutils) RUN python3.12 -m pip install --no-cache-dir setuptools diff --git a/.kokoro/docker/fetch_gpg_keys.sh b/.kokoro/docker/fetch_gpg_keys.sh deleted file mode 100755 index 5b8dbbab1ed..00000000000 --- a/.kokoro/docker/fetch_gpg_keys.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# A script to fetch gpg keys with retry. - -function retry { - if [[ "${#}" -le 1 ]]; then - echo "Usage: ${0} retry_count commands.." - exit 1 - fi - local retries=${1} - local command="${@:2}" - until [[ "${retries}" -le 0 ]]; do - $command && return 0 - if [[ $? 
-ne 0 ]]; then - echo "command failed, retrying" - ((retries--)) - fi - done - return 1 -} - -# 2.7.17 (Benjamin Peterson) -retry 3 gpg --keyserver keyserver.ubuntu.com --recv-keys \ - C01E1CAD5EA2C4F0B8E3571504C367C218ADD4FF - -# 3.4.10, 3.5.9 (Larry Hastings) -retry 3 gpg --keyserver keyserver.ubuntu.com --recv-keys \ - 97FC712E4C024BBEA48A61ED3A5CA953F73C700D - -# 3.6.9, 3.7.5 (Ned Deily) -retry 3 gpg --keyserver keyserver.ubuntu.com --recv-keys \ - 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D - -# 3.8.0, 3.9.0 (Ɓukasz Langa) -retry 3 gpg --keyserver keyserver.ubuntu.com --recv-keys \ - E3FF2839C048B25C084DEBE9B26995E310250568 - -# 3.10.x and 3.11.x (Pablo Galindo Salgado) -retry 3 gpg --keyserver keyserver.ubuntu.com --recv-keys \ - A035C8C19219BA821ECEA86B64E628F8D684696D - -# 3.12.x and 3.13.x source files and tags (Thomas Wouters) -retry 3 gpg --keyserver keyserver.ubuntu.com --recv-keys \ - A821E680E5FA6305 \ No newline at end of file diff --git a/.kokoro/python3.14/common.cfg b/.kokoro/python3.14/common.cfg new file mode 100644 index 00000000000..8d12e9ed952 --- /dev/null +++ b/.kokoro/python3.14/common.cfg @@ -0,0 +1,59 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Format: //devtools/kokoro/config/proto/build.proto + +timeout_mins: 300 + +# Configure the docker image for kokoro-trampoline. +env_vars: { + key: "TRAMPOLINE_IMAGE" + value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker" +} + +# Download trampoline resources. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline" + +# Use the trampoline script to run in docker. +build_file: "python-docs-samples/.kokoro/trampoline_v2.sh" + +# Download secrets from Cloud Storage. +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples" + +# Access btlr binaries used in the tests +gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/btlr" + +# Copy results for Resultstore +action { + define_artifacts { + regex: "**/*sponge_log.xml" + } +} + +# Specify which tests to run +env_vars: { + key: "RUN_TESTS_SESSION" + value: "py-3.14" +} + +env_vars: { + key: "BUILD_SPECIFIC_GCLOUD_PROJECT" + value: "python-docs-samples-tests-314" +} + +# Number of test workers. +env_vars: { + key: "NUM_TEST_WORKERS" + value: "10" +} diff --git a/.kokoro/python3.14/continuous.cfg b/.kokoro/python3.14/continuous.cfg new file mode 100644 index 00000000000..5753c38482a --- /dev/null +++ b/.kokoro/python3.14/continuous.cfg @@ -0,0 +1,21 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Tell the trampoline which build file to use.
+env_vars: {
+  key: "TRAMPOLINE_BUILD_FILE"
+  value: ".kokoro/tests/run_tests_diff_head.sh"
+}
diff --git a/.kokoro/python3.14/periodic.cfg b/.kokoro/python3.14/periodic.cfg
new file mode 100644
index 00000000000..8a14abb05ef
--- /dev/null
+++ b/.kokoro/python3.14/periodic.cfg
@@ -0,0 +1,27 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Tell the trampoline which build file to use.
+env_vars: {
+  key: "TRAMPOLINE_BUILD_FILE"
+  value: ".kokoro/tests/run_tests.sh"
+}
+
+# Tell Trampoline to upload the Docker image after a successful build.
+env_vars: {
+  key: "TRAMPOLINE_IMAGE_UPLOAD"
+  value: "true"
+}
diff --git a/.kokoro/python3.14/presubmit.cfg b/.kokoro/python3.14/presubmit.cfg
new file mode 100644
index 00000000000..b8ecd3b0d15
--- /dev/null
+++ b/.kokoro/python3.14/presubmit.cfg
@@ -0,0 +1,21 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Tell the trampoline which build file to use.
+env_vars: {
+  key: "TRAMPOLINE_BUILD_FILE"
+  value: ".kokoro/tests/run_tests_diff_main.sh"
+}
diff --git a/.kokoro/tests/run_tests.sh b/.kokoro/tests/run_tests.sh
index 1715decdce7..191b40b09e0 100755
--- a/.kokoro/tests/run_tests.sh
+++ b/.kokoro/tests/run_tests.sh
@@ -58,7 +58,7 @@ if [[ $* == *--only-diff-head* ]]; then
     fi
 fi
 
-# Because Kokoro runs presubmit builds simalteneously, we often see
+# Because Kokoro runs presubmit builds simultaneously, we often see
 # quota related errors. I think we can avoid this by changing the
 # order of tests to execute (e.g. reverse order for py-3.8
 # build). Currently there's no easy way to do that with btlr, so we
diff --git a/AUTHORING_GUIDE.md b/AUTHORING_GUIDE.md
index abf5e31abf1..6ae8d0a0372 100644
--- a/AUTHORING_GUIDE.md
+++ b/AUTHORING_GUIDE.md
@@ -68,7 +68,7 @@ We recommend using the Python version management tool
 [Pyenv](https://github.com/pyenv/pyenv) if you are using MacOS or Linux.
 
 **Googlers:** See [the internal Python policies
-doc](https://g3doc.corp.google.com/company/teams/cloud-devrel/dpe/samples/python.md?cl=head).
+doc](go/cloudsamples/language-guides/python).
**Using MacOS?:** See [Setting up a Mac development environment with pyenv and pyenv-virtualenv](MAC_SETUP.md). diff --git a/appengine/flexible/django_cloudsql/requirements.txt b/appengine/flexible/django_cloudsql/requirements.txt index 284290f2532..e309f97d5bc 100644 --- a/appengine/flexible/django_cloudsql/requirements.txt +++ b/appengine/flexible/django_cloudsql/requirements.txt @@ -1,4 +1,4 @@ -Django==5.2.5 +Django==5.2.9 gunicorn==23.0.0 psycopg2-binary==2.9.10 django-environ==0.12.0 diff --git a/appengine/flexible/hello_world/app.yaml b/appengine/flexible/hello_world/app.yaml index ac38af83425..78198c8821a 100644 --- a/appengine/flexible/hello_world/app.yaml +++ b/appengine/flexible/hello_world/app.yaml @@ -17,7 +17,7 @@ env: flex entrypoint: gunicorn -b :$PORT main:app runtime_config: - operating_system: ubuntu22 + operating_system: ubuntu24 # This sample incurs costs to run on the App Engine flexible environment. # The settings below are to reduce costs during testing and are not appropriate diff --git a/appengine/flexible/hello_world_django/requirements.txt b/appengine/flexible/hello_world_django/requirements.txt index 564852cb740..435ef2cb8ee 100644 --- a/appengine/flexible/hello_world_django/requirements.txt +++ b/appengine/flexible/hello_world_django/requirements.txt @@ -1,2 +1,2 @@ -Django==5.2.5 +Django==5.2.9 gunicorn==23.0.0 diff --git a/appengine/flexible_python37_and_earlier/hello_world_django/requirements.txt b/appengine/flexible_python37_and_earlier/hello_world_django/requirements.txt index 564852cb740..435ef2cb8ee 100644 --- a/appengine/flexible_python37_and_earlier/hello_world_django/requirements.txt +++ b/appengine/flexible_python37_and_earlier/hello_world_django/requirements.txt @@ -1,2 +1,2 @@ -Django==5.2.5 +Django==5.2.9 gunicorn==23.0.0 diff --git a/appengine/standard/noxfile-template.py b/appengine/standard/noxfile-template.py index f96f3288d70..ae73deaa7f1 100644 --- a/appengine/standard/noxfile-template.py +++ b/appengine/standard/noxfile-template.py @@ -79,7 +79,7 @@ def get_pytest_env_vars(): # DO NOT EDIT - automatically generated. # All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. IGNORED_VERSIONS = ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] diff --git a/appengine/standard/noxfile_config.py b/appengine/standard/noxfile_config.py index 9d81eb86207..f39811085fa 100644 --- a/appengine/standard/noxfile_config.py +++ b/appengine/standard/noxfile_config.py @@ -24,7 +24,7 @@ TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["3.6", "3.7", "3.8", "3.9", "3.10", "3.11"], + "ignored_versions": ["3.6", "3.7", "3.8", "3.10", "3.11", "3.12", "3.13"], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": False, diff --git a/appengine/standard_python3/bigquery/app.yaml b/appengine/standard_python3/bigquery/app.yaml index 83c91f5b872..472f1f0c034 100644 --- a/appengine/standard_python3/bigquery/app.yaml +++ b/appengine/standard_python3/bigquery/app.yaml @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-runtime: python39 +runtime: python313 diff --git a/appengine/standard_python3/building-an-app/building-an-app-1/app.yaml b/appengine/standard_python3/building-an-app/building-an-app-1/app.yaml index 100d540982b..2ecf42a0f4f 100644 --- a/appengine/standard_python3/building-an-app/building-an-app-1/app.yaml +++ b/appengine/standard_python3/building-an-app/building-an-app-1/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python313 +runtime: python314 handlers: # This configures Google App Engine to serve the files in the app's static diff --git a/appengine/standard_python3/building-an-app/building-an-app-1/requirements-test.txt b/appengine/standard_python3/building-an-app/building-an-app-1/requirements-test.txt index c2845bffbe8..c987bcfee7e 100644 --- a/appengine/standard_python3/building-an-app/building-an-app-1/requirements-test.txt +++ b/appengine/standard_python3/building-an-app/building-an-app-1/requirements-test.txt @@ -1 +1,2 @@ -pytest==7.0.1 +pytest==7.0.1; python_version == '3.9' +pytest==9.0.2; python_version >= '3.10' diff --git a/appengine/standard_python3/building-an-app/building-an-app-2/app.yaml b/appengine/standard_python3/building-an-app/building-an-app-2/app.yaml index a0931a8a5d9..100d540982b 100644 --- a/appengine/standard_python3/building-an-app/building-an-app-2/app.yaml +++ b/appengine/standard_python3/building-an-app/building-an-app-2/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 handlers: # This configures Google App Engine to serve the files in the app's static diff --git a/appengine/standard_python3/building-an-app/building-an-app-3/app.yaml b/appengine/standard_python3/building-an-app/building-an-app-3/app.yaml index a0931a8a5d9..100d540982b 100644 --- a/appengine/standard_python3/building-an-app/building-an-app-3/app.yaml +++ b/appengine/standard_python3/building-an-app/building-an-app-3/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 handlers: # This configures Google App Engine to serve the files in the app's static diff --git a/appengine/standard_python3/building-an-app/building-an-app-4/app.yaml b/appengine/standard_python3/building-an-app/building-an-app-4/app.yaml index a0931a8a5d9..100d540982b 100644 --- a/appengine/standard_python3/building-an-app/building-an-app-4/app.yaml +++ b/appengine/standard_python3/building-an-app/building-an-app-4/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 handlers: # This configures Google App Engine to serve the files in the app's static diff --git a/appengine/standard_python3/bundled-services/blobstore/django/app.yaml b/appengine/standard_python3/bundled-services/blobstore/django/app.yaml index 96e1c924ee3..6994339e157 100644 --- a/appengine/standard_python3/bundled-services/blobstore/django/app.yaml +++ b/appengine/standard_python3/bundled-services/blobstore/django/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-runtime: python39 +runtime: python313 app_engine_apis: true handlers: diff --git a/appengine/standard_python3/bundled-services/blobstore/flask/app.yaml b/appengine/standard_python3/bundled-services/blobstore/flask/app.yaml index 96e1c924ee3..6994339e157 100644 --- a/appengine/standard_python3/bundled-services/blobstore/flask/app.yaml +++ b/appengine/standard_python3/bundled-services/blobstore/flask/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 app_engine_apis: true handlers: diff --git a/appengine/standard_python3/bundled-services/blobstore/wsgi/app.yaml b/appengine/standard_python3/bundled-services/blobstore/wsgi/app.yaml index 96e1c924ee3..6994339e157 100644 --- a/appengine/standard_python3/bundled-services/blobstore/wsgi/app.yaml +++ b/appengine/standard_python3/bundled-services/blobstore/wsgi/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 app_engine_apis: true handlers: diff --git a/appengine/standard_python3/bundled-services/deferred/django/app.yaml b/appengine/standard_python3/bundled-services/deferred/django/app.yaml index 84314e1d25b..c2226a56b67 100644 --- a/appengine/standard_python3/bundled-services/deferred/django/app.yaml +++ b/appengine/standard_python3/bundled-services/deferred/django/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 app_engine_apis: true env_variables: NDB_USE_CROSS_COMPATIBLE_PICKLE_PROTOCOL: "True" diff --git a/appengine/standard_python3/bundled-services/deferred/flask/app.yaml b/appengine/standard_python3/bundled-services/deferred/flask/app.yaml index 84314e1d25b..c2226a56b67 100644 --- a/appengine/standard_python3/bundled-services/deferred/flask/app.yaml +++ b/appengine/standard_python3/bundled-services/deferred/flask/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 app_engine_apis: true env_variables: NDB_USE_CROSS_COMPATIBLE_PICKLE_PROTOCOL: "True" diff --git a/appengine/standard_python3/bundled-services/deferred/wsgi/app.yaml b/appengine/standard_python3/bundled-services/deferred/wsgi/app.yaml index 84314e1d25b..c2226a56b67 100644 --- a/appengine/standard_python3/bundled-services/deferred/wsgi/app.yaml +++ b/appengine/standard_python3/bundled-services/deferred/wsgi/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 app_engine_apis: true env_variables: NDB_USE_CROSS_COMPATIBLE_PICKLE_PROTOCOL: "True" diff --git a/appengine/standard_python3/bundled-services/mail/django/app.yaml b/appengine/standard_python3/bundled-services/mail/django/app.yaml index ff79a69182c..902fe897910 100644 --- a/appengine/standard_python3/bundled-services/mail/django/app.yaml +++ b/appengine/standard_python3/bundled-services/mail/django/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-runtime: python39 +runtime: python313 app_engine_apis: true inbound_services: diff --git a/appengine/standard_python3/bundled-services/mail/django/requirements.txt b/appengine/standard_python3/bundled-services/mail/django/requirements.txt index 4922ec66011..bdd07a4620e 100644 --- a/appengine/standard_python3/bundled-services/mail/django/requirements.txt +++ b/appengine/standard_python3/bundled-services/mail/django/requirements.txt @@ -1,4 +1,4 @@ -Django==5.1.10; python_version >= "3.10" +Django==5.1.13; python_version >= "3.10" Django==4.2.16; python_version >= "3.8" and python_version < "3.10" Django==3.2.25; python_version < "3.8" django-environ==0.10.0 diff --git a/appengine/standard_python3/bundled-services/mail/flask/app.yaml b/appengine/standard_python3/bundled-services/mail/flask/app.yaml index ff79a69182c..79f6d993358 100644 --- a/appengine/standard_python3/bundled-services/mail/flask/app.yaml +++ b/appengine/standard_python3/bundled-services/mail/flask/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python312 app_engine_apis: true inbound_services: diff --git a/appengine/standard_python3/bundled-services/mail/wsgi/app.yaml b/appengine/standard_python3/bundled-services/mail/wsgi/app.yaml index ff79a69182c..79f6d993358 100644 --- a/appengine/standard_python3/bundled-services/mail/wsgi/app.yaml +++ b/appengine/standard_python3/bundled-services/mail/wsgi/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python312 app_engine_apis: true inbound_services: diff --git a/appengine/standard_python3/cloudsql/app.yaml b/appengine/standard_python3/cloudsql/app.yaml index 496b60f231b..dfb14663846 100644 --- a/appengine/standard_python3/cloudsql/app.yaml +++ b/appengine/standard_python3/cloudsql/app.yaml @@ -14,7 +14,7 @@ # [START gae_python38_cloudsql_config] # [START gae_python3_cloudsql_config] -runtime: python39 +runtime: python313 env_variables: CLOUD_SQL_USERNAME: YOUR-USERNAME diff --git a/appengine/standard_python3/cloudsql/requirements.txt b/appengine/standard_python3/cloudsql/requirements.txt index 7ca534fe2e0..7fe39c1a1b2 100644 --- a/appengine/standard_python3/cloudsql/requirements.txt +++ b/appengine/standard_python3/cloudsql/requirements.txt @@ -1,6 +1,6 @@ flask==3.0.0 # psycopg2==2.8.4 # you will need either the binary or the regular - for more info see http://initd.org/psycopg/docs/install.html -psycopg2-binary==2.9.9 +psycopg2-binary==2.9.11 PyMySQL==1.1.1 -SQLAlchemy==2.0.10 \ No newline at end of file +SQLAlchemy==2.0.44 diff --git a/appengine/standard_python3/custom-server/app.yaml b/appengine/standard_python3/custom-server/app.yaml index ff2f64b2b26..b67aef4f96e 100644 --- a/appengine/standard_python3/custom-server/app.yaml +++ b/appengine/standard_python3/custom-server/app.yaml @@ -14,7 +14,7 @@ # [START gae_python38_custom_runtime] # [START gae_python3_custom_runtime] -runtime: python39 +runtime: python313 entrypoint: uwsgi --http-socket :$PORT --wsgi-file main.py --callable app --master --processes 1 --threads 2 # [END gae_python3_custom_runtime] # [END gae_python38_custom_runtime] diff --git a/appengine/standard_python3/django/app.yaml b/appengine/standard_python3/django/app.yaml index 5a7255118c8..ddf86e23823 100644 --- a/appengine/standard_python3/django/app.yaml +++ b/appengine/standard_python3/django/app.yaml @@ -15,7 +15,7 @@ # # [START 
gaestd_py_django_app_yaml] -runtime: python39 +runtime: python313 env_variables: # This setting is used in settings.py to configure your ALLOWED_HOSTS diff --git a/appengine/standard_python3/django/requirements.txt b/appengine/standard_python3/django/requirements.txt index cdd4b54cf3e..60b4408e6b4 100644 --- a/appengine/standard_python3/django/requirements.txt +++ b/appengine/standard_python3/django/requirements.txt @@ -1,4 +1,4 @@ -Django==5.1.8; python_version >= "3.10" +Django==5.1.15; python_version >= "3.10" Django==4.2.17; python_version >= "3.8" and python_version < "3.10" Django==3.2.25; python_version < "3.8" django-environ==0.10.0 diff --git a/appengine/standard_python3/migration/urlfetch/app.yaml b/appengine/standard_python3/migration/urlfetch/app.yaml index dd75aa47c69..3aa9d9d2207 100644 --- a/appengine/standard_python3/migration/urlfetch/app.yaml +++ b/appengine/standard_python3/migration/urlfetch/app.yaml @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 diff --git a/appengine/standard_python3/pubsub/app.yaml b/appengine/standard_python3/pubsub/app.yaml index 9e3e948e4db..3c36b4bfb3c 100644 --- a/appengine/standard_python3/pubsub/app.yaml +++ b/appengine/standard_python3/pubsub/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 # [START gae_standard_pubsub_env] env_variables: diff --git a/appengine/standard_python3/redis/app.yaml b/appengine/standard_python3/redis/app.yaml index 2797ed154f7..138895c3737 100644 --- a/appengine/standard_python3/redis/app.yaml +++ b/appengine/standard_python3/redis/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 env_variables: REDIS_HOST: your-redis-host diff --git a/appengine/standard_python3/spanner/app.yaml b/appengine/standard_python3/spanner/app.yaml index a4e3167ec08..59a31baca33 100644 --- a/appengine/standard_python3/spanner/app.yaml +++ b/appengine/standard_python3/spanner/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 env_variables: SPANNER_INSTANCE: "YOUR-SPANNER-INSTANCE-ID" diff --git a/appengine/standard_python3/warmup/app.yaml b/appengine/standard_python3/warmup/app.yaml index fdda19a79b1..3cc59533b01 100644 --- a/appengine/standard_python3/warmup/app.yaml +++ b/appengine/standard_python3/warmup/app.yaml @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -runtime: python39 +runtime: python313 inbound_services: - warmup diff --git a/auth/custom-credentials/aws/Dockerfile b/auth/custom-credentials/aws/Dockerfile new file mode 100644 index 00000000000..d90d88aa0a8 --- /dev/null +++ b/auth/custom-credentials/aws/Dockerfile @@ -0,0 +1,15 @@ +FROM python:3.13-slim + +RUN useradd -m appuser + +WORKDIR /app + +COPY --chown=appuser:appuser requirements.txt . + +USER appuser +RUN pip install --no-cache-dir -r requirements.txt + +COPY --chown=appuser:appuser snippets.py . 
+ + +CMD ["python3", "snippets.py"] diff --git a/auth/custom-credentials/aws/README.md b/auth/custom-credentials/aws/README.md new file mode 100644 index 00000000000..551c95ef691 --- /dev/null +++ b/auth/custom-credentials/aws/README.md @@ -0,0 +1,127 @@ +# Running the Custom AWS Credential Supplier Sample + +This sample demonstrates how to use a custom AWS security credential supplier to authenticate with Google Cloud using AWS as an external identity provider. It uses Boto3 (the AWS SDK for Python) to fetch credentials from sources like Amazon Elastic Kubernetes Service (EKS) with IAM Roles for Service Accounts(IRSA), Elastic Container Service (ECS), or Fargate. + +## Prerequisites + +* An AWS account. +* A Google Cloud project with the IAM API enabled. +* A GCS bucket. +* Python 3.10 or later installed. + +If you want to use AWS security credentials that cannot be retrieved using methods supported natively by the [google-auth](https://github.com/googleapis/google-auth-library-python) library, a custom `AwsSecurityCredentialsSupplier` implementation may be specified. The supplier must return valid, unexpired AWS security credentials when called by the Google Cloud Auth library. + + +## Running Locally + +For local development, you can provide credentials and configuration in a JSON file. + +### Install Dependencies + +Ensure you have Python installed, then install the required libraries: + +```bash +pip install -r requirements.txt +``` + +### Configure Credentials for Local Development + +1. Copy the example secrets file to a new file named `custom-credentials-aws-secrets.json`: + ```bash + cp custom-credentials-aws-secrets.json.example custom-credentials-aws-secrets.json + ``` +2. Open `custom-credentials-aws-secrets.json` and fill in the required values for your AWS and Google Cloud configuration. Do not check your `custom-credentials-aws-secrets.json` file into version control. + +**Note:** This file is only used for local development and is not needed when running in a containerized environment like EKS with IRSA. + + +### Run the Script + +```bash +python3 snippets.py +``` + +When run locally, the script will detect the `custom-credentials-aws-secrets.json` file and use it to configure the necessary environment variables for the Boto3 client. + +## Running in a Containerized Environment (EKS) + +This section provides a brief overview of how to run the sample in an Amazon EKS cluster. + +### EKS Cluster Setup + +First, you need an EKS cluster. You can create one using `eksctl` or the AWS Management Console. For detailed instructions, refer to the [Amazon EKS documentation](https://docs.aws.amazon.com/eks/latest/userguide/create-cluster.html). + +### Configure IAM Roles for Service Accounts (IRSA) + +IRSA enables you to associate an IAM role with a Kubernetes service account. This provides a secure way for your pods to access AWS services without hardcoding long-lived credentials. + +Run the following command to create the IAM role and bind it to a Kubernetes Service Account: + +```bash +eksctl create iamserviceaccount \ + --name your-k8s-service-account \ + --namespace default \ + --cluster your-cluster-name \ + --region your-aws-region \ + --role-name your-role-name \ + --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \ + --approve +``` + +> **Note**: The `--attach-policy-arn` flag is used here to demonstrate attaching permissions. Update this with the specific AWS policy ARN your application requires. 
+ +For a deep dive into how this works without using `eksctl`, refer to the [IAM Roles for Service Accounts](https://docs.aws.amazon.com/eks/latest/userguide/iam-roles-for-service-accounts.html) documentation. + +### Configure Google Cloud to Trust the AWS Role + +To allow your AWS role to authenticate as a Google Cloud service account, you need to configure Workload Identity Federation. This process involves these key steps: + +1. **Create a Workload Identity Pool and an AWS Provider:** The pool holds the configuration, and the provider is set up to trust your AWS account. + +2. **Create or select a Google Cloud Service Account:** This service account will be impersonated by your AWS role. + +3. **Bind the AWS Role to the Google Cloud Service Account:** Create an IAM policy binding that gives your AWS role the `Workload Identity User` (`roles/iam.workloadIdentityUser`) role on the Google Cloud service account. + +For more detailed information, see the documentation on [Configuring Workload Identity Federation](https://cloud.google.com/iam/docs/workload-identity-federation-with-other-clouds). + +**Alternative: Direct Access** + +> For supported resources, you can grant roles directly to the AWS identity, bypassing service account impersonation. To do this, grant a role (like `roles/storage.objectViewer`) to the workload identity principal (`principalSet://...`) directly on the resource's IAM policy. + +For more detailed information, see the documentation on [Configuring Workload Identity Federation](https://cloud.google.com/iam/docs/workload-identity-federation-with-other-clouds). + +### Containerize and Package the Application + +Create a `Dockerfile` for the Python application and push the image to a container registry (for example Amazon ECR) that your EKS cluster can access. + +**Note:** The provided [`Dockerfile`](Dockerfile) is an example and may need to be modified for your specific needs. + +Build and push the image: +```bash +docker build -t your-container-image:latest . +docker push your-container-image:latest +``` + +### Deploy to EKS + +Create a Kubernetes deployment manifest to deploy your application to the EKS cluster. See the [`pod.yaml`](pod.yaml) file for an example. + +**Note:** The provided [`pod.yaml`](pod.yaml) is an example and may need to be modified for your specific needs. + +Deploy the pod: + +```bash +kubectl apply -f pod.yaml +``` + +### Clean Up + +To clean up the resources, delete the EKS cluster and any other AWS and Google Cloud resources you created. + +```bash +eksctl delete cluster --name your-cluster-name +``` + +## Testing + +This sample is not continuously tested. It is provided for instructional purposes and may require modifications to work in your environment. 
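For quick reference, the essential flow implemented by the `snippets.py` added in this directory can be condensed as the sketch below. It is illustrative only: `CustomAwsSupplier` is the class defined in the sample, while the audience and bucket name are placeholders to replace with your own values.

```python
# Condensed sketch of the sample's flow: a Boto3-backed supplier hands AWS
# credentials to google-auth, which exchanges them for a federated Google
# Cloud credential that a regular Cloud Storage client can use.
from google.auth import aws
from google.cloud import storage

from snippets import CustomAwsSupplier  # defined in this sample

credentials = aws.Credentials(
    # Placeholder: full resource name of your Workload Identity Pool provider.
    audience="//iam.googleapis.com/projects/PROJECT_NUMBER/locations/global/"
    "workloadIdentityPools/POOL_ID/providers/PROVIDER_ID",
    subject_token_type="urn:ietf:params:aws:token-type:aws4_request",
    aws_security_credentials_supplier=CustomAwsSupplier(),
    scopes=["https://www.googleapis.com/auth/devstorage.read_only"],
)

# Any google-cloud client accepts these credentials; here we read bucket metadata.
client = storage.Client(credentials=credentials)
print(client.get_bucket("your-gcs-bucket-name")._properties)
```

Passing `service_account_impersonation_url` to `aws.Credentials`, as `snippets.py` does when that value is configured, switches from direct resource access to service account impersonation.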
diff --git a/auth/custom-credentials/aws/custom-credentials-aws-secrets.json.example b/auth/custom-credentials/aws/custom-credentials-aws-secrets.json.example new file mode 100644 index 00000000000..300dc70c138 --- /dev/null +++ b/auth/custom-credentials/aws/custom-credentials-aws-secrets.json.example @@ -0,0 +1,8 @@ +{ + "aws_access_key_id": "YOUR_AWS_ACCESS_KEY_ID", + "aws_secret_access_key": "YOUR_AWS_SECRET_ACCESS_KEY", + "aws_region": "YOUR_AWS_REGION", + "gcp_workload_audience": "YOUR_GCP_WORKLOAD_AUDIENCE", + "gcs_bucket_name": "YOUR_GCS_BUCKET_NAME", + "gcp_service_account_impersonation_url": "YOUR_GCP_SERVICE_ACCOUNT_IMPERSONATION_URL" +} diff --git a/auth/custom-credentials/aws/noxfile_config.py b/auth/custom-credentials/aws/noxfile_config.py new file mode 100644 index 00000000000..0ed973689f7 --- /dev/null +++ b/auth/custom-credentials/aws/noxfile_config.py @@ -0,0 +1,17 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +TEST_CONFIG_OVERRIDE = { + "ignored_versions": ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"], +} diff --git a/auth/custom-credentials/aws/pod.yaml b/auth/custom-credentials/aws/pod.yaml new file mode 100644 index 00000000000..70b94bf25e2 --- /dev/null +++ b/auth/custom-credentials/aws/pod.yaml @@ -0,0 +1,40 @@ +# Copyright 2025 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Pod +metadata: + name: custom-credential-pod +spec: + # The Kubernetes Service Account that is annotated with the corresponding + # AWS IAM role ARN. See the README for instructions on setting up IAM + # Roles for Service Accounts (IRSA). + serviceAccountName: your-k8s-service-account + containers: + - name: gcp-auth-sample + # The container image pushed to the container registry + # For example, Amazon Elastic Container Registry + image: your-container-image:latest + env: + # REQUIRED: The AWS region. Boto3 requires this to be set explicitly + # in containers. 
+ - name: AWS_REGION + value: "your-aws-region" + # REQUIRED: The full identifier of the Workload Identity Pool provider + - name: GCP_WORKLOAD_AUDIENCE + value: "your-gcp-workload-audience" + # OPTIONAL: Enable Google Cloud service account impersonation + # - name: GCP_SERVICE_ACCOUNT_IMPERSONATION_URL + # value: "your-gcp-service-account-impersonation-url" + - name: GCS_BUCKET_NAME + value: "your-gcs-bucket-name" diff --git a/auth/custom-credentials/aws/requirements-test.txt b/auth/custom-credentials/aws/requirements-test.txt new file mode 100644 index 00000000000..43b24059d3e --- /dev/null +++ b/auth/custom-credentials/aws/requirements-test.txt @@ -0,0 +1,2 @@ +-r requirements.txt +pytest==8.2.0 diff --git a/auth/custom-credentials/aws/requirements.txt b/auth/custom-credentials/aws/requirements.txt new file mode 100644 index 00000000000..2c302888ed7 --- /dev/null +++ b/auth/custom-credentials/aws/requirements.txt @@ -0,0 +1,5 @@ +boto3==1.40.53 +google-auth==2.43.0 +google-cloud-storage==2.19.0 +python-dotenv==1.1.1 +requests==2.32.3 diff --git a/auth/custom-credentials/aws/snippets.py b/auth/custom-credentials/aws/snippets.py new file mode 100644 index 00000000000..2d77a123015 --- /dev/null +++ b/auth/custom-credentials/aws/snippets.py @@ -0,0 +1,153 @@ +# Copyright 2025 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START auth_custom_credential_supplier_aws] +import json +import os +import sys + +import boto3 +from google.auth import aws +from google.auth import exceptions +from google.cloud import storage + + +class CustomAwsSupplier(aws.AwsSecurityCredentialsSupplier): + """Custom AWS Security Credentials Supplier using Boto3.""" + + def __init__(self): + """Initializes the Boto3 session, prioritizing environment variables for region.""" + # Explicitly read the region from the environment first. + region = os.getenv("AWS_REGION") or os.getenv("AWS_DEFAULT_REGION") + + # If region is None, Boto3's discovery chain will be used when needed. + self.session = boto3.Session(region_name=region) + self._cached_region = None + + def get_aws_region(self, context, request) -> str: + """Returns the AWS region using Boto3's default provider chain.""" + if self._cached_region: + return self._cached_region + + self._cached_region = self.session.region_name + + if not self._cached_region: + raise exceptions.GoogleAuthError( + "Boto3 was unable to resolve an AWS region." + ) + + return self._cached_region + + def get_aws_security_credentials( + self, context, request=None + ) -> aws.AwsSecurityCredentials: + """Retrieves AWS security credentials using Boto3's default provider chain.""" + creds = self.session.get_credentials() + if not creds: + raise exceptions.GoogleAuthError( + "Unable to resolve AWS credentials from Boto3." 
+ ) + + return aws.AwsSecurityCredentials( + access_key_id=creds.access_key, + secret_access_key=creds.secret_key, + session_token=creds.token, + ) + + +def authenticate_with_aws_credentials(bucket_name, audience, impersonation_url=None): + """Authenticates using the custom AWS supplier and gets bucket metadata. + + Returns: + dict: The bucket metadata response from the Google Cloud Storage API. + """ + + custom_supplier = CustomAwsSupplier() + + credentials = aws.Credentials( + audience=audience, + subject_token_type="urn:ietf:params:aws:token-type:aws4_request", + service_account_impersonation_url=impersonation_url, + aws_security_credentials_supplier=custom_supplier, + scopes=["https://www.googleapis.com/auth/devstorage.read_only"], + ) + + storage_client = storage.Client(credentials=credentials) + + bucket = storage_client.get_bucket(bucket_name) + + return bucket._properties + + +# [END auth_custom_credential_supplier_aws] + + +def _load_config_from_file(): + """ + If a local secrets file is present, load it into the environment. + This is a "just-in-time" configuration for local development. These + variables are only set for the current process and are not exposed to the + shell. + """ + secrets_file = "custom-credentials-aws-secrets.json" + if os.path.exists(secrets_file): + with open(secrets_file, "r") as f: + try: + secrets = json.load(f) + except json.JSONDecodeError: + print(f"Error: '{secrets_file}' is not valid JSON.", file=sys.stderr) + return + + os.environ["AWS_ACCESS_KEY_ID"] = secrets.get("aws_access_key_id", "") + os.environ["AWS_SECRET_ACCESS_KEY"] = secrets.get("aws_secret_access_key", "") + os.environ["AWS_REGION"] = secrets.get("aws_region", "") + os.environ["GCP_WORKLOAD_AUDIENCE"] = secrets.get("gcp_workload_audience", "") + os.environ["GCS_BUCKET_NAME"] = secrets.get("gcs_bucket_name", "") + os.environ["GCP_SERVICE_ACCOUNT_IMPERSONATION_URL"] = secrets.get( + "gcp_service_account_impersonation_url", "" + ) + + +def main(): + + # Reads the custom-credentials-aws-secrets.json if running locally. + _load_config_from_file() + + # Now, read the configuration from the environment. In a local run, these + # will be the values we just set. In a containerized run, they will be + # the values provided by the environment. + gcp_audience = os.getenv("GCP_WORKLOAD_AUDIENCE") + sa_impersonation_url = os.getenv("GCP_SERVICE_ACCOUNT_IMPERSONATION_URL") + gcs_bucket_name = os.getenv("GCS_BUCKET_NAME") + + if not all([gcp_audience, gcs_bucket_name]): + print( + "Required configuration missing. Please provide it in a " + "custom-credentials-aws-secrets.json file or as environment variables: " + "GCP_WORKLOAD_AUDIENCE, GCS_BUCKET_NAME" + ) + return + + try: + print(f"Retrieving metadata for bucket: {gcs_bucket_name}...") + metadata = authenticate_with_aws_credentials( + gcs_bucket_name, gcp_audience, sa_impersonation_url + ) + print("--- SUCCESS! ---") + print(json.dumps(metadata, indent=2)) + except Exception as e: + print(f"Authentication or Request failed: {e}") + + +if __name__ == "__main__": + main() diff --git a/auth/custom-credentials/aws/snippets_test.py b/auth/custom-credentials/aws/snippets_test.py new file mode 100644 index 00000000000..e0382cfc6f5 --- /dev/null +++ b/auth/custom-credentials/aws/snippets_test.py @@ -0,0 +1,130 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+from unittest import mock
+
+import pytest
+
+import snippets
+
+# --- Unit Tests ---
+
+
+@mock.patch.dict(os.environ, {"AWS_REGION": "us-west-2"})
+@mock.patch("boto3.Session")
+def test_init_priority_env_var(mock_boto_session):
+    """Test that AWS_REGION env var takes priority during init."""
+    snippets.CustomAwsSupplier()
+    mock_boto_session.assert_called_with(region_name="us-west-2")
+
+
+@mock.patch.dict(os.environ, {}, clear=True)
+@mock.patch("boto3.Session")
+def test_get_aws_region_caching(mock_boto_session):
+    """Test that get_aws_region caches the result from Boto3."""
+    mock_session_instance = mock_boto_session.return_value
+    mock_session_instance.region_name = "us-east-1"
+
+    supplier = snippets.CustomAwsSupplier()
+
+    # First call should hit the session
+    region = supplier.get_aws_region(None, None)
+    assert region == "us-east-1"
+
+    # Change the mock to ensure we aren't calling it again
+    mock_session_instance.region_name = "us-west-2"
+
+    # Second call should return the cached value
+    region2 = supplier.get_aws_region(None, None)
+    assert region2 == "us-east-1"
+
+
+@mock.patch("boto3.Session")
+def test_get_aws_security_credentials_success(mock_boto_session):
+    """Test successful retrieval of AWS credentials."""
+    mock_session_instance = mock_boto_session.return_value
+
+    mock_creds = mock.MagicMock()
+    mock_creds.access_key = "test-key"
+    mock_creds.secret_key = "test-secret"
+    mock_creds.token = "test-token"
+    mock_session_instance.get_credentials.return_value = mock_creds
+
+    supplier = snippets.CustomAwsSupplier()
+    creds = supplier.get_aws_security_credentials(None)
+
+    assert creds.access_key_id == "test-key"
+    assert creds.secret_access_key == "test-secret"
+    assert creds.session_token == "test-token"
+
+
+@mock.patch("snippets.storage.Client")
+@mock.patch("snippets.aws.Credentials")
+@mock.patch("snippets.CustomAwsSupplier")
+def test_authenticate_unit_success(MockSupplier, MockAwsCreds, MockStorageClient):
+    """Unit test for the main flow using mocks."""
+    # The sample returns bucket._properties, so mock the storage client to
+    # hand back a bucket whose _properties is a known dict.
+    mock_bucket = mock.MagicMock()
+    mock_bucket._properties = {"name": "my-bucket"}
+    MockStorageClient.return_value.get_bucket.return_value = mock_bucket
+
+    result = snippets.authenticate_with_aws_credentials(
+        bucket_name="my-bucket",
+        audience="//iam.googleapis.com/...",
+        impersonation_url=None,
+    )
+
+    assert result == {"name": "my-bucket"}
+    MockSupplier.assert_called_once()
+    MockAwsCreds.assert_called_once()
+
+
+# --- System Test (Integration) ---
+
+
+def test_authenticate_system():
+    """
+    System test that runs against the real API.
+    Skips automatically if custom-credentials-aws-secrets.json is missing or incomplete.
+    """
+    if not os.path.exists("custom-credentials-aws-secrets.json"):
+        pytest.skip(
+            "Skipping system test: custom-credentials-aws-secrets.json not found."
+        )
+
+    with open("custom-credentials-aws-secrets.json", "r") as f:
+        secrets = json.load(f)
+
+    required_keys = [
+        "gcp_workload_audience",
+        "gcs_bucket_name",
+        "aws_access_key_id",
+        "aws_secret_access_key",
+        "aws_region",
+    ]
+    if not all(key in secrets and secrets[key] for key in required_keys):
+        pytest.skip(
+            "Skipping system test: custom-credentials-aws-secrets.json is missing or has empty required keys."
+        )
+
+    # Load the local secrets into the environment (as main() does) so Boto3
+    # can resolve the AWS credentials, then call the sample directly so the
+    # bucket metadata is returned to the test.
+    snippets._load_config_from_file()
+    metadata = snippets.authenticate_with_aws_credentials(
+        secrets["gcs_bucket_name"],
+        secrets["gcp_workload_audience"],
+        secrets.get("gcp_service_account_impersonation_url") or None,
+    )
+
+    # Verify that the returned metadata is a dictionary with expected keys.
+    assert isinstance(metadata, dict)
+    assert "name" in metadata
+    assert metadata["name"] == secrets["gcs_bucket_name"]
diff --git a/auth/custom-credentials/okta/README.md b/auth/custom-credentials/okta/README.md
new file mode 100644
index 00000000000..96d444e85a4
--- /dev/null
+++ b/auth/custom-credentials/okta/README.md
@@ -0,0 +1,83 @@
+# Running the Custom Okta Credential Supplier Sample
+
+This sample demonstrates how to use a custom subject token supplier to authenticate with Google Cloud using Okta as an external identity provider. It uses the Client Credentials flow for machine-to-machine (M2M) authentication.
+
+## Prerequisites
+
+* An Okta developer account.
+* A Google Cloud project with the IAM API enabled.
+* A Google Cloud Storage bucket. Ensure that the authenticated user has access to this bucket.
+* Python 3.10 or later installed.
+
+## Okta Configuration
+
+Before running the sample, you need to configure an Okta application for Machine-to-Machine (M2M) communication.
+
+### Create an M2M Application in Okta
+
+1. Log in to your Okta developer console.
+2. Navigate to **Applications** > **Applications** and click **Create App Integration**.
+3. Select **API Services** as the sign-on method and click **Next**.
+4. Give your application a name and click **Save**.
+
+### Obtain Okta Credentials
+
+Once the application is created, you will find the following information in the **General** tab:
+
+* **Okta Domain**: Your Okta developer domain (e.g., `https://dev-123456.okta.com`).
+* **Client ID**: The client ID for your application.
+* **Client Secret**: The client secret for your application.
+
+You will need these values to configure the sample.
+
+## Google Cloud Configuration
+
+You need to configure a Workload Identity Pool in Google Cloud to trust the Okta application.
+
+### Set up Workload Identity Federation
+
+1. In the Google Cloud Console, navigate to **IAM & Admin** > **Workload Identity Federation**.
+2. Click **Create Pool** to create a new Workload Identity Pool.
+3. Add a new **OIDC provider** to the pool.
+4. Configure the provider with your Okta domain as the issuer URL.
+5. Map the Okta `sub` (subject) assertion to a GCP principal.
+
+For detailed instructions, refer to the [Workload Identity Federation documentation](https://cloud.google.com/iam/docs/workload-identity-federation).
+
+## Running the Script
+
+To run the sample on your local system, you need to install the dependencies and configure your credentials.
+
+### Install Dependencies
+
+```bash
+pip install -r requirements.txt
+```
+
+### Configure Credentials
+
+1. Copy the example secrets file to a new file named `custom-credentials-okta-secrets.json`:
+   ```bash
+   cp custom-credentials-okta-secrets.json.example custom-credentials-okta-secrets.json
+   ```
+2. Open `custom-credentials-okta-secrets.json` and fill in the following values:
+
+   * `okta_domain`: Your Okta developer domain (for example, `https://dev-123456.okta.com`).
+ * `okta_client_id`: The client ID for your application. + * `okta_client_secret`: The client secret for your application. + * `gcp_workload_audience`: The audience for the Google Cloud Workload Identity Pool. This is the full identifier of the Workload Identity Pool provider. + * `gcs_bucket_name`: The name of the Google Cloud Storage bucket to access. + * `gcp_service_account_impersonation_url`: (Optional) The URL for service account impersonation. + + +### Run the Application + +```bash +python3 snippets.py +``` + +The script authenticates with Okta to get an OIDC token, exchanges that token for a Google Cloud federated token, and uses it to list metadata for the specified Google Cloud Storage bucket. + +## Testing + +This sample is not continuously tested. It is provided for instructional purposes and may require modifications to work in your environment. diff --git a/auth/custom-credentials/okta/custom-credentials-okta-secrets.json.example b/auth/custom-credentials/okta/custom-credentials-okta-secrets.json.example new file mode 100644 index 00000000000..fa04fda7cb2 --- /dev/null +++ b/auth/custom-credentials/okta/custom-credentials-okta-secrets.json.example @@ -0,0 +1,8 @@ +{ + "okta_domain": "https://your-okta-domain.okta.com/", + "okta_client_id": "your-okta-client-id", + "okta_client_secret": "your-okta-client-secret", + "gcp_workload_audience": "//iam.googleapis.com/projects/123456789/locations/global/workloadIdentityPools/my-pool/providers/my-provider", + "gcs_bucket_name": "your-gcs-bucket-name", + "gcp_service_account_impersonation_url": "https://iamcredentials.googleapis.com/v1/projects/-/serviceAccounts/my-service-account@my-project.iam.gserviceaccount.com:generateAccessToken" +} diff --git a/auth/custom-credentials/okta/noxfile_config.py b/auth/custom-credentials/okta/noxfile_config.py new file mode 100644 index 00000000000..0ed973689f7 --- /dev/null +++ b/auth/custom-credentials/okta/noxfile_config.py @@ -0,0 +1,17 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +TEST_CONFIG_OVERRIDE = { + "ignored_versions": ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "3.11", "3.12"], +} diff --git a/auth/custom-credentials/okta/requirements-test.txt b/auth/custom-credentials/okta/requirements-test.txt new file mode 100644 index 00000000000..f47609d2651 --- /dev/null +++ b/auth/custom-credentials/okta/requirements-test.txt @@ -0,0 +1,2 @@ +-r requirements.txt +pytest==7.1.2 diff --git a/auth/custom-credentials/okta/requirements.txt b/auth/custom-credentials/okta/requirements.txt new file mode 100644 index 00000000000..d9669ebee9f --- /dev/null +++ b/auth/custom-credentials/okta/requirements.txt @@ -0,0 +1,4 @@ +requests==2.32.3 +google-cloud-storage==2.19.0 +google-auth==2.43.0 +python-dotenv==1.1.1 diff --git a/auth/custom-credentials/okta/snippets.py b/auth/custom-credentials/okta/snippets.py new file mode 100644 index 00000000000..02af2dadc93 --- /dev/null +++ b/auth/custom-credentials/okta/snippets.py @@ -0,0 +1,138 @@ +# Copyright 2025 Google LLC +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START auth_custom_credential_supplier_okta] +import json +import time +import urllib.parse + +from google.auth import identity_pool +from google.cloud import storage +import requests + + +class OktaClientCredentialsSupplier: + """A custom SubjectTokenSupplier that authenticates with Okta. + + This supplier uses the Client Credentials grant flow for machine-to-machine + (M2M) authentication with Okta. + """ + + def __init__(self, domain, client_id, client_secret): + self.okta_token_url = f"{domain.rstrip('/')}/oauth2/default/v1/token" + self.client_id = client_id + self.client_secret = client_secret + self.access_token = None + self.expiry_time = 0 + + def get_subject_token(self, context, request=None) -> str: + """Fetches a new token if the current one is expired or missing.""" + if self.access_token and time.time() < self.expiry_time - 60: + return self.access_token + self._fetch_okta_access_token() + return self.access_token + + def _fetch_okta_access_token(self): + """Performs the Client Credentials grant flow with Okta.""" + headers = { + "Content-Type": "application/x-www-form-urlencoded", + "Accept": "application/json", + } + data = { + "grant_type": "client_credentials", + "scope": "gcp.test.read", # Set scope as per Okta app config. + } + + response = requests.post( + self.okta_token_url, + headers=headers, + data=urllib.parse.urlencode(data), + auth=(self.client_id, self.client_secret), + ) + response.raise_for_status() + + token_data = response.json() + self.access_token = token_data["access_token"] + self.expiry_time = time.time() + token_data["expires_in"] + + +def authenticate_with_okta_credentials( + bucket_name, audience, domain, client_id, client_secret, impersonation_url=None +): + """Authenticates using the custom Okta supplier and gets bucket metadata. + + Returns: + dict: The bucket metadata response from the Google Cloud Storage API. 
+    """
+
+    okta_supplier = OktaClientCredentialsSupplier(domain, client_id, client_secret)
+
+    credentials = identity_pool.Credentials(
+        audience=audience,
+        subject_token_type="urn:ietf:params:oauth:token-type:jwt",
+        token_url="https://sts.googleapis.com/v1/token",
+        subject_token_supplier=okta_supplier,
+        default_scopes=["https://www.googleapis.com/auth/devstorage.read_only"],
+        service_account_impersonation_url=impersonation_url,
+    )
+
+    storage_client = storage.Client(credentials=credentials)
+
+    bucket = storage_client.get_bucket(bucket_name)
+
+    return bucket._properties
+
+
+# [END auth_custom_credential_supplier_okta]
+
+
+def main():
+    try:
+        with open("custom-credentials-okta-secrets.json") as f:
+            secrets = json.load(f)
+    except FileNotFoundError:
+        print("Could not find custom-credentials-okta-secrets.json.")
+        return
+
+    gcp_audience = secrets.get("gcp_workload_audience")
+    gcs_bucket_name = secrets.get("gcs_bucket_name")
+    sa_impersonation_url = secrets.get("gcp_service_account_impersonation_url")
+
+    okta_domain = secrets.get("okta_domain")
+    okta_client_id = secrets.get("okta_client_id")
+    okta_client_secret = secrets.get("okta_client_secret")
+
+    if not all(
+        [gcp_audience, gcs_bucket_name, okta_domain, okta_client_id, okta_client_secret]
+    ):
+        print("Missing required values in custom-credentials-okta-secrets.json.")
+        return
+
+    try:
+        print(f"Retrieving metadata for bucket: {gcs_bucket_name}...")
+        metadata = authenticate_with_okta_credentials(
+            bucket_name=gcs_bucket_name,
+            audience=gcp_audience,
+            domain=okta_domain,
+            client_id=okta_client_id,
+            client_secret=okta_client_secret,
+            impersonation_url=sa_impersonation_url,
+        )
+        print("--- SUCCESS! ---")
+        print(json.dumps(metadata, indent=2))
+    except Exception as e:
+        print(f"Authentication or Request failed: {e}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/auth/custom-credentials/okta/snippets_test.py b/auth/custom-credentials/okta/snippets_test.py
new file mode 100644
index 00000000000..1f05c4ad7bf
--- /dev/null
+++ b/auth/custom-credentials/okta/snippets_test.py
@@ -0,0 +1,134 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import os
+import time
+from unittest import mock
+import urllib.parse
+
+import pytest
+
+import snippets
+
+# --- Unit Tests ---
+
+
+def test_init_url_cleaning():
+    """Test that the token URL strips trailing slashes."""
+    s1 = snippets.OktaClientCredentialsSupplier("https://okta.com/", "id", "sec")
+    assert s1.okta_token_url == "https://okta.com/oauth2/default/v1/token"
+
+    s2 = snippets.OktaClientCredentialsSupplier("https://okta.com/", "id", "sec")
+    assert s2.okta_token_url == "https://okta.com/oauth2/default/v1/token"
+
+
+@mock.patch("requests.post")
+def test_get_subject_token_fetch(mock_post):
+    """Test fetching a new token from Okta."""
+    supplier = snippets.OktaClientCredentialsSupplier("https://okta.com/", "id", "sec")
+
+    mock_response = mock.MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {"access_token": "new-token", "expires_in": 3600}
+    mock_post.return_value = mock_response
+
+    token = supplier.get_subject_token(None, None)
+
+    assert token == "new-token"
+    mock_post.assert_called_once()
+
+    # Verify args
+    _, kwargs = mock_post.call_args
+    assert kwargs["auth"] == ("id", "sec")
+
+    sent_data = urllib.parse.parse_qs(kwargs["data"])
+    assert sent_data["grant_type"][0] == "client_credentials"
+
+
+@mock.patch("requests.post")
+def test_get_subject_token_cached(mock_post):
+    """Test that cached token is returned if valid."""
+    supplier = snippets.OktaClientCredentialsSupplier("https://okta.com/", "id", "sec")
+    supplier.access_token = "cached-token"
+    supplier.expiry_time = time.time() + 3600
+
+    token = supplier.get_subject_token(None, None)
+
+    assert token == "cached-token"
+    mock_post.assert_not_called()
+
+
+@mock.patch("snippets.storage.Client")
+@mock.patch("snippets.identity_pool.Credentials")
+@mock.patch("snippets.OktaClientCredentialsSupplier")
+def test_authenticate_unit_success(MockSupplier, MockCreds, MockStorageClient):
+    """Unit test for the main Okta auth flow."""
+    # The snippet returns bucket._properties, so have the mocked storage
+    # client return a bucket whose properties match the expected metadata.
+    mock_bucket = mock.MagicMock()
+    mock_bucket._properties = {"name": "test-bucket"}
+
+    mock_client_instance = MockStorageClient.return_value
+    mock_client_instance.get_bucket.return_value = mock_bucket
+
+    metadata = snippets.authenticate_with_okta_credentials(
+        bucket_name="test-bucket",
+        audience="test-aud",
+        domain="https://okta.com/",
+        client_id="id",
+        client_secret="sec",
+        impersonation_url=None,
+    )
+
+    assert metadata == {"name": "test-bucket"}
+    MockSupplier.assert_called_once()
+    MockCreds.assert_called_once()
+
+
+# --- System Test ---
+
+
+def test_authenticate_system():
+    """
+    System test that runs against the real API.
+    Skips automatically if custom-credentials-okta-secrets.json is missing or incomplete.
+    """
+    if not os.path.exists("custom-credentials-okta-secrets.json"):
+        pytest.skip(
+            "Skipping system test: custom-credentials-okta-secrets.json not found."
+        )
+
+    with open("custom-credentials-okta-secrets.json", "r") as f:
+        secrets = json.load(f)
+
+    required_keys = [
+        "gcp_workload_audience",
+        "gcs_bucket_name",
+        "okta_domain",
+        "okta_client_id",
+        "okta_client_secret",
+    ]
+    if not all(key in secrets for key in required_keys):
+        pytest.skip(
+            "Skipping system test: custom-credentials-okta-secrets.json is missing required keys."
+        )
+
+    # The main() function handles the auth flow and printing.
+    # We mock the print function to verify the output.
+ with mock.patch("builtins.print") as mock_print: + snippets.main() + + # Check for the success message in the print output. + output = "\n".join([call.args[0] for call in mock_print.call_args_list]) + assert "--- SUCCESS! ---" in output diff --git a/bigquery_storage/__init__.py b/bigquery_storage/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/bigquery_storage/conftest.py b/bigquery_storage/conftest.py new file mode 100644 index 00000000000..63d53531471 --- /dev/null +++ b/bigquery_storage/conftest.py @@ -0,0 +1,46 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import os +import random +from typing import Generator + +from google.cloud import bigquery + +import pytest + + +@pytest.fixture(scope="session") +def project_id() -> str: + return os.environ["GOOGLE_CLOUD_PROJECT"] + + +@pytest.fixture(scope="session") +def dataset(project_id: str) -> Generator[bigquery.Dataset, None, None]: + client = bigquery.Client() + + # Add a random suffix to dataset name to avoid conflict, because we run + # a samples test on each supported Python version almost at the same time. + dataset_time = datetime.datetime.now().strftime("%y%m%d_%H%M%S") + suffix = f"_{(random.randint(0, 99)):02d}" + dataset_name = "samples_tests_" + dataset_time + suffix + + dataset_id = "{}.{}".format(project_id, dataset_name) + dataset = bigquery.Dataset(dataset_id) + dataset.location = "us-east7" + created_dataset = client.create_dataset(dataset) + yield created_dataset + + client.delete_dataset(created_dataset, delete_contents=True) diff --git a/.github/flakybot.yaml b/bigquery_storage/pyarrow/__init__.py similarity index 82% rename from .github/flakybot.yaml rename to bigquery_storage/pyarrow/__init__.py index 55543bcd50c..a2a70562f48 100644 --- a/.github/flakybot.yaml +++ b/bigquery_storage/pyarrow/__init__.py @@ -1,15 +1,15 @@ -# Copyright 2023 Google LLC +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# https://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- -issuePriority: p2 \ No newline at end of file diff --git a/bigquery_storage/pyarrow/append_rows_with_arrow.py b/bigquery_storage/pyarrow/append_rows_with_arrow.py new file mode 100644 index 00000000000..78cb0a57573 --- /dev/null +++ b/bigquery_storage/pyarrow/append_rows_with_arrow.py @@ -0,0 +1,224 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from concurrent.futures import Future +import datetime +import decimal +from typing import Iterable + +from google.cloud import bigquery +from google.cloud import bigquery_storage_v1 +from google.cloud.bigquery import enums +from google.cloud.bigquery_storage_v1 import types as gapic_types +from google.cloud.bigquery_storage_v1.writer import AppendRowsStream +import pandas as pd +import pyarrow as pa + + +TABLE_LENGTH = 100_000 + +BQ_SCHEMA = [ + bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN), + bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64), + bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64), + bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC), + bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC), + bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING), + bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES), + bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE), + bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME), + bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME), + bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP), + bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY), + bigquery.SchemaField( + "range_date_col", enums.SqlTypeNames.RANGE, range_element_type="DATE" + ), + bigquery.SchemaField( + "range_datetime_col", + enums.SqlTypeNames.RANGE, + range_element_type="DATETIME", + ), + bigquery.SchemaField( + "range_timestamp_col", + enums.SqlTypeNames.RANGE, + range_element_type="TIMESTAMP", + ), +] + +PYARROW_SCHEMA = pa.schema( + [ + pa.field("bool_col", pa.bool_()), + pa.field("int64_col", pa.int64()), + pa.field("float64_col", pa.float64()), + pa.field("numeric_col", pa.decimal128(38, scale=9)), + pa.field("bignumeric_col", pa.decimal256(76, scale=38)), + pa.field("string_col", pa.string()), + pa.field("bytes_col", pa.binary()), + pa.field("date_col", pa.date32()), + pa.field("datetime_col", pa.timestamp("us")), + pa.field("time_col", pa.time64("us")), + pa.field("timestamp_col", pa.timestamp("us")), + pa.field("geography_col", pa.string()), + pa.field( + "range_date_col", + pa.struct([("start", pa.date32()), ("end", pa.date32())]), + ), + pa.field( + "range_datetime_col", + pa.struct([("start", pa.timestamp("us")), ("end", pa.timestamp("us"))]), + ), + pa.field( + "range_timestamp_col", + pa.struct([("start", pa.timestamp("us")), ("end", pa.timestamp("us"))]), + ), + ] +) + + +def bqstorage_write_client() -> bigquery_storage_v1.BigQueryWriteClient: + return bigquery_storage_v1.BigQueryWriteClient() + + +def make_table(project_id: 
str, dataset_id: str, bq_client: bigquery.Client) -> bigquery.Table: + table_id = "append_rows_w_arrow_test" + table_id_full = f"{project_id}.{dataset_id}.{table_id}" + bq_table = bigquery.Table(table_id_full, schema=BQ_SCHEMA) + created_table = bq_client.create_table(bq_table) + + return created_table + + +def create_stream(bqstorage_write_client: bigquery_storage_v1.BigQueryWriteClient, table: bigquery.Table) -> AppendRowsStream: + stream_name = f"projects/{table.project}/datasets/{table.dataset_id}/tables/{table.table_id}/_default" + request_template = gapic_types.AppendRowsRequest() + request_template.write_stream = stream_name + + # Add schema to the template. + arrow_data = gapic_types.AppendRowsRequest.ArrowData() + arrow_data.writer_schema.serialized_schema = PYARROW_SCHEMA.serialize().to_pybytes() + request_template.arrow_rows = arrow_data + + append_rows_stream = AppendRowsStream( + bqstorage_write_client, + request_template, + ) + return append_rows_stream + + +def generate_pyarrow_table(num_rows: int = TABLE_LENGTH) -> pa.Table: + date_1 = datetime.date(2020, 10, 1) + date_2 = datetime.date(2021, 10, 1) + + datetime_1 = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456) + datetime_2 = datetime.datetime(2017, 12, 3, 14, 11, 27, 123456) + + timestamp_1 = datetime.datetime( + 1999, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + timestamp_2 = datetime.datetime( + 2000, 12, 31, 23, 59, 59, 999999, tzinfo=datetime.timezone.utc + ) + + # Pandas Dataframe. + rows = [] + for i in range(num_rows): + row = { + "bool_col": True, + "int64_col": i, + "float64_col": float(i), + "numeric_col": decimal.Decimal("0.000000001"), + "bignumeric_col": decimal.Decimal("0.1234567891"), + "string_col": "data as string", + "bytes_col": str.encode("data in bytes"), + "date_col": datetime.date(2019, 5, 10), + "datetime_col": datetime_1, + "time_col": datetime.time(23, 59, 59, 999999), + "timestamp_col": timestamp_1, + "geography_col": "POINT(-121 41)", + "range_date_col": {"start": date_1, "end": date_2}, + "range_datetime_col": {"start": datetime_1, "end": datetime_2}, + "range_timestamp_col": {"start": timestamp_1, "end": timestamp_2}, + } + rows.append(row) + df = pd.DataFrame(rows) + + # Dataframe to PyArrow Table. + table = pa.Table.from_pandas(df, schema=PYARROW_SCHEMA) + + return table + + +def generate_write_requests( + pyarrow_table: pa.Table, +) -> Iterable[gapic_types.AppendRowsRequest]: + # Determine max_chunksize of the record batches. Because max size of + # AppendRowsRequest is 10 MB, we need to split the table if it's too big. + # See: https://cloud.google.com/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1#appendrowsrequest + max_request_bytes = 10 * 2**20 # 10 MB + chunk_num = int(pyarrow_table.nbytes / max_request_bytes) + 1 + chunk_size = int(pyarrow_table.num_rows / chunk_num) + + # Construct request(s). + for batch in pyarrow_table.to_batches(max_chunksize=chunk_size): + request = gapic_types.AppendRowsRequest() + request.arrow_rows.rows.serialized_record_batch = batch.serialize().to_pybytes() + yield request + + +def verify_result( + client: bigquery.Client, table: bigquery.Table, futures: "list[Future]" +) -> None: + bq_table = client.get_table(table) + + # Verify table schema. + assert bq_table.schema == BQ_SCHEMA + + # Verify table size. + query = client.query(f"SELECT COUNT(1) FROM `{bq_table}`;") + query_result = query.result().to_dataframe() + + # There might be extra rows due to retries. 
+ assert query_result.iloc[0, 0] >= TABLE_LENGTH + + # Verify that table was split into multiple requests. + assert len(futures) == 2 + + +def main(project_id: str, dataset: bigquery.Dataset) -> None: + # Initialize clients. + write_client = bqstorage_write_client() + bq_client = bigquery.Client() + + # Create BigQuery table. + bq_table = make_table(project_id, dataset.dataset_id, bq_client) + + # Generate local PyArrow table. + pa_table = generate_pyarrow_table() + + # Convert PyArrow table to Protobuf requests. + requests = generate_write_requests(pa_table) + + # Create writing stream to the BigQuery table. + stream = create_stream(write_client, bq_table) + + # Send requests. + futures = [] + for request in requests: + future = stream.send(request) + futures.append(future) + future.result() # Optional, will block until writing is complete. + + # Verify results. + verify_result(bq_client, bq_table, futures) diff --git a/bigquery_storage/pyarrow/append_rows_with_arrow_test.py b/bigquery_storage/pyarrow/append_rows_with_arrow_test.py new file mode 100644 index 00000000000..f31de43b51f --- /dev/null +++ b/bigquery_storage/pyarrow/append_rows_with_arrow_test.py @@ -0,0 +1,21 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from google.cloud import bigquery + +from . import append_rows_with_arrow + + +def test_append_rows_with_arrow(project_id: str, dataset: bigquery.Dataset) -> None: + append_rows_with_arrow.main(project_id, dataset) diff --git a/bigquery_storage/pyarrow/noxfile_config.py b/bigquery_storage/pyarrow/noxfile_config.py new file mode 100644 index 00000000000..29edb31ffe8 --- /dev/null +++ b/bigquery_storage/pyarrow/noxfile_config.py @@ -0,0 +1,42 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You maye obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. 
You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/bigquery_storage/pyarrow/requirements-test.txt b/bigquery_storage/pyarrow/requirements-test.txt new file mode 100644 index 00000000000..7561ed55ce2 --- /dev/null +++ b/bigquery_storage/pyarrow/requirements-test.txt @@ -0,0 +1,3 @@ +pytest===7.4.3; python_version == '3.7' +pytest===8.3.5; python_version == '3.8' +pytest==8.4.1; python_version >= '3.9' diff --git a/bigquery_storage/pyarrow/requirements.txt b/bigquery_storage/pyarrow/requirements.txt new file mode 100644 index 00000000000..a593373b829 --- /dev/null +++ b/bigquery_storage/pyarrow/requirements.txt @@ -0,0 +1,5 @@ +db_dtypes +google-cloud-bigquery +google-cloud-bigquery-storage +pandas +pyarrow diff --git a/bigquery_storage/quickstart/__init__.py b/bigquery_storage/quickstart/__init__.py new file mode 100644 index 00000000000..a2a70562f48 --- /dev/null +++ b/bigquery_storage/quickstart/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/bigquery_storage/quickstart/noxfile_config.py b/bigquery_storage/quickstart/noxfile_config.py new file mode 100644 index 00000000000..f1fa9e5618b --- /dev/null +++ b/bigquery_storage/quickstart/noxfile_config.py @@ -0,0 +1,42 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. 
You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/bigquery_storage/quickstart/quickstart.py b/bigquery_storage/quickstart/quickstart.py new file mode 100644 index 00000000000..6f120ce9a58 --- /dev/null +++ b/bigquery_storage/quickstart/quickstart.py @@ -0,0 +1,95 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse + + +def main(project_id: str = "your-project-id", snapshot_millis: int = 0) -> None: + # [START bigquerystorage_quickstart] + from google.cloud.bigquery_storage import BigQueryReadClient, types + + # TODO(developer): Set the project_id variable. + # project_id = 'your-project-id' + # + # The read session is created in this project. This project can be + # different from that which contains the table. + + client = BigQueryReadClient() + + # This example reads baby name data from the public datasets. + table = "projects/{}/datasets/{}/tables/{}".format( + "bigquery-public-data", "usa_names", "usa_1910_current" + ) + + requested_session = types.ReadSession() + requested_session.table = table + # This API can also deliver data serialized in Apache Arrow format. + # This example leverages Apache Avro. + requested_session.data_format = types.DataFormat.AVRO + + # We limit the output columns to a subset of those allowed in the table, + # and set a simple filter to only report names from the state of + # Washington (WA). + requested_session.read_options.selected_fields = ["name", "number", "state"] + requested_session.read_options.row_restriction = 'state = "WA"' + + # Set a snapshot time if it's been specified. + if snapshot_millis > 0: + snapshot_time = types.Timestamp() + snapshot_time.FromMilliseconds(snapshot_millis) + requested_session.table_modifiers.snapshot_time = snapshot_time + + parent = "projects/{}".format(project_id) + session = client.create_read_session( + parent=parent, + read_session=requested_session, + # We'll use only a single stream for reading data from the table. However, + # if you wanted to fan out multiple readers you could do so by having a + # reader process each individual stream. + max_stream_count=1, + ) + reader = client.read_rows(session.streams[0].name) + + # The read stream contains blocks of Avro-encoded bytes. The rows() method + # uses the fastavro library to parse these blocks as an iterable of Python + # dictionaries. Install fastavro with the following command: + # + # pip install google-cloud-bigquery-storage[fastavro] + rows = reader.rows(session) + + # Do any local processing by iterating over the rows. 
The + # google-cloud-bigquery-storage client reconnects to the API after any + # transient network errors or timeouts. + names = set() + states = set() + + # fastavro returns EOFError instead of StopIterationError starting v1.8.4. + # See https://github.com/googleapis/python-bigquery-storage/pull/687 + try: + for row in rows: + names.add(row["name"]) + states.add(row["state"]) + except EOFError: + pass + + print("Got {} unique names in states: {}".format(len(names), ", ".join(states))) + # [END bigquerystorage_quickstart] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("project_id") + parser.add_argument("--snapshot_millis", default=0, type=int) + args = parser.parse_args() + main(project_id=args.project_id, snapshot_millis=args.snapshot_millis) diff --git a/bigquery_storage/quickstart/quickstart_test.py b/bigquery_storage/quickstart/quickstart_test.py new file mode 100644 index 00000000000..3380c923847 --- /dev/null +++ b/bigquery_storage/quickstart/quickstart_test.py @@ -0,0 +1,40 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime + +import pytest + +from . import quickstart + + +def now_millis() -> int: + return int( + (datetime.datetime.utcnow() - datetime.datetime(1970, 1, 1)).total_seconds() + * 1000 + ) + + +def test_quickstart_wo_snapshot(capsys: pytest.CaptureFixture, project_id: str) -> None: + quickstart.main(project_id) + out, _ = capsys.readouterr() + assert "unique names in states: WA" in out + + +def test_quickstart_with_snapshot( + capsys: pytest.CaptureFixture, project_id: str +) -> None: + quickstart.main(project_id, now_millis() - 5000) + out, _ = capsys.readouterr() + assert "unique names in states: WA" in out diff --git a/bigquery_storage/quickstart/requirements-test.txt b/bigquery_storage/quickstart/requirements-test.txt new file mode 100644 index 00000000000..7561ed55ce2 --- /dev/null +++ b/bigquery_storage/quickstart/requirements-test.txt @@ -0,0 +1,3 @@ +pytest===7.4.3; python_version == '3.7' +pytest===8.3.5; python_version == '3.8' +pytest==8.4.1; python_version >= '3.9' diff --git a/bigquery_storage/quickstart/requirements.txt b/bigquery_storage/quickstart/requirements.txt new file mode 100644 index 00000000000..9d69822935d --- /dev/null +++ b/bigquery_storage/quickstart/requirements.txt @@ -0,0 +1,3 @@ +fastavro +google-cloud-bigquery +google-cloud-bigquery-storage==2.32.0 diff --git a/bigquery_storage/snippets/__init__.py b/bigquery_storage/snippets/__init__.py new file mode 100644 index 00000000000..0098709d195 --- /dev/null +++ b/bigquery_storage/snippets/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +# +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/bigquery_storage/snippets/append_rows_pending.py b/bigquery_storage/snippets/append_rows_pending.py new file mode 100644 index 00000000000..3c34b472cde --- /dev/null +++ b/bigquery_storage/snippets/append_rows_pending.py @@ -0,0 +1,132 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START bigquerystorage_append_rows_pending] +""" +This code sample demonstrates how to write records in pending mode +using the low-level generated client for Python. +""" + +from google.cloud import bigquery_storage_v1 +from google.cloud.bigquery_storage_v1 import types, writer +from google.protobuf import descriptor_pb2 + +# If you update the customer_record.proto protocol buffer definition, run: +# +# protoc --python_out=. customer_record.proto +# +# from the samples/snippets directory to generate the customer_record_pb2.py module. +from . import customer_record_pb2 + + +def create_row_data(row_num: int, name: str) -> bytes: + row = customer_record_pb2.CustomerRecord() + row.row_num = row_num + row.customer_name = name + return row.SerializeToString() + + +def append_rows_pending(project_id: str, dataset_id: str, table_id: str) -> None: + """Create a write stream, write some sample data, and commit the stream.""" + write_client = bigquery_storage_v1.BigQueryWriteClient() + parent = write_client.table_path(project_id, dataset_id, table_id) + write_stream = types.WriteStream() + + # When creating the stream, choose the type. Use the PENDING type to wait + # until the stream is committed before it is visible. See: + # https://cloud.google.com/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1#google.cloud.bigquery.storage.v1.WriteStream.Type + write_stream.type_ = types.WriteStream.Type.PENDING + write_stream = write_client.create_write_stream( + parent=parent, write_stream=write_stream + ) + stream_name = write_stream.name + + # Create a template with fields needed for the first request. + request_template = types.AppendRowsRequest() + + # The initial request must contain the stream name. + request_template.write_stream = stream_name + + # So that BigQuery knows how to parse the serialized_rows, generate a + # protocol buffer representation of your message descriptor. 
+ proto_schema = types.ProtoSchema() + proto_descriptor = descriptor_pb2.DescriptorProto() + customer_record_pb2.CustomerRecord.DESCRIPTOR.CopyToProto(proto_descriptor) + proto_schema.proto_descriptor = proto_descriptor + proto_data = types.AppendRowsRequest.ProtoData() + proto_data.writer_schema = proto_schema + request_template.proto_rows = proto_data + + # Some stream types support an unbounded number of requests. Construct an + # AppendRowsStream to send an arbitrary number of requests to a stream. + append_rows_stream = writer.AppendRowsStream(write_client, request_template) + + # Create a batch of row data by appending proto2 serialized bytes to the + # serialized_rows repeated field. + proto_rows = types.ProtoRows() + proto_rows.serialized_rows.append(create_row_data(1, "Alice")) + proto_rows.serialized_rows.append(create_row_data(2, "Bob")) + + # Set an offset to allow resuming this stream if the connection breaks. + # Keep track of which requests the server has acknowledged and resume the + # stream at the first non-acknowledged message. If the server has already + # processed a message with that offset, it will return an ALREADY_EXISTS + # error, which can be safely ignored. + # + # The first request must always have an offset of 0. + request = types.AppendRowsRequest() + request.offset = 0 + proto_data = types.AppendRowsRequest.ProtoData() + proto_data.rows = proto_rows + request.proto_rows = proto_data + + response_future_1 = append_rows_stream.send(request) + + # Send another batch. + proto_rows = types.ProtoRows() + proto_rows.serialized_rows.append(create_row_data(3, "Charles")) + + # Since this is the second request, you only need to include the row data. + # The name of the stream and protocol buffers DESCRIPTOR is only needed in + # the first request. + request = types.AppendRowsRequest() + proto_data = types.AppendRowsRequest.ProtoData() + proto_data.rows = proto_rows + request.proto_rows = proto_data + + # Offset must equal the number of rows that were previously sent. + request.offset = 2 + + response_future_2 = append_rows_stream.send(request) + + print(response_future_1.result()) + print(response_future_2.result()) + + # Shutdown background threads and close the streaming connection. + append_rows_stream.close() + + # A PENDING type stream must be "finalized" before being committed. No new + # records can be written to the stream after this method has been called. + write_client.finalize_write_stream(name=write_stream.name) + + # Commit the stream you created earlier. + batch_commit_write_streams_request = types.BatchCommitWriteStreamsRequest() + batch_commit_write_streams_request.parent = parent + batch_commit_write_streams_request.write_streams = [write_stream.name] + write_client.batch_commit_write_streams(batch_commit_write_streams_request) + + print(f"Writes to stream: '{write_stream.name}' have been committed.") + + +# [END bigquerystorage_append_rows_pending] diff --git a/bigquery_storage/snippets/append_rows_pending_test.py b/bigquery_storage/snippets/append_rows_pending_test.py new file mode 100644 index 00000000000..791e9609779 --- /dev/null +++ b/bigquery_storage/snippets/append_rows_pending_test.py @@ -0,0 +1,72 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pathlib +import random + +from google.cloud import bigquery +import pytest + +from . import append_rows_pending + +DIR = pathlib.Path(__file__).parent + + +regions = ["US", "non-US"] + + +@pytest.fixture(params=regions) +def sample_data_table( + request: pytest.FixtureRequest, + bigquery_client: bigquery.Client, + project_id: str, + dataset_id: str, + dataset_id_non_us: str, +) -> str: + dataset = dataset_id + if request.param != "US": + dataset = dataset_id_non_us + schema = bigquery_client.schema_from_json(str(DIR / "customer_record_schema.json")) + table_id = f"append_rows_proto2_{random.randrange(10000)}" + full_table_id = f"{project_id}.{dataset}.{table_id}" + table = bigquery.Table(full_table_id, schema=schema) + table = bigquery_client.create_table(table, exists_ok=True) + yield full_table_id + bigquery_client.delete_table(table, not_found_ok=True) + + +def test_append_rows_pending( + capsys: pytest.CaptureFixture, + bigquery_client: bigquery.Client, + sample_data_table: str, +) -> None: + project_id, dataset_id, table_id = sample_data_table.split(".") + append_rows_pending.append_rows_pending( + project_id=project_id, dataset_id=dataset_id, table_id=table_id + ) + out, _ = capsys.readouterr() + assert "have been committed" in out + + rows = bigquery_client.query( + f"SELECT * FROM `{project_id}.{dataset_id}.{table_id}`" + ).result() + row_items = [ + # Convert to sorted tuple of items to more easily search for expected rows. + tuple(sorted(row.items())) + for row in rows + ] + + assert (("customer_name", "Alice"), ("row_num", 1)) in row_items + assert (("customer_name", "Bob"), ("row_num", 2)) in row_items + assert (("customer_name", "Charles"), ("row_num", 3)) in row_items diff --git a/bigquery_storage/snippets/append_rows_proto2.py b/bigquery_storage/snippets/append_rows_proto2.py new file mode 100644 index 00000000000..d610b31faa2 --- /dev/null +++ b/bigquery_storage/snippets/append_rows_proto2.py @@ -0,0 +1,256 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START bigquerystorage_append_rows_raw_proto2] +""" +This code sample demonstrates using the low-level generated client for Python. +""" + +import datetime +import decimal + +from google.cloud import bigquery_storage_v1 +from google.cloud.bigquery_storage_v1 import types, writer +from google.protobuf import descriptor_pb2 + +# If you make updates to the sample_data.proto protocol buffers definition, +# run: +# +# protoc --python_out=. sample_data.proto +# +# from the samples/snippets directory to generate the sample_data_pb2 module. +from . 
import sample_data_pb2 + + +def append_rows_proto2(project_id: str, dataset_id: str, table_id: str) -> None: + """Create a write stream, write some sample data, and commit the stream.""" + write_client = bigquery_storage_v1.BigQueryWriteClient() + parent = write_client.table_path(project_id, dataset_id, table_id) + write_stream = types.WriteStream() + + # When creating the stream, choose the type. Use the PENDING type to wait + # until the stream is committed before it is visible. See: + # https://cloud.google.com/bigquery/docs/reference/storage/rpc/google.cloud.bigquery.storage.v1#google.cloud.bigquery.storage.v1.WriteStream.Type + write_stream.type_ = types.WriteStream.Type.PENDING + write_stream = write_client.create_write_stream( + parent=parent, write_stream=write_stream + ) + stream_name = write_stream.name + + # Create a template with fields needed for the first request. + request_template = types.AppendRowsRequest() + + # The initial request must contain the stream name. + request_template.write_stream = stream_name + + # So that BigQuery knows how to parse the serialized_rows, generate a + # protocol buffer representation of your message descriptor. + proto_schema = types.ProtoSchema() + proto_descriptor = descriptor_pb2.DescriptorProto() + sample_data_pb2.SampleData.DESCRIPTOR.CopyToProto(proto_descriptor) + proto_schema.proto_descriptor = proto_descriptor + proto_data = types.AppendRowsRequest.ProtoData() + proto_data.writer_schema = proto_schema + request_template.proto_rows = proto_data + + # Some stream types support an unbounded number of requests. Construct an + # AppendRowsStream to send an arbitrary number of requests to a stream. + append_rows_stream = writer.AppendRowsStream(write_client, request_template) + + # Create a batch of row data by appending proto2 serialized bytes to the + # serialized_rows repeated field. + proto_rows = types.ProtoRows() + + row = sample_data_pb2.SampleData() + row.row_num = 1 + row.bool_col = True + row.bytes_col = b"Hello, World!" + row.float64_col = float("+inf") + row.int64_col = 123 + row.string_col = "Howdy!" + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 2 + row.bool_col = False + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 3 + row.bytes_col = b"See you later!" + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 4 + row.float64_col = 1000000.125 + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 5 + row.int64_col = 67000 + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 6 + row.string_col = "Auf Wiedersehen!" + proto_rows.serialized_rows.append(row.SerializeToString()) + + # Set an offset to allow resuming this stream if the connection breaks. + # Keep track of which requests the server has acknowledged and resume the + # stream at the first non-acknowledged message. If the server has already + # processed a message with that offset, it will return an ALREADY_EXISTS + # error, which can be safely ignored. + # + # The first request must always have an offset of 0. 
+ request = types.AppendRowsRequest() + request.offset = 0 + proto_data = types.AppendRowsRequest.ProtoData() + proto_data.rows = proto_rows + request.proto_rows = proto_data + + response_future_1 = append_rows_stream.send(request) + + # Create a batch of rows containing scalar values that don't directly + # correspond to a protocol buffers scalar type. See the documentation for + # the expected data formats: + # https://cloud.google.com/bigquery/docs/write-api#data_type_conversions + proto_rows = types.ProtoRows() + + row = sample_data_pb2.SampleData() + row.row_num = 7 + date_value = datetime.date(2021, 8, 12) + epoch_value = datetime.date(1970, 1, 1) + delta = date_value - epoch_value + row.date_col = delta.days + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 8 + datetime_value = datetime.datetime(2021, 8, 12, 9, 46, 23, 987456) + row.datetime_col = datetime_value.strftime("%Y-%m-%d %H:%M:%S.%f") + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 9 + row.geography_col = "POINT(-122.347222 47.651111)" + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 10 + numeric_value = decimal.Decimal("1.23456789101112e+6") + row.numeric_col = str(numeric_value) + bignumeric_value = decimal.Decimal("-1.234567891011121314151617181920e+16") + row.bignumeric_col = str(bignumeric_value) + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 11 + time_value = datetime.time(11, 7, 48, 123456) + row.time_col = time_value.strftime("%H:%M:%S.%f") + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 12 + timestamp_value = datetime.datetime( + 2021, 8, 12, 16, 11, 22, 987654, tzinfo=datetime.timezone.utc + ) + epoch_value = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) + delta = timestamp_value - epoch_value + row.timestamp_col = int(delta.total_seconds()) * 1000000 + int(delta.microseconds) + proto_rows.serialized_rows.append(row.SerializeToString()) + + # Since this is the second request, you only need to include the row data. + # The name of the stream and protocol buffers DESCRIPTOR is only needed in + # the first request. + request = types.AppendRowsRequest() + proto_data = types.AppendRowsRequest.ProtoData() + proto_data.rows = proto_rows + request.proto_rows = proto_data + + # Offset must equal the number of rows that were previously sent. + request.offset = 6 + + response_future_2 = append_rows_stream.send(request) + + # Create a batch of rows with STRUCT and ARRAY BigQuery data types. In + # protocol buffers, these correspond to nested messages and repeated + # fields, respectively. 
+ proto_rows = types.ProtoRows() + + row = sample_data_pb2.SampleData() + row.row_num = 13 + row.int64_list.append(1) + row.int64_list.append(2) + row.int64_list.append(3) + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 14 + row.struct_col.sub_int_col = 7 + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 15 + sub_message = sample_data_pb2.SampleData.SampleStruct() + sub_message.sub_int_col = -1 + row.struct_list.append(sub_message) + sub_message = sample_data_pb2.SampleData.SampleStruct() + sub_message.sub_int_col = -2 + row.struct_list.append(sub_message) + sub_message = sample_data_pb2.SampleData.SampleStruct() + sub_message.sub_int_col = -3 + row.struct_list.append(sub_message) + proto_rows.serialized_rows.append(row.SerializeToString()) + + row = sample_data_pb2.SampleData() + row.row_num = 16 + date_value = datetime.date(2021, 8, 8) + epoch_value = datetime.date(1970, 1, 1) + delta = date_value - epoch_value + row.range_date.start = delta.days + proto_rows.serialized_rows.append(row.SerializeToString()) + + request = types.AppendRowsRequest() + request.offset = 12 + proto_data = types.AppendRowsRequest.ProtoData() + proto_data.rows = proto_rows + request.proto_rows = proto_data + + # For each request sent, a message is expected in the responses iterable. + # This sample sends 3 requests, therefore expect exactly 3 responses. + response_future_3 = append_rows_stream.send(request) + + # All three requests are in-flight, wait for them to finish being processed + # before finalizing the stream. + print(response_future_1.result()) + print(response_future_2.result()) + print(response_future_3.result()) + + # Shutdown background threads and close the streaming connection. + append_rows_stream.close() + + # A PENDING type stream must be "finalized" before being committed. No new + # records can be written to the stream after this method has been called. + write_client.finalize_write_stream(name=write_stream.name) + + # Commit the stream you created earlier. + batch_commit_write_streams_request = types.BatchCommitWriteStreamsRequest() + batch_commit_write_streams_request.parent = parent + batch_commit_write_streams_request.write_streams = [write_stream.name] + write_client.batch_commit_write_streams(batch_commit_write_streams_request) + + print(f"Writes to stream: '{write_stream.name}' have been committed.") + + +# [END bigquerystorage_append_rows_raw_proto2] diff --git a/bigquery_storage/snippets/append_rows_proto2_test.py b/bigquery_storage/snippets/append_rows_proto2_test.py new file mode 100644 index 00000000000..15e5b9d9105 --- /dev/null +++ b/bigquery_storage/snippets/append_rows_proto2_test.py @@ -0,0 +1,128 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import decimal +import pathlib +import random + +from google.cloud import bigquery +import pytest + +from . 
import append_rows_proto2 + +DIR = pathlib.Path(__file__).parent + + +regions = ["US", "non-US"] + + +@pytest.fixture(params=regions) +def sample_data_table( + request: pytest.FixtureRequest, + bigquery_client: bigquery.Client, + project_id: str, + dataset_id: str, + dataset_id_non_us: str, +) -> str: + dataset = dataset_id + if request.param != "US": + dataset = dataset_id_non_us + schema = bigquery_client.schema_from_json(str(DIR / "sample_data_schema.json")) + table_id = f"append_rows_proto2_{random.randrange(10000)}" + full_table_id = f"{project_id}.{dataset}.{table_id}" + table = bigquery.Table(full_table_id, schema=schema) + table = bigquery_client.create_table(table, exists_ok=True) + yield full_table_id + bigquery_client.delete_table(table, not_found_ok=True) + + +def test_append_rows_proto2( + capsys: pytest.CaptureFixture, + bigquery_client: bigquery.Client, + sample_data_table: str, +) -> None: + project_id, dataset_id, table_id = sample_data_table.split(".") + append_rows_proto2.append_rows_proto2( + project_id=project_id, dataset_id=dataset_id, table_id=table_id + ) + out, _ = capsys.readouterr() + assert "have been committed" in out + + rows = bigquery_client.query( + f"SELECT * FROM `{project_id}.{dataset_id}.{table_id}`" + ).result() + row_items = [ + # Convert to sorted tuple of items, omitting NULL values, to make + # searching for expected rows easier. + tuple( + sorted( + item for item in row.items() if item[1] is not None and item[1] != [] + ) + ) + for row in rows + ] + + assert ( + ("bool_col", True), + ("bytes_col", b"Hello, World!"), + ("float64_col", float("+inf")), + ("int64_col", 123), + ("row_num", 1), + ("string_col", "Howdy!"), + ) in row_items + assert (("bool_col", False), ("row_num", 2)) in row_items + assert (("bytes_col", b"See you later!"), ("row_num", 3)) in row_items + assert (("float64_col", 1000000.125), ("row_num", 4)) in row_items + assert (("int64_col", 67000), ("row_num", 5)) in row_items + assert (("row_num", 6), ("string_col", "Auf Wiedersehen!")) in row_items + assert (("date_col", datetime.date(2021, 8, 12)), ("row_num", 7)) in row_items + assert ( + ("datetime_col", datetime.datetime(2021, 8, 12, 9, 46, 23, 987456)), + ("row_num", 8), + ) in row_items + assert ( + ("geography_col", "POINT(-122.347222 47.651111)"), + ("row_num", 9), + ) in row_items + assert ( + ("bignumeric_col", decimal.Decimal("-1.234567891011121314151617181920e+16")), + ("numeric_col", decimal.Decimal("1.23456789101112e+6")), + ("row_num", 10), + ) in row_items + assert ( + ("row_num", 11), + ("time_col", datetime.time(11, 7, 48, 123456)), + ) in row_items + assert ( + ("row_num", 12), + ( + "timestamp_col", + datetime.datetime( + 2021, 8, 12, 16, 11, 22, 987654, tzinfo=datetime.timezone.utc + ), + ), + ) in row_items + assert (("int64_list", [1, 2, 3]), ("row_num", 13)) in row_items + assert ( + ("row_num", 14), + ("struct_col", {"sub_int_col": 7}), + ) in row_items + assert ( + ("row_num", 15), + ( + "struct_list", + [{"sub_int_col": -1}, {"sub_int_col": -2}, {"sub_int_col": -3}], + ), + ) in row_items diff --git a/bigquery_storage/snippets/conftest.py b/bigquery_storage/snippets/conftest.py new file mode 100644 index 00000000000..5f1e958183c --- /dev/null +++ b/bigquery_storage/snippets/conftest.py @@ -0,0 +1,65 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Generator + +from google.cloud import bigquery +import pytest +import test_utils.prefixer + +prefixer = test_utils.prefixer.Prefixer("python-bigquery-storage", "samples/snippets") + + +@pytest.fixture(scope="session", autouse=True) +def cleanup_datasets(bigquery_client: bigquery.Client) -> None: + for dataset in bigquery_client.list_datasets(): + if prefixer.should_cleanup(dataset.dataset_id): + bigquery_client.delete_dataset( + dataset, delete_contents=True, not_found_ok=True + ) + + +@pytest.fixture(scope="session") +def bigquery_client() -> bigquery.Client: + return bigquery.Client() + + +@pytest.fixture(scope="session") +def project_id(bigquery_client: bigquery.Client) -> str: + return bigquery_client.project + + +@pytest.fixture(scope="session") +def dataset_id( + bigquery_client: bigquery.Client, project_id: str +) -> Generator[str, None, None]: + dataset_id = prefixer.create_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) + + +@pytest.fixture(scope="session") +def dataset_id_non_us( + bigquery_client: bigquery.Client, project_id: str +) -> Generator[str, None, None]: + dataset_id = prefixer.create_prefix() + full_dataset_id = f"{project_id}.{dataset_id}" + dataset = bigquery.Dataset(full_dataset_id) + dataset.location = "asia-northeast1" + bigquery_client.create_dataset(dataset) + yield dataset_id + bigquery_client.delete_dataset(dataset, delete_contents=True, not_found_ok=True) diff --git a/bigquery_storage/snippets/customer_record.proto b/bigquery_storage/snippets/customer_record.proto new file mode 100644 index 00000000000..6c79336b6fa --- /dev/null +++ b/bigquery_storage/snippets/customer_record.proto @@ -0,0 +1,30 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// [START bigquerystorage_append_rows_pending_customer_record] +// The BigQuery Storage API expects protocol buffer data to be encoded in the +// proto2 wire format. This allows it to disambiguate missing optional fields +// from default values without the need for wrapper types. +syntax = "proto2"; + +// Define a message type representing the rows in your table. The message +// cannot contain fields which are not present in the table. +message CustomerRecord { + + optional string customer_name = 1; + + // Use the required keyword for client-side validation of required fields. 
+ required int64 row_num = 2; +} +// [END bigquerystorage_append_rows_pending_customer_record] diff --git a/bigquery_storage/snippets/customer_record_pb2.py b/bigquery_storage/snippets/customer_record_pb2.py new file mode 100644 index 00000000000..457ead954d8 --- /dev/null +++ b/bigquery_storage/snippets/customer_record_pb2.py @@ -0,0 +1,51 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: customer_record.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import message as _message +from google.protobuf import reflection as _reflection +from google.protobuf import symbol_database as _symbol_database + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x15\x63ustomer_record.proto"8\n\x0e\x43ustomerRecord\x12\x15\n\rcustomer_name\x18\x01 \x01(\t\x12\x0f\n\x07row_num\x18\x02 \x02(\x03' +) + + +_CUSTOMERRECORD = DESCRIPTOR.message_types_by_name["CustomerRecord"] +CustomerRecord = _reflection.GeneratedProtocolMessageType( + "CustomerRecord", + (_message.Message,), + { + "DESCRIPTOR": _CUSTOMERRECORD, + "__module__": "customer_record_pb2" + # @@protoc_insertion_point(class_scope:CustomerRecord) + }, +) +_sym_db.RegisterMessage(CustomerRecord) + +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + _CUSTOMERRECORD._serialized_start = 25 + _CUSTOMERRECORD._serialized_end = 81 +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/snippets/customer_record_schema.json b/bigquery_storage/snippets/customer_record_schema.json new file mode 100644 index 00000000000..e04b31a7ead --- /dev/null +++ b/bigquery_storage/snippets/customer_record_schema.json @@ -0,0 +1,11 @@ +[ + { + "name": "customer_name", + "type": "STRING" + }, + { + "name": "row_num", + "type": "INTEGER", + "mode": "REQUIRED" + } +] diff --git a/bigquery_storage/snippets/noxfile_config.py b/bigquery_storage/snippets/noxfile_config.py new file mode 100644 index 00000000000..f1fa9e5618b --- /dev/null +++ b/bigquery_storage/snippets/noxfile_config.py @@ -0,0 +1,42 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. 
+ +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/bigquery_storage/snippets/requirements-test.txt b/bigquery_storage/snippets/requirements-test.txt new file mode 100644 index 00000000000..230ca56dc3a --- /dev/null +++ b/bigquery_storage/snippets/requirements-test.txt @@ -0,0 +1,4 @@ +google-cloud-testutils==1.6.4 +pytest===7.4.3; python_version == '3.7' +pytest===8.3.5; python_version == '3.8' +pytest==8.4.1; python_version >= '3.9' diff --git a/bigquery_storage/snippets/requirements.txt b/bigquery_storage/snippets/requirements.txt new file mode 100644 index 00000000000..8a456493526 --- /dev/null +++ b/bigquery_storage/snippets/requirements.txt @@ -0,0 +1,6 @@ +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigquery===3.30.0; python_version <= '3.8' +google-cloud-bigquery==3.35.1; python_version >= '3.9' +pytest===7.4.3; python_version == '3.7' +pytest===8.3.5; python_version == '3.8' +pytest==8.4.1; python_version >= '3.9' diff --git a/bigquery_storage/snippets/sample_data.proto b/bigquery_storage/snippets/sample_data.proto new file mode 100644 index 00000000000..6f0bb93a65c --- /dev/null +++ b/bigquery_storage/snippets/sample_data.proto @@ -0,0 +1,70 @@ +// Copyright 2021 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// [START bigquerystorage_append_rows_raw_proto2_definition] +// The BigQuery Storage API expects protocol buffer data to be encoded in the +// proto2 wire format. This allows it to disambiguate missing optional fields +// from default values without the need for wrapper types. +syntax = "proto2"; + +// Define a message type representing the rows in your table. The message +// cannot contain fields which are not present in the table. +message SampleData { + // Use a nested message to encode STRUCT column values. + // + // References to external messages are not allowed. 
Any message definitions + // must be nested within the root message representing row data. + message SampleStruct { + optional int64 sub_int_col = 1; + } + + message RangeValue { + optional int32 start = 1; + optional int32 end = 2; + } + + // The following types map directly between protocol buffers and their + // corresponding BigQuery data types. + optional bool bool_col = 1; + optional bytes bytes_col = 2; + optional double float64_col = 3; + optional int64 int64_col = 4; + optional string string_col = 5; + + // The following data types require some encoding to use. See the + // documentation for the expected data formats: + // https://cloud.google.com/bigquery/docs/write-api#data_type_conversion + optional int32 date_col = 6; + optional string datetime_col = 7; + optional string geography_col = 8; + optional string numeric_col = 9; + optional string bignumeric_col = 10; + optional string time_col = 11; + optional int64 timestamp_col = 12; + + // Use a repeated field to represent a BigQuery ARRAY value. + repeated int64 int64_list = 13; + + // Use a nested message to encode STRUCT and ARRAY values. + optional SampleStruct struct_col = 14; + repeated SampleStruct struct_list = 15; + + // Range types, see: + // https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type + optional RangeValue range_date = 16; + + // Use the required keyword for client-side validation of required fields. + required int64 row_num = 17; +} +// [END bigquerystorage_append_rows_raw_proto2_definition] diff --git a/bigquery_storage/snippets/sample_data_pb2.py b/bigquery_storage/snippets/sample_data_pb2.py new file mode 100644 index 00000000000..54ef06d99fa --- /dev/null +++ b/bigquery_storage/snippets/sample_data_pb2.py @@ -0,0 +1,43 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: sample_data.proto +"""Generated protocol buffer code.""" +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +from google.protobuf.internal import builder as _builder + +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( + b'\n\x11sample_data.proto"\xff\x03\n\nSampleData\x12\x10\n\x08\x62ool_col\x18\x01 \x01(\x08\x12\x11\n\tbytes_col\x18\x02 \x01(\x0c\x12\x13\n\x0b\x66loat64_col\x18\x03 \x01(\x01\x12\x11\n\tint64_col\x18\x04 \x01(\x03\x12\x12\n\nstring_col\x18\x05 \x01(\t\x12\x10\n\x08\x64\x61te_col\x18\x06 \x01(\x05\x12\x14\n\x0c\x64\x61tetime_col\x18\x07 \x01(\t\x12\x15\n\rgeography_col\x18\x08 \x01(\t\x12\x13\n\x0bnumeric_col\x18\t \x01(\t\x12\x16\n\x0e\x62ignumeric_col\x18\n \x01(\t\x12\x10\n\x08time_col\x18\x0b \x01(\t\x12\x15\n\rtimestamp_col\x18\x0c \x01(\x03\x12\x12\n\nint64_list\x18\r \x03(\x03\x12,\n\nstruct_col\x18\x0e \x01(\x0b\x32\x18.SampleData.SampleStruct\x12-\n\x0bstruct_list\x18\x0f \x03(\x0b\x32\x18.SampleData.SampleStruct\x12*\n\nrange_date\x18\x10 \x01(\x0b\x32\x16.SampleData.RangeValue\x12\x0f\n\x07row_num\x18\x11 \x02(\x03\x1a#\n\x0cSampleStruct\x12\x13\n\x0bsub_int_col\x18\x01 \x01(\x03\x1a(\n\nRangeValue\x12\r\n\x05start\x18\x01 \x01(\x05\x12\x0b\n\x03\x65nd\x18\x02 \x01(\x05' +) + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "sample_data_pb2", globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + DESCRIPTOR._options = None + _SAMPLEDATA._serialized_start = 22 + _SAMPLEDATA._serialized_end = 533 + _SAMPLEDATA_SAMPLESTRUCT._serialized_start = 456 + _SAMPLEDATA_SAMPLESTRUCT._serialized_end = 491 + _SAMPLEDATA_RANGEVALUE._serialized_start = 493 + _SAMPLEDATA_RANGEVALUE._serialized_end = 533 +# @@protoc_insertion_point(module_scope) diff --git a/bigquery_storage/snippets/sample_data_schema.json b/bigquery_storage/snippets/sample_data_schema.json new file mode 100644 index 00000000000..40efb7122b5 --- /dev/null +++ b/bigquery_storage/snippets/sample_data_schema.json @@ -0,0 +1,81 @@ + +[ + { + "name": "bool_col", + "type": "BOOLEAN" + }, + { + "name": "bytes_col", + "type": "BYTES" + }, + { + "name": "date_col", + "type": "DATE" + }, + { + "name": "datetime_col", + "type": "DATETIME" + }, + { + "name": "float64_col", + "type": "FLOAT" + }, + { + "name": "geography_col", + "type": "GEOGRAPHY" + }, + { + "name": "int64_col", + "type": "INTEGER" + }, + { + "name": "numeric_col", + "type": "NUMERIC" + }, + { + "name": "bignumeric_col", + "type": "BIGNUMERIC" + }, + { + "name": "row_num", + "type": "INTEGER", + "mode": "REQUIRED" + }, + { + "name": "string_col", + "type": "STRING" + }, + { + "name": "time_col", + "type": "TIME" + }, + { + "name": "timestamp_col", + "type": "TIMESTAMP" + }, + { + "name": "int64_list", + "type": "INTEGER", + "mode": "REPEATED" + }, + { + "name": "struct_col", + "type": "RECORD", + "fields": [ + {"name": "sub_int_col", "type": "INTEGER"} + ] + }, + { + "name": "struct_list", + "type": "RECORD", + "fields": [ + {"name": "sub_int_col", "type": "INTEGER"} + ], + "mode": "REPEATED" + }, + { + "name": "range_date", + "type": "RANGE", + "rangeElementType": {"type": "DATE"} + } + ] diff --git a/bigquery_storage/to_dataframe/__init__.py b/bigquery_storage/to_dataframe/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/bigquery_storage/to_dataframe/jupyter_test.py b/bigquery_storage/to_dataframe/jupyter_test.py new file mode 100644 index 00000000000..c2046b8c80e --- /dev/null +++ b/bigquery_storage/to_dataframe/jupyter_test.py @@ -0,0 +1,67 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import IPython +from IPython.terminal import interactiveshell +from IPython.testing import tools +import pytest + +# Ignore semicolon lint warning because semicolons are used in notebooks +# flake8: noqa E703 + + +@pytest.fixture(scope="session") +def ipython(): + config = tools.default_config() + config.TerminalInteractiveShell.simple_prompt = True + shell = interactiveshell.TerminalInteractiveShell.instance(config=config) + return shell + + +@pytest.fixture() +def ipython_interactive(request, ipython): + """Activate IPython's builtin hooks + + for the duration of the test scope. + """ + with ipython.builtin_trap: + yield ipython + + +def _strip_region_tags(sample_text): + """Remove blank lines and region tags from sample text""" + magic_lines = [ + line for line in sample_text.split("\n") if len(line) > 0 and "# [" not in line + ] + return "\n".join(magic_lines) + + +def test_jupyter_tutorial(ipython): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("google.cloud.bigquery") + + # This code sample intentionally queries a lot of data to demonstrate the + # speed-up of using the BigQuery Storage API to download the results. + sample = """ + # [START bigquerystorage_jupyter_tutorial_query_default] + %%bigquery tax_forms + SELECT * FROM `bigquery-public-data.irs_990.irs_990_2012` + # [END bigquerystorage_jupyter_tutorial_query_default] + """ + result = ip.run_cell(_strip_region_tags(sample)) + result.raise_error() # Throws an exception if the cell failed. + + assert "tax_forms" in ip.user_ns # verify that variable exists diff --git a/bigquery_storage/to_dataframe/noxfile_config.py b/bigquery_storage/to_dataframe/noxfile_config.py new file mode 100644 index 00000000000..f1fa9e5618b --- /dev/null +++ b/bigquery_storage/to_dataframe/noxfile_config.py @@ -0,0 +1,42 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. 
+ +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/bigquery_storage/to_dataframe/read_query_results.py b/bigquery_storage/to_dataframe/read_query_results.py new file mode 100644 index 00000000000..e947e8afe93 --- /dev/null +++ b/bigquery_storage/to_dataframe/read_query_results.py @@ -0,0 +1,49 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas + + +def read_query_results() -> pandas.DataFrame: + # [START bigquerystorage_pandas_tutorial_read_query_results] + from google.cloud import bigquery + + bqclient = bigquery.Client() + + # Download query results. + query_string = """ + SELECT + CONCAT( + 'https://stackoverflow.com/questions/', + CAST(id as STRING)) as url, + view_count + FROM `bigquery-public-data.stackoverflow.posts_questions` + WHERE tags like '%google-bigquery%' + ORDER BY view_count DESC + """ + + dataframe = ( + bqclient.query(query_string) + .result() + .to_dataframe( + # Optionally, explicitly request to use the BigQuery Storage API. As of + # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage + # API is used by default. + create_bqstorage_client=True, + ) + ) + print(dataframe.head()) + # [END bigquerystorage_pandas_tutorial_read_query_results] + + return dataframe diff --git a/bigquery_storage/to_dataframe/read_query_results_test.py b/bigquery_storage/to_dataframe/read_query_results_test.py new file mode 100644 index 00000000000..b5cb5517401 --- /dev/null +++ b/bigquery_storage/to_dataframe/read_query_results_test.py @@ -0,0 +1,23 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from . import read_query_results + + +def test_read_query_results(capsys: pytest.CaptureFixture) -> None: + read_query_results.read_query_results() + out, _ = capsys.readouterr() + assert "stackoverflow" in out diff --git a/bigquery_storage/to_dataframe/read_table_bigquery.py b/bigquery_storage/to_dataframe/read_table_bigquery.py new file mode 100644 index 00000000000..7a69a64d77d --- /dev/null +++ b/bigquery_storage/to_dataframe/read_table_bigquery.py @@ -0,0 +1,45 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pandas + + +def read_table() -> pandas.DataFrame: + # [START bigquerystorage_pandas_tutorial_read_table] + from google.cloud import bigquery + + bqclient = bigquery.Client() + + # Download a table. + table = bigquery.TableReference.from_string( + "bigquery-public-data.utility_us.country_code_iso" + ) + rows = bqclient.list_rows( + table, + selected_fields=[ + bigquery.SchemaField("country_name", "STRING"), + bigquery.SchemaField("fips_code", "STRING"), + ], + ) + dataframe = rows.to_dataframe( + # Optionally, explicitly request to use the BigQuery Storage API. As of + # google-cloud-bigquery version 1.26.0 and above, the BigQuery Storage + # API is used by default. + create_bqstorage_client=True, + ) + print(dataframe.head()) + # [END bigquerystorage_pandas_tutorial_read_table] + + return dataframe diff --git a/bigquery_storage/to_dataframe/read_table_bigquery_test.py b/bigquery_storage/to_dataframe/read_table_bigquery_test.py new file mode 100644 index 00000000000..5b45c4d5163 --- /dev/null +++ b/bigquery_storage/to_dataframe/read_table_bigquery_test.py @@ -0,0 +1,23 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from . import read_table_bigquery + + +def test_read_table(capsys: pytest.CaptureFixture) -> None: + read_table_bigquery.read_table() + out, _ = capsys.readouterr() + assert "country_name" in out diff --git a/bigquery_storage/to_dataframe/read_table_bqstorage.py b/bigquery_storage/to_dataframe/read_table_bqstorage.py new file mode 100644 index 00000000000..ce1cd3872ae --- /dev/null +++ b/bigquery_storage/to_dataframe/read_table_bqstorage.py @@ -0,0 +1,74 @@ +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pandas as pd + + +def read_table(your_project_id: str) -> pd.DataFrame: + original_your_project_id = your_project_id + # [START bigquerystorage_pandas_tutorial_read_session] + your_project_id = "project-for-read-session" + # [END bigquerystorage_pandas_tutorial_read_session] + your_project_id = original_your_project_id + + # [START bigquerystorage_pandas_tutorial_read_session] + import pandas + + from google.cloud import bigquery_storage + from google.cloud.bigquery_storage import types + + bqstorageclient = bigquery_storage.BigQueryReadClient() + + project_id = "bigquery-public-data" + dataset_id = "new_york_trees" + table_id = "tree_species" + table = f"projects/{project_id}/datasets/{dataset_id}/tables/{table_id}" + + # Select columns to read with read options. If no read options are + # specified, the whole table is read. + read_options = types.ReadSession.TableReadOptions( + selected_fields=["species_common_name", "fall_color"] + ) + + parent = "projects/{}".format(your_project_id) + + requested_session = types.ReadSession( + table=table, + # Avro is also supported, but the Arrow data format is optimized to + # work well with column-oriented data structures such as pandas + # DataFrames. + data_format=types.DataFormat.ARROW, + read_options=read_options, + ) + read_session = bqstorageclient.create_read_session( + parent=parent, + read_session=requested_session, + max_stream_count=1, + ) + + # This example reads from only a single stream. Read from multiple streams + # to fetch data faster. Note that the session may not contain any streams + # if there are no rows to read. + stream = read_session.streams[0] + reader = bqstorageclient.read_rows(stream.name) + + # Parse all Arrow blocks and create a dataframe. + frames = [] + for message in reader.rows().pages: + frames.append(message.to_dataframe()) + dataframe = pandas.concat(frames) + print(dataframe.head()) + # [END bigquerystorage_pandas_tutorial_read_session] + + return dataframe diff --git a/bigquery_storage/to_dataframe/read_table_bqstorage_test.py b/bigquery_storage/to_dataframe/read_table_bqstorage_test.py new file mode 100644 index 00000000000..7b46a6b180a --- /dev/null +++ b/bigquery_storage/to_dataframe/read_table_bqstorage_test.py @@ -0,0 +1,23 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from . 
import read_table_bqstorage + + +def test_read_table(capsys: pytest.CaptureFixture, project_id: str) -> None: + read_table_bqstorage.read_table(your_project_id=project_id) + out, _ = capsys.readouterr() + assert "species_common_name" in out diff --git a/bigquery_storage/to_dataframe/requirements-test.txt b/bigquery_storage/to_dataframe/requirements-test.txt new file mode 100644 index 00000000000..7561ed55ce2 --- /dev/null +++ b/bigquery_storage/to_dataframe/requirements-test.txt @@ -0,0 +1,3 @@ +pytest===7.4.3; python_version == '3.7' +pytest===8.3.5; python_version == '3.8' +pytest==8.4.1; python_version >= '3.9' diff --git a/bigquery_storage/to_dataframe/requirements.txt b/bigquery_storage/to_dataframe/requirements.txt new file mode 100644 index 00000000000..e3b75fdaf5f --- /dev/null +++ b/bigquery_storage/to_dataframe/requirements.txt @@ -0,0 +1,19 @@ +google-auth==2.40.3 +google-cloud-bigquery-storage==2.32.0 +google-cloud-bigquery===3.30.0; python_version <= '3.8' +google-cloud-bigquery==3.35.1; python_version >= '3.9' +pyarrow===12.0.1; python_version == '3.7' +pyarrow===17.0.0; python_version == '3.8' +pyarrow==21.0.0; python_version >= '3.9' +ipython===7.31.1; python_version == '3.7' +ipython===8.10.0; python_version == '3.8' +ipython===8.18.1; python_version == '3.9' +ipython===8.33.0; python_version == '3.10' +ipython==9.4.0; python_version >= '3.11' +ipywidgets==8.1.7 +pandas===1.3.5; python_version == '3.7' +pandas===2.0.3; python_version == '3.8' +pandas==2.3.1; python_version >= '3.9' +tqdm==4.67.1 +db-dtypes===1.4.2; python_version <= '3.8' +db-dtypes==1.4.3; python_version >= '3.9' diff --git a/cloud-sql/mysql/sqlalchemy/requirements.txt b/cloud-sql/mysql/sqlalchemy/requirements.txt index 9ad6f872bc5..397f59c2759 100644 --- a/cloud-sql/mysql/sqlalchemy/requirements.txt +++ b/cloud-sql/mysql/sqlalchemy/requirements.txt @@ -2,6 +2,6 @@ Flask==2.2.2 SQLAlchemy==2.0.40 PyMySQL==1.1.1 gunicorn==23.0.0 -cloud-sql-python-connector==1.18.2 +cloud-sql-python-connector==1.18.4 functions-framework==3.9.2 Werkzeug==2.3.8 diff --git a/cloud-sql/postgres/client-side-encryption/requirements.txt b/cloud-sql/postgres/client-side-encryption/requirements.txt index 1749cee78fb..1ec3e93d497 100644 --- a/cloud-sql/postgres/client-side-encryption/requirements.txt +++ b/cloud-sql/postgres/client-side-encryption/requirements.txt @@ -1,3 +1,3 @@ SQLAlchemy==2.0.40 -pg8000==1.31.2 +pg8000==1.31.5 tink==1.9.0 diff --git a/cloud-sql/postgres/sqlalchemy/requirements.txt b/cloud-sql/postgres/sqlalchemy/requirements.txt index 77c7218ef61..d3a74b1c5ef 100644 --- a/cloud-sql/postgres/sqlalchemy/requirements.txt +++ b/cloud-sql/postgres/sqlalchemy/requirements.txt @@ -1,7 +1,7 @@ Flask==2.2.2 -pg8000==1.31.2 +pg8000==1.31.5 SQLAlchemy==2.0.40 -cloud-sql-python-connector==1.18.2 +cloud-sql-python-connector==1.18.4 gunicorn==23.0.0 functions-framework==3.9.2 Werkzeug==2.3.8 diff --git a/cloud-sql/sql-server/sqlalchemy/requirements.txt b/cloud-sql/sql-server/sqlalchemy/requirements.txt index 112da74c209..3302326ab42 100644 --- a/cloud-sql/sql-server/sqlalchemy/requirements.txt +++ b/cloud-sql/sql-server/sqlalchemy/requirements.txt @@ -3,7 +3,7 @@ gunicorn==23.0.0 python-tds==1.16.0 pyopenssl==25.0.0 SQLAlchemy==2.0.40 -cloud-sql-python-connector==1.18.2 +cloud-sql-python-connector==1.18.4 sqlalchemy-pytds==1.0.2 functions-framework==3.9.2 Werkzeug==2.3.8 diff --git a/composer/workflows/airflow_db_cleanup.py b/composer/workflows/airflow_db_cleanup.py index 4eee89ef9ab..45119168111 100644 --- 
a/composer/workflows/airflow_db_cleanup.py +++ b/composer/workflows/airflow_db_cleanup.py @@ -360,7 +360,8 @@ def build_query( logging.info("INITIAL QUERY : " + str(query)) - if dag_id: + if hasattr(airflow_db_model, 'dag_id'): + logging.info("Filtering by dag_id: " + str(dag_id)) query = query.filter(airflow_db_model.dag_id == dag_id) if airflow_db_model == DagRun: diff --git a/connectgateway/requirements.txt b/connectgateway/requirements.txt index 7141c827fa1..531ee9e7eb4 100644 --- a/connectgateway/requirements.txt +++ b/connectgateway/requirements.txt @@ -1,4 +1,4 @@ google-cloud-gke-connect-gateway==0.10.4 google-auth==2.38.0 -kubernetes==33.1.0 +kubernetes==34.1.0 google-api-core==2.24.2 diff --git a/dataflow/flex-templates/pipeline_with_dependencies/requirements.txt b/dataflow/flex-templates/pipeline_with_dependencies/requirements.txt index eeed8f6f3ce..bef166bb943 100644 --- a/dataflow/flex-templates/pipeline_with_dependencies/requirements.txt +++ b/dataflow/flex-templates/pipeline_with_dependencies/requirements.txt @@ -305,7 +305,7 @@ typing-extensions==4.10.0 # via apache-beam tzlocal==5.2 # via js2py -urllib3==2.5.0 +urllib3==2.6.0 # via requests wrapt==1.16.0 # via deprecated diff --git a/dialogflow-cx/noxfile_config.py b/dialogflow-cx/noxfile_config.py index 462f6d428f7..cc8143940ee 100644 --- a/dialogflow-cx/noxfile_config.py +++ b/dialogflow-cx/noxfile_config.py @@ -22,7 +22,7 @@ TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7", "3.7", "3.9", "3.10", "3.11", "3.12", "3.13"], + "ignored_versions": ["2.7", "3.7", "3.8", "3.9", "3.11", "3.12", "3.13"], # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string diff --git a/dialogflow-cx/requirements.txt b/dialogflow-cx/requirements.txt index f6c8da4cde7..fe7011b74ee 100644 --- a/dialogflow-cx/requirements.txt +++ b/dialogflow-cx/requirements.txt @@ -1,8 +1,8 @@ -google-cloud-dialogflow-cx==1.38.0 +google-cloud-dialogflow-cx==2.0.0 Flask==3.0.3 python-dateutil==2.9.0.post0 functions-framework==3.9.2 -Werkzeug==3.0.6 +Werkzeug==3.1.4 termcolor==3.0.0; python_version >= "3.9" termcolor==2.4.0; python_version == "3.8" pyaudio==0.2.14 \ No newline at end of file diff --git a/discoveryengine/standalone_apis_sample.py b/discoveryengine/standalone_apis_sample.py index 72725957e87..1a0ff112904 100644 --- a/discoveryengine/standalone_apis_sample.py +++ b/discoveryengine/standalone_apis_sample.py @@ -123,3 +123,183 @@ def rank_sample( # [END genappbuilder_rank] return response + + +def grounded_generation_inline_vais_sample( + project_number: str, + engine_id: str, +) -> discoveryengine.GenerateGroundedContentResponse: + # [START genappbuilder_grounded_generation_inline_vais] + from google.cloud import discoveryengine_v1 as discoveryengine + + # TODO(developer): Uncomment these variables before running the sample. + # project_number = "YOUR_PROJECT_NUMBER" + # engine_id = "YOUR_ENGINE_ID" + + client = discoveryengine.GroundedGenerationServiceClient() + + request = discoveryengine.GenerateGroundedContentRequest( + # The full resource name of the location. 
+ # Format: projects/{project_number}/locations/{location} + location=client.common_location_path(project=project_number, location="global"), + generation_spec=discoveryengine.GenerateGroundedContentRequest.GenerationSpec( + model_id="gemini-2.5-flash", + ), + # Conversation between user and model + contents=[ + discoveryengine.GroundedGenerationContent( + role="user", + parts=[ + discoveryengine.GroundedGenerationContent.Part( + text="How did Google do in 2020? Where can I find BigQuery docs?" + ) + ], + ) + ], + system_instruction=discoveryengine.GroundedGenerationContent( + parts=[ + discoveryengine.GroundedGenerationContent.Part( + text="Add a smiley emoji after the answer." + ) + ], + ), + # What to ground on. + grounding_spec=discoveryengine.GenerateGroundedContentRequest.GroundingSpec( + grounding_sources=[ + discoveryengine.GenerateGroundedContentRequest.GroundingSource( + inline_source=discoveryengine.GenerateGroundedContentRequest.GroundingSource.InlineSource( + grounding_facts=[ + discoveryengine.GroundingFact( + fact_text=( + "The BigQuery documentation can be found at https://cloud.google.com/bigquery/docs/introduction" + ), + attributes={ + "title": "BigQuery Overview", + "uri": "https://cloud.google.com/bigquery/docs/introduction", + }, + ), + ] + ), + ), + discoveryengine.GenerateGroundedContentRequest.GroundingSource( + search_source=discoveryengine.GenerateGroundedContentRequest.GroundingSource.SearchSource( + # The full resource name of the serving config for a Vertex AI Search App + serving_config=f"projects/{project_number}/locations/global/collections/default_collection/engines/{engine_id}/servingConfigs/default_search", + ), + ), + ] + ), + ) + response = client.generate_grounded_content(request) + + # Handle the response + print(response) + # [END genappbuilder_grounded_generation_inline_vais] + + return response + + +def grounded_generation_google_search_sample( + project_number: str, +) -> discoveryengine.GenerateGroundedContentResponse: + # [START genappbuilder_grounded_generation_google_search] + from google.cloud import discoveryengine_v1 as discoveryengine + + # TODO(developer): Uncomment these variables before running the sample. + # project_number = "YOUR_PROJECT_NUMBER" + + client = discoveryengine.GroundedGenerationServiceClient() + + request = discoveryengine.GenerateGroundedContentRequest( + # The full resource name of the location. + # Format: projects/{project_number}/locations/{location} + location=client.common_location_path(project=project_number, location="global"), + generation_spec=discoveryengine.GenerateGroundedContentRequest.GenerationSpec( + model_id="gemini-2.5-flash", + ), + # Conversation between user and model + contents=[ + discoveryengine.GroundedGenerationContent( + role="user", + parts=[ + discoveryengine.GroundedGenerationContent.Part( + text="How much is Google stock?" + ) + ], + ) + ], + system_instruction=discoveryengine.GroundedGenerationContent( + parts=[ + discoveryengine.GroundedGenerationContent.Part(text="Be comprehensive.") + ], + ), + # What to ground on. 
+ grounding_spec=discoveryengine.GenerateGroundedContentRequest.GroundingSpec( + grounding_sources=[ + discoveryengine.GenerateGroundedContentRequest.GroundingSource( + google_search_source=discoveryengine.GenerateGroundedContentRequest.GroundingSource.GoogleSearchSource( + # Optional: For Dynamic Retrieval + dynamic_retrieval_config=discoveryengine.GenerateGroundedContentRequest.DynamicRetrievalConfiguration( + predictor=discoveryengine.GenerateGroundedContentRequest.DynamicRetrievalConfiguration.DynamicRetrievalPredictor( + threshold=0.7 + ) + ) + ) + ), + ] + ), + ) + response = client.generate_grounded_content(request) + + # Handle the response + print(response) + # [END genappbuilder_grounded_generation_google_search] + + return response + + +def grounded_generation_streaming_sample( + project_number: str, +) -> discoveryengine.GenerateGroundedContentResponse: + # [START genappbuilder_grounded_generation_streaming] + from google.cloud import discoveryengine_v1 as discoveryengine + + # TODO(developer): Uncomment these variables before running the sample. + # project_id = "YOUR_PROJECT_ID" + + client = discoveryengine.GroundedGenerationServiceClient() + + request = discoveryengine.GenerateGroundedContentRequest( + # The full resource name of the location. + # Format: projects/{project_number}/locations/{location} + location=client.common_location_path(project=project_number, location="global"), + generation_spec=discoveryengine.GenerateGroundedContentRequest.GenerationSpec( + model_id="gemini-2.5-flash", + ), + # Conversation between user and model + contents=[ + discoveryengine.GroundedGenerationContent( + role="user", + parts=[ + discoveryengine.GroundedGenerationContent.Part( + text="Summarize how to delete a data store in Vertex AI Agent Builder?" 
+ ) + ], + ) + ], + grounding_spec=discoveryengine.GenerateGroundedContentRequest.GroundingSpec( + grounding_sources=[ + discoveryengine.GenerateGroundedContentRequest.GroundingSource( + google_search_source=discoveryengine.GenerateGroundedContentRequest.GroundingSource.GoogleSearchSource() + ), + ] + ), + ) + responses = client.stream_generate_grounded_content(iter([request])) + + for response in responses: + # Handle the response + print(response) + # [END genappbuilder_grounded_generation_streaming] + + return response diff --git a/discoveryengine/standalone_apis_sample_test.py b/discoveryengine/standalone_apis_sample_test.py index f0c00cb937d..60405afd7db 100644 --- a/discoveryengine/standalone_apis_sample_test.py +++ b/discoveryengine/standalone_apis_sample_test.py @@ -17,6 +17,8 @@ from discoveryengine import standalone_apis_sample +from google.cloud import resourcemanager_v3 + project_id = os.environ["GOOGLE_CLOUD_PROJECT"] @@ -32,3 +34,27 @@ def test_rank(): response = standalone_apis_sample.rank_sample(project_id) assert response assert response.records + + +def test_grounded_generation_inline_vais_sample(): + # Grounded Generation requires Project Number + client = resourcemanager_v3.ProjectsClient() + project = client.get_project(name=client.project_path(project_id)) + project_number = client.parse_project_path(project.name)["project"] + + response = standalone_apis_sample.grounded_generation_inline_vais_sample( + project_number, engine_id="test-search-engine_1689960780551" + ) + assert response + + +def test_grounded_generation_google_search_sample(): + # Grounded Generation requires Project Number + client = resourcemanager_v3.ProjectsClient() + project = client.get_project(name=client.project_path(project_id)) + project_number = client.parse_project_path(project.name)["project"] + + response = standalone_apis_sample.grounded_generation_google_search_sample( + project_number + ) + assert response diff --git a/functions/billing_stop_on_notification/requirements.txt b/functions/billing_stop_on_notification/requirements.txt index 912b07cd0a3..b730a52aa07 100644 --- a/functions/billing_stop_on_notification/requirements.txt +++ b/functions/billing_stop_on_notification/requirements.txt @@ -1,3 +1,5 @@ +# [START functions_billing_stop_requirements] functions-framework==3.* google-cloud-billing==1.16.2 google-cloud-logging==3.12.1 +# [END functions_billing_stop_requirements] diff --git a/genai/batch_prediction/batchpredict_embeddings_with_gcs.py b/genai/batch_prediction/batchpredict_embeddings_with_gcs.py index 41420db3141..4fb8148e9f5 100644 --- a/genai/batch_prediction/batchpredict_embeddings_with_gcs.py +++ b/genai/batch_prediction/batchpredict_embeddings_with_gcs.py @@ -34,7 +34,7 @@ def generate_content(output_uri: str) -> str: print(f"Job name: {job.name}") print(f"Job state: {job.state}") # Example response: - # Job name: projects/%PROJECT_ID%/locations/us-central1/batchPredictionJobs/9876453210000000000 + # Job name: projects/.../locations/.../batchPredictionJobs/9876453210000000000 # Job state: JOB_STATE_PENDING # See the documentation: https://googleapis.github.io/python-genai/genai.html#genai.types.BatchJob diff --git a/genai/batch_prediction/batchpredict_with_bq.py b/genai/batch_prediction/batchpredict_with_bq.py index 30ea7c4a90f..bf051f2a223 100644 --- a/genai/batch_prediction/batchpredict_with_bq.py +++ b/genai/batch_prediction/batchpredict_with_bq.py @@ -35,7 +35,7 @@ def generate_content(output_uri: str) -> str: print(f"Job name: {job.name}") print(f"Job state: 
{job.state}") # Example response: - # Job name: projects/%PROJECT_ID%/locations/us-central1/batchPredictionJobs/9876453210000000000 + # Job name: projects/.../locations/.../batchPredictionJobs/9876453210000000000 # Job state: JOB_STATE_PENDING # See the documentation: https://googleapis.github.io/python-genai/genai.html#genai.types.BatchJob diff --git a/genai/batch_prediction/batchpredict_with_gcs.py b/genai/batch_prediction/batchpredict_with_gcs.py index 75061405ff0..fcedf217bdc 100644 --- a/genai/batch_prediction/batchpredict_with_gcs.py +++ b/genai/batch_prediction/batchpredict_with_gcs.py @@ -36,7 +36,7 @@ def generate_content(output_uri: str) -> str: print(f"Job name: {job.name}") print(f"Job state: {job.state}") # Example response: - # Job name: projects/%PROJECT_ID%/locations/us-central1/batchPredictionJobs/9876453210000000000 + # Job name: projects/.../locations/.../batchPredictionJobs/9876453210000000000 # Job state: JOB_STATE_PENDING # See the documentation: https://googleapis.github.io/python-genai/genai.html#genai.types.BatchJob diff --git a/genai/batch_prediction/get_batch_job.py b/genai/batch_prediction/get_batch_job.py index a17b0457e25..c6e0453da64 100644 --- a/genai/batch_prediction/get_batch_job.py +++ b/genai/batch_prediction/get_batch_job.py @@ -23,7 +23,7 @@ def get_batch_job(batch_job_name: str) -> types.BatchJob: client = genai.Client(http_options=HttpOptions(api_version="v1")) # Get the batch job - # Eg. batch_job_name = "projects/123456789012/locations/us-central1/batchPredictionJobs/1234567890123456789" +# Eg. batch_job_name = "projects/123456789012/locations/.../batchPredictionJobs/1234567890123456789" batch_job = client.batches.get(name=batch_job_name) print(f"Job state: {batch_job.state}") diff --git a/genai/batch_prediction/requirements-test.txt b/genai/batch_prediction/requirements-test.txt index 937db8fb0d5..e43b7792721 100644 --- a/genai/batch_prediction/requirements-test.txt +++ b/genai/batch_prediction/requirements-test.txt @@ -1,4 +1,2 @@ google-api-core==2.24.0 -google-cloud-bigquery==3.29.0 -google-cloud-storage==2.19.0 pytest==8.2.0 diff --git a/genai/batch_prediction/requirements.txt b/genai/batch_prediction/requirements.txt index 7ad844cebcf..4f44a6593bb 100644 --- a/genai/batch_prediction/requirements.txt +++ b/genai/batch_prediction/requirements.txt @@ -1 +1,3 @@ -google-genai==1.27.0 +google-cloud-bigquery==3.29.0 +google-cloud-storage==2.19.0 +google-genai==1.42.0 diff --git a/genai/batch_prediction/test_batch_prediction_examples.py b/genai/batch_prediction/test_batch_prediction_examples.py index 5183161be65..5079dfd2cd0 100644 --- a/genai/batch_prediction/test_batch_prediction_examples.py +++ b/genai/batch_prediction/test_batch_prediction_examples.py @@ -11,19 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -# -# Using Google Cloud Vertex AI to test the code samples. 
-# -from datetime import datetime as dt -import os - from unittest.mock import MagicMock, patch -from google.cloud import bigquery, storage from google.genai import types from google.genai.types import JobState -import pytest import batchpredict_embeddings_with_gcs import batchpredict_with_bq @@ -31,67 +22,113 @@ import get_batch_job -os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" -os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" -# The project name is included in the CICD pipeline -# os.environ['GOOGLE_CLOUD_PROJECT'] = "add-your-project-name" -BQ_OUTPUT_DATASET = f"{os.environ['GOOGLE_CLOUD_PROJECT']}.gen_ai_batch_prediction" -GCS_OUTPUT_BUCKET = "python-docs-samples-tests" - - -@pytest.fixture(scope="session") -def bq_output_uri() -> str: - table_name = f"text_output_{dt.now().strftime('%Y_%m_%d_T%H_%M_%S')}" - table_uri = f"{BQ_OUTPUT_DATASET}.{table_name}" +@patch("google.genai.Client") +@patch("time.sleep", return_value=None) +def test_batch_prediction_embeddings_with_gcs( + mock_sleep: MagicMock, mock_genai_client: MagicMock +) -> None: + # Mock the API response + mock_batch_job_running = types.BatchJob( + name="test-batch-job", state="JOB_STATE_RUNNING" + ) + mock_batch_job_succeeded = types.BatchJob( + name="test-batch-job", state="JOB_STATE_SUCCEEDED" + ) - yield f"bq://{table_uri}" + mock_genai_client.return_value.batches.create.return_value = ( + mock_batch_job_running + ) + mock_genai_client.return_value.batches.get.return_value = ( + mock_batch_job_succeeded + ) - bq_client = bigquery.Client() - bq_client.delete_table(table_uri, not_found_ok=True) + response = batchpredict_embeddings_with_gcs.generate_content( + output_uri="gs://test-bucket/test-prefix" + ) + mock_genai_client.assert_called_once_with( + http_options=types.HttpOptions(api_version="v1") + ) + mock_genai_client.return_value.batches.create.assert_called_once() + mock_genai_client.return_value.batches.get.assert_called_once() + assert response == JobState.JOB_STATE_SUCCEEDED -@pytest.fixture(scope="session") -def gcs_output_uri() -> str: - prefix = f"text_output/{dt.now()}" - yield f"gs://{GCS_OUTPUT_BUCKET}/{prefix}" +@patch("google.genai.Client") +@patch("time.sleep", return_value=None) +def test_batch_prediction_with_bq( + mock_sleep: MagicMock, mock_genai_client: MagicMock +) -> None: + # Mock the API response + mock_batch_job_running = types.BatchJob( + name="test-batch-job", state="JOB_STATE_RUNNING" + ) + mock_batch_job_succeeded = types.BatchJob( + name="test-batch-job", state="JOB_STATE_SUCCEEDED" + ) - storage_client = storage.Client() - bucket = storage_client.get_bucket(GCS_OUTPUT_BUCKET) - blobs = bucket.list_blobs(prefix=prefix) - for blob in blobs: - blob.delete() + mock_genai_client.return_value.batches.create.return_value = ( + mock_batch_job_running + ) + mock_genai_client.return_value.batches.get.return_value = ( + mock_batch_job_succeeded + ) + response = batchpredict_with_bq.generate_content( + output_uri="bq://test-project.test_dataset.test_table" + ) -def test_batch_prediction_embeddings_with_gcs(gcs_output_uri: str) -> None: - response = batchpredict_embeddings_with_gcs.generate_content( - output_uri=gcs_output_uri + mock_genai_client.assert_called_once_with( + http_options=types.HttpOptions(api_version="v1") ) + mock_genai_client.return_value.batches.create.assert_called_once() + mock_genai_client.return_value.batches.get.assert_called_once() assert response == JobState.JOB_STATE_SUCCEEDED -def test_batch_prediction_with_bq(bq_output_uri: str) -> None: - response = 
batchpredict_with_bq.generate_content(output_uri=bq_output_uri) - assert response == JobState.JOB_STATE_SUCCEEDED +@patch("google.genai.Client") +@patch("time.sleep", return_value=None) +def test_batch_prediction_with_gcs( + mock_sleep: MagicMock, mock_genai_client: MagicMock +) -> None: + # Mock the API response + mock_batch_job_running = types.BatchJob( + name="test-batch-job", state="JOB_STATE_RUNNING" + ) + mock_batch_job_succeeded = types.BatchJob( + name="test-batch-job", state="JOB_STATE_SUCCEEDED" + ) + mock_genai_client.return_value.batches.create.return_value = ( + mock_batch_job_running + ) + mock_genai_client.return_value.batches.get.return_value = ( + mock_batch_job_succeeded + ) + + response = batchpredict_with_gcs.generate_content( + output_uri="gs://test-bucket/test-prefix" + ) -def test_batch_prediction_with_gcs(gcs_output_uri: str) -> None: - response = batchpredict_with_gcs.generate_content(output_uri=gcs_output_uri) + mock_genai_client.assert_called_once_with( + http_options=types.HttpOptions(api_version="v1") + ) + mock_genai_client.return_value.batches.create.assert_called_once() + mock_genai_client.return_value.batches.get.assert_called_once() assert response == JobState.JOB_STATE_SUCCEEDED @patch("google.genai.Client") def test_get_batch_job(mock_genai_client: MagicMock) -> None: # Mock the API response - mock_batch_job = types.BatchJob( - name="test-batch-job", - state="JOB_STATE_PENDING" - ) + mock_batch_job = types.BatchJob(name="test-batch-job", state="JOB_STATE_PENDING") mock_genai_client.return_value.batches.get.return_value = mock_batch_job response = get_batch_job.get_batch_job("test-batch-job") - mock_genai_client.assert_called_once_with(http_options=types.HttpOptions(api_version="v1")) + mock_genai_client.assert_called_once_with( + http_options=types.HttpOptions(api_version="v1") + ) mock_genai_client.return_value.batches.get.assert_called_once() assert response == mock_batch_job diff --git a/genai/bounding_box/requirements.txt b/genai/bounding_box/requirements.txt index 661c889ab37..86da356810f 100644 --- a/genai/bounding_box/requirements.txt +++ b/genai/bounding_box/requirements.txt @@ -1,2 +1,2 @@ -google-genai==1.27.0 +google-genai==1.42.0 pillow==11.1.0 diff --git a/genai/content_cache/contentcache_create_with_txt_gcs_pdf.py b/genai/content_cache/contentcache_create_with_txt_gcs_pdf.py index 1e158d940c4..2ed5ee6b713 100644 --- a/genai/content_cache/contentcache_create_with_txt_gcs_pdf.py +++ b/genai/content_cache/contentcache_create_with_txt_gcs_pdf.py @@ -47,7 +47,7 @@ def create_content_cache() -> str: contents=contents, system_instruction=system_instruction, # (Optional) For enhanced security, the content cache can be encrypted using a Cloud KMS key - # kms_key_name = "projects/.../locations/us-central1/keyRings/.../cryptoKeys/..." + # kms_key_name = "projects/.../locations/.../keyRings/.../cryptoKeys/..." 
display_name="example-cache", ttl="86400s", ), @@ -56,7 +56,7 @@ def create_content_cache() -> str: print(content_cache.name) print(content_cache.usage_metadata) # Example response: - # projects/111111111111/locations/us-central1/cachedContents/1111111111111111111 + # projects/111111111111/locations/.../cachedContents/1111111111111111111 # CachedContentUsageMetadata(audio_duration_seconds=None, image_count=167, # text_count=153, total_token_count=43130, video_duration_seconds=None) # [END googlegenaisdk_contentcache_create_with_txt_gcs_pdf] diff --git a/genai/content_cache/contentcache_delete.py b/genai/content_cache/contentcache_delete.py index 3761b84ea6a..9afe8962a5a 100644 --- a/genai/content_cache/contentcache_delete.py +++ b/genai/content_cache/contentcache_delete.py @@ -19,11 +19,11 @@ def delete_context_caches(cache_name: str) -> str: client = genai.Client() # Delete content cache using name - # E.g cache_name = 'projects/111111111111/locations/us-central1/cachedContents/1111111111111111111' + # E.g cache_name = 'projects/111111111111/locations/.../cachedContents/1111111111111111111' client.caches.delete(name=cache_name) print("Deleted Cache", cache_name) # Example response - # Deleted Cache projects/111111111111/locations/us-central1/cachedContents/1111111111111111111 + # Deleted Cache projects/111111111111/locations/.../cachedContents/1111111111111111111 # [END googlegenaisdk_contentcache_delete] return cache_name diff --git a/genai/content_cache/contentcache_list.py b/genai/content_cache/contentcache_list.py index f477da31b29..9f0f2a6b510 100644 --- a/genai/content_cache/contentcache_list.py +++ b/genai/content_cache/contentcache_list.py @@ -29,8 +29,8 @@ def list_context_caches() -> str: print(f"Expires at: {content_cache.expire_time}") # Example response: - # * Cache `projects/111111111111/locations/us-central1/cachedContents/1111111111111111111` for - # model `projects/111111111111/locations/us-central1/publishers/google/models/gemini-XXX-pro-XXX` + # * Cache `projects/111111111111/locations/.../cachedContents/1111111111111111111` for + # model `projects/111111111111/locations/.../publishers/google/models/gemini-XXX-pro-XXX` # * Last updated at: 2025-02-13 14:46:42.620490+00:00 # * CachedContentUsageMetadata(audio_duration_seconds=None, image_count=167, text_count=153, total_token_count=43130, video_duration_seconds=None) # ... 
diff --git a/genai/content_cache/contentcache_update.py b/genai/content_cache/contentcache_update.py index 1f1136359be..27f96743385 100644 --- a/genai/content_cache/contentcache_update.py +++ b/genai/content_cache/contentcache_update.py @@ -25,7 +25,7 @@ def update_content_cache(cache_name: str) -> str: client = genai.Client(http_options=HttpOptions(api_version="v1")) # Get content cache by name - # cache_name = "projects/111111111111/locations/us-central1/cachedContents/1111111111111111111" + # cache_name = "projects/.../locations/.../cachedContents/1111111111111111111" content_cache = client.caches.get(name=cache_name) print("Expire time", content_cache.expire_time) # Example response diff --git a/genai/content_cache/contentcache_use_with_txt.py b/genai/content_cache/contentcache_use_with_txt.py index 488d5c763af..7e85e52cd72 100644 --- a/genai/content_cache/contentcache_use_with_txt.py +++ b/genai/content_cache/contentcache_use_with_txt.py @@ -20,7 +20,7 @@ def generate_content(cache_name: str) -> str: client = genai.Client(http_options=HttpOptions(api_version="v1")) # Use content cache to generate text response - # E.g cache_name = 'projects/111111111111/locations/us-central1/cachedContents/1111111111111111111' + # E.g cache_name = 'projects/.../locations/.../cachedContents/1111111111111111111' response = client.models.generate_content( model="gemini-2.5-flash", contents="Summarize the pdfs", diff --git a/genai/content_cache/requirements.txt b/genai/content_cache/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/content_cache/requirements.txt +++ b/genai/content_cache/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/controlled_generation/requirements.txt b/genai/controlled_generation/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/controlled_generation/requirements.txt +++ b/genai/controlled_generation/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/count_tokens/counttoken_localtokenizer_compute_with_txt.py b/genai/count_tokens/counttoken_localtokenizer_compute_with_txt.py new file mode 100644 index 00000000000..889044e63af --- /dev/null +++ b/genai/count_tokens/counttoken_localtokenizer_compute_with_txt.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def counttoken_localtokenizer_compute_with_txt() -> int: + # [START googlegenaisdk_counttoken_localtokenizer_compute_with_txt] + from google.genai.local_tokenizer import LocalTokenizer + + tokenizer = LocalTokenizer(model_name="gemini-2.5-flash") + response = tokenizer.compute_tokens("What's the longest word in the English language?") + print(response) + # Example output: + # tokens_info=[TokensInfo( + # role='user', + # token_ids=[3689, 236789, 236751, 506, + # 27801, 3658, 528, 506, 5422, 5192, 236881], + # tokens=[b'What', b"'", b's', b' the', b' longest', + # b' word', b' in', b' the', b' English', b' language', b'?'] + # )] + # [END googlegenaisdk_counttoken_localtokenizer_compute_with_txt] + return response.tokens_info + + +if __name__ == "__main__": + counttoken_localtokenizer_compute_with_txt() diff --git a/genai/count_tokens/counttoken_localtokenizer_with_txt.py b/genai/count_tokens/counttoken_localtokenizer_with_txt.py new file mode 100644 index 00000000000..e784d393c9b --- /dev/null +++ b/genai/count_tokens/counttoken_localtokenizer_with_txt.py @@ -0,0 +1,30 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def counttoken_localtokenizer_with_txt() -> int: + # [START googlegenaisdk_counttoken_localtokenizer_with_txt] + from google.genai.local_tokenizer import LocalTokenizer + + tokenizer = LocalTokenizer(model_name="gemini-2.5-flash") + response = tokenizer.count_tokens("What's the highest mountain in Africa?") + print(response) + # Example output: + # total_tokens=10 + # [END googlegenaisdk_counttoken_localtokenizer_with_txt] + return response.total_tokens + + +if __name__ == "__main__": + counttoken_localtokenizer_with_txt() diff --git a/genai/count_tokens/counttoken_with_txt.py b/genai/count_tokens/counttoken_with_txt.py index 84464c5cf82..fcbf9484087 100644 --- a/genai/count_tokens/counttoken_with_txt.py +++ b/genai/count_tokens/counttoken_with_txt.py @@ -25,7 +25,7 @@ def count_tokens() -> int: ) print(response) # Example output: - # total_tokens=10 + # total_tokens=9 # cached_content_token_count=None # [END googlegenaisdk_counttoken_with_txt] return response.total_tokens diff --git a/genai/count_tokens/requirements.txt b/genai/count_tokens/requirements.txt index 7ad844cebcf..726dd09178a 100644 --- a/genai/count_tokens/requirements.txt +++ b/genai/count_tokens/requirements.txt @@ -1 +1,2 @@ -google-genai==1.27.0 +google-genai==1.42.0 +sentencepiece==0.2.1 diff --git a/genai/count_tokens/test_count_tokens_examples.py b/genai/count_tokens/test_count_tokens_examples.py index b654ff872d8..e83f20cd14c 100644 --- a/genai/count_tokens/test_count_tokens_examples.py +++ b/genai/count_tokens/test_count_tokens_examples.py @@ -19,6 +19,8 @@ import os import counttoken_compute_with_txt +import counttoken_localtokenizer_compute_with_txt +import counttoken_localtokenizer_with_txt import counttoken_resp_with_txt import counttoken_with_txt import counttoken_with_txt_vid @@ -43,3 +45,11 @@ def test_counttoken_with_txt() -> None: def 
test_counttoken_with_txt_vid() -> None: assert counttoken_with_txt_vid.count_tokens() + + +def test_counttoken_localtokenizer_with_txt() -> None: + assert counttoken_localtokenizer_with_txt.counttoken_localtokenizer_with_txt() + + +def test_counttoken_localtokenizer_compute_with_txt() -> None: + assert counttoken_localtokenizer_compute_with_txt.counttoken_localtokenizer_compute_with_txt() diff --git a/genai/embeddings/requirements.txt b/genai/embeddings/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/embeddings/requirements.txt +++ b/genai/embeddings/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/express_mode/requirements.txt b/genai/express_mode/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/express_mode/requirements.txt +++ b/genai/express_mode/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/image_generation/imggen_mmflash_edit_img_with_txt_img.py b/genai/image_generation/imggen_mmflash_edit_img_with_txt_img.py index 8f7a00e733c..e2d9888a027 100644 --- a/genai/image_generation/imggen_mmflash_edit_img_with_txt_img.py +++ b/genai/image_generation/imggen_mmflash_edit_img_with_txt_img.py @@ -26,7 +26,7 @@ def generate_content() -> str: image = Image.open("test_resources/example-image-eiffel-tower.png") response = client.models.generate_content( - model="gemini-2.5-flash-image-preview", + model="gemini-3-pro-image-preview", contents=[image, "Edit this image to make it look like a cartoon."], config=GenerateContentConfig(response_modalities=[Modality.TEXT, Modality.IMAGE]), ) @@ -36,12 +36,7 @@ def generate_content() -> str: elif part.inline_data: image = Image.open(BytesIO((part.inline_data.data))) image.save("output_folder/bw-example-image.png") - # Example response: - # Here's the cartoon-style edit of the image: - # Cartoon-style edit: - # - Simplified the Eiffel Tower with bolder lines and slightly exaggerated proportions. - # - Brightened and saturated the colors of the sky, fireworks, and foliage for a more vibrant, cartoonish look. - # .... 
+ # [END googlegenaisdk_imggen_mmflash_edit_img_with_txt_img] return "output_folder/bw-example-image.png" diff --git a/genai/image_generation/imggen_mmflash_locale_aware_with_txt.py b/genai/image_generation/imggen_mmflash_locale_aware_with_txt.py index 9114087babc..305be883d22 100644 --- a/genai/image_generation/imggen_mmflash_locale_aware_with_txt.py +++ b/genai/image_generation/imggen_mmflash_locale_aware_with_txt.py @@ -23,7 +23,7 @@ def generate_content() -> str: client = genai.Client() response = client.models.generate_content( - model="gemini-2.5-flash-image-preview", + model="gemini-2.5-flash-image", contents=("Generate a photo of a breakfast meal."), config=GenerateContentConfig(response_modalities=[Modality.TEXT, Modality.IMAGE]), ) diff --git a/genai/image_generation/imggen_mmflash_multiple_imgs_with_txt.py b/genai/image_generation/imggen_mmflash_multiple_imgs_with_txt.py index 51326b5b738..2b831ca97d9 100644 --- a/genai/image_generation/imggen_mmflash_multiple_imgs_with_txt.py +++ b/genai/image_generation/imggen_mmflash_multiple_imgs_with_txt.py @@ -23,7 +23,7 @@ def generate_content() -> str: client = genai.Client() response = client.models.generate_content( - model="gemini-2.5-flash-image-preview", + model="gemini-2.5-flash-image", contents=("Generate 3 images a cat sitting on a chair."), config=GenerateContentConfig(response_modalities=[Modality.TEXT, Modality.IMAGE]), ) diff --git a/genai/image_generation/imggen_mmflash_txt_and_img_with_txt.py b/genai/image_generation/imggen_mmflash_txt_and_img_with_txt.py index 057315157c2..7a9d11103a7 100644 --- a/genai/image_generation/imggen_mmflash_txt_and_img_with_txt.py +++ b/genai/image_generation/imggen_mmflash_txt_and_img_with_txt.py @@ -23,7 +23,7 @@ def generate_content() -> int: client = genai.Client() response = client.models.generate_content( - model="gemini-2.5-flash-image-preview", + model="gemini-3-pro-image-preview", contents=( "Generate an illustrated recipe for a paella." "Create images to go alongside the text as you generate the recipe" @@ -38,9 +38,7 @@ def generate_content() -> int: image = Image.open(BytesIO((part.inline_data.data))) image.save(f"output_folder/example-image-{i+1}.png") fp.write(f"![image](example-image-{i+1}.png)") - # Example response: - # A markdown page for a Paella recipe(`paella-recipe.md`) has been generated. - # It includes detailed steps and several images illustrating the cooking process. 
+ # [END googlegenaisdk_imggen_mmflash_txt_and_img_with_txt] return True diff --git a/genai/image_generation/imggen_mmflash_with_txt.py b/genai/image_generation/imggen_mmflash_with_txt.py index 41aca349d6d..0ee371b7e84 100644 --- a/genai/image_generation/imggen_mmflash_with_txt.py +++ b/genai/image_generation/imggen_mmflash_with_txt.py @@ -23,16 +23,10 @@ def generate_content() -> str: client = genai.Client() response = client.models.generate_content( - model="gemini-2.5-flash-image-preview", + model="gemini-3-pro-image-preview", contents=("Generate an image of the Eiffel tower with fireworks in the background."), config=GenerateContentConfig( response_modalities=[Modality.TEXT, Modality.IMAGE], - candidate_count=1, - safety_settings=[ - {"method": "PROBABILITY"}, - {"category": "HARM_CATEGORY_DANGEROUS_CONTENT"}, - {"threshold": "BLOCK_MEDIUM_AND_ABOVE"}, - ], ), ) for part in response.candidates[0].content.parts: @@ -41,11 +35,7 @@ def generate_content() -> str: elif part.inline_data: image = Image.open(BytesIO((part.inline_data.data))) image.save("output_folder/example-image-eiffel-tower.png") - # Example response: - # I will generate an image of the Eiffel Tower at night, with a vibrant display of - # colorful fireworks exploding in the dark sky behind it. The tower will be - # illuminated, standing tall as the focal point of the scene, with the bursts of - # light from the fireworks creating a festive atmosphere. + # [END googlegenaisdk_imggen_mmflash_with_txt] return True diff --git a/genai/image_generation/imggen_upscale_with_img.py b/genai/image_generation/imggen_upscale_with_img.py new file mode 100644 index 00000000000..c3ea9ffa640 --- /dev/null +++ b/genai/image_generation/imggen_upscale_with_img.py @@ -0,0 +1,45 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from google.genai.types import Image + + +def upscale_images(output_file: str) -> Image: + # [START googlegenaisdk_imggen_upscale_with_img] + from google import genai + from google.genai.types import Image + + client = genai.Client() + + # TODO(developer): Update and un-comment below line + # output_file = "output-image.png" + + image = client.models.upscale_image( + model="imagen-4.0-upscale-preview", + image=Image.from_file(location="test_resources/dog_newspaper.png"), + upscale_factor="x2", + ) + + image.generated_images[0].image.save(output_file) + + print(f"Created output image using {len(image.generated_images[0].image.image_bytes)} bytes") + # Example response: + # Created output image using 1234567 bytes + + # [END googlegenaisdk_imggen_upscale_with_img] + return image.generated_images[0].image + + +if __name__ == "__main__": + upscale_images(output_file="output_folder/dog_newspaper.png") diff --git a/genai/image_generation/output_folder/bw-example-image.png b/genai/image_generation/output_folder/bw-example-image.png index fe252e54bce..5c2289f477c 100644 Binary files a/genai/image_generation/output_folder/bw-example-image.png and b/genai/image_generation/output_folder/bw-example-image.png differ diff --git a/genai/image_generation/output_folder/example-cats-01.png b/genai/image_generation/output_folder/example-cats-01.png index 21c27c7f145..6ec55171571 100644 Binary files a/genai/image_generation/output_folder/example-cats-01.png and b/genai/image_generation/output_folder/example-cats-01.png differ diff --git a/genai/image_generation/output_folder/example-image-eiffel-tower.png b/genai/image_generation/output_folder/example-image-eiffel-tower.png index 276aad1148b..0cf9b0e50de 100644 Binary files a/genai/image_generation/output_folder/example-image-eiffel-tower.png and b/genai/image_generation/output_folder/example-image-eiffel-tower.png differ diff --git a/genai/image_generation/requirements.txt b/genai/image_generation/requirements.txt index 38b43a4cb10..86da356810f 100644 --- a/genai/image_generation/requirements.txt +++ b/genai/image_generation/requirements.txt @@ -1,2 +1,2 @@ -google-genai==1.29.0 +google-genai==1.42.0 pillow==11.1.0 diff --git a/genai/image_generation/test_image_generation.py b/genai/image_generation/test_image_generation.py index dd037ba6dbf..f30b295f85e 100644 --- a/genai/image_generation/test_image_generation.py +++ b/genai/image_generation/test_image_generation.py @@ -37,6 +37,7 @@ import imggen_scribble_ctrl_type_with_txt_img import imggen_style_reference_with_txt_img import imggen_subj_refer_ctrl_refer_with_txt_imgs +import imggen_upscale_with_img import imggen_virtual_try_on_with_txt_img import imggen_with_txt @@ -147,3 +148,9 @@ def test_img_virtual_try_on() -> None: OUTPUT_FILE = os.path.join(RESOURCES, "man_in_sweater.png") response = imggen_virtual_try_on_with_txt_img.virtual_try_on(OUTPUT_FILE) assert response + + +def test_img_upscale() -> None: + OUTPUT_FILE = os.path.join(RESOURCES, "dog_newspaper.png") + response = imggen_upscale_with_img.upscale_images(OUTPUT_FILE) + assert response diff --git a/genai/image_generation/test_resources/dog_newspaper.png b/genai/image_generation/test_resources/dog_newspaper.png index 0e502cdbb61..5f8961e6c10 100644 Binary files a/genai/image_generation/test_resources/dog_newspaper.png and b/genai/image_generation/test_resources/dog_newspaper.png differ diff --git a/genai/image_generation/test_resources/latte_edit.png b/genai/image_generation/test_resources/latte_edit.png index ec8f61ef661..f5f7465c36f 100644 
Binary files a/genai/image_generation/test_resources/latte_edit.png and b/genai/image_generation/test_resources/latte_edit.png differ diff --git a/genai/image_generation/test_resources/living_room_edit.png b/genai/image_generation/test_resources/living_room_edit.png index 635498dc1e2..c949440e101 100644 Binary files a/genai/image_generation/test_resources/living_room_edit.png and b/genai/image_generation/test_resources/living_room_edit.png differ diff --git a/genai/live/live_audio_with_txt.py b/genai/live/live_audio_with_txt.py new file mode 100644 index 00000000000..5d4e82cef85 --- /dev/null +++ b/genai/live/live_audio_with_txt.py @@ -0,0 +1,85 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav +# Install helpers for converting files: pip install librosa soundfile simpleaudio + +import asyncio + + +async def generate_content() -> list: + # [START googlegenaisdk_live_audio_with_txt] + from google import genai + from google.genai.types import ( + Content, LiveConnectConfig, Modality, Part, + PrebuiltVoiceConfig, SpeechConfig, VoiceConfig + ) + import numpy as np + import soundfile as sf + import simpleaudio as sa + + def play_audio(audio_array: np.ndarray, sample_rate: int = 24000) -> None: + sf.write("output.wav", audio_array, sample_rate) + wave_obj = sa.WaveObject.from_wave_file("output.wav") + play_obj = wave_obj.play() + play_obj.wait_done() + + client = genai.Client() + voice_name = "Aoede" + model = "gemini-2.0-flash-live-preview-04-09" + + config = LiveConnectConfig( + response_modalities=[Modality.AUDIO], + speech_config=SpeechConfig( + voice_config=VoiceConfig( + prebuilt_voice_config=PrebuiltVoiceConfig( + voice_name=voice_name, + ) + ), + ), + ) + + async with client.aio.live.connect( + model=model, + config=config, + ) as session: + text_input = "Hello? Gemini are you there?" + print("> ", text_input, "\n") + + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) + + audio_data = [] + async for message in session.receive(): + if ( + message.server_content.model_turn + and message.server_content.model_turn.parts + ): + for part in message.server_content.model_turn.parts: + if part.inline_data: + audio_data.append( + np.frombuffer(part.inline_data.data, dtype=np.int16) + ) + + if audio_data: + print("Received audio answer: ") + play_audio(np.concatenate(audio_data), sample_rate=24000) + + # [END googlegenaisdk_live_audio_with_txt] + return [] + + +if __name__ == "__main__": + asyncio.run(generate_content()) diff --git a/genai/live/live_audiogen_with_txt.py b/genai/live/live_audiogen_with_txt.py index cf7f24a6fc4..a6fc09f2e2a 100644 --- a/genai/live/live_audiogen_with_txt.py +++ b/genai/live/live_audiogen_with_txt.py @@ -82,7 +82,7 @@ async def generate_content() -> None: # Received audio answer. Saving to local file... 
# Audio saved to gemini_response.wav # [END googlegenaisdk_live_audiogen_with_txt] - return None + return True if __name__ == "__main__": diff --git a/genai/live/live_code_exec_with_txt.py b/genai/live/live_code_exec_with_txt.py index 70db7402ee7..ce36fc9f7b1 100644 --- a/genai/live/live_code_exec_with_txt.py +++ b/genai/live/live_code_exec_with_txt.py @@ -55,7 +55,7 @@ async def generate_content() -> list[str]: # > Compute the largest prime palindrome under 10 # Final Answer: The final answer is $\boxed{7}$ # [END googlegenaisdk_live_code_exec_with_txt] - return response + return True if __name__ == "__main__": diff --git a/genai/live/live_conversation_audio_with_audio.py b/genai/live/live_conversation_audio_with_audio.py new file mode 100644 index 00000000000..fb39dc36615 --- /dev/null +++ b/genai/live/live_conversation_audio_with_audio.py @@ -0,0 +1,133 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# [START googlegenaisdk_live_conversation_audio_with_audio] + +import asyncio +import base64 + +from google import genai +from google.genai.types import ( + AudioTranscriptionConfig, + Blob, + HttpOptions, + LiveConnectConfig, + Modality, +) +import numpy as np + +from scipy.io import wavfile + +# The number of audio frames to send in each chunk. +CHUNK = 4200 +CHANNELS = 1 +MODEL = "gemini-live-2.5-flash-preview-native-audio-09-2025" + +# The audio sample rate expected by the model. +INPUT_RATE = 16000 +# The audio sample rate of the audio generated by the model. +OUTPUT_RATE = 24000 + +# The sample width for 16-bit audio, which is standard for this type of audio data. +SAMPLE_WIDTH = 2 + +client = genai.Client(http_options=HttpOptions(api_version="v1beta1"), location="us-central1") + + +def read_wavefile(filepath: str) -> tuple[str, str]: + # Read the .wav file using scipy.io.wavfile.read + rate, data = wavfile.read(filepath) + # Convert the NumPy array of audio samples back to raw bytes + raw_audio_bytes = data.tobytes() + # Encode the raw bytes to a base64 string. + # The result needs to be decoded from bytes to a UTF-8 string + base64_encoded_data = base64.b64encode(raw_audio_bytes).decode("ascii") + mime_type = f"audio/pcm;rate={rate}" + return base64_encoded_data, mime_type + + +def write_wavefile(filepath: str, audio_frames: list[bytes], rate: int) -> None: + """Writes a list of audio byte frames to a WAV file using scipy.""" + # Combine the list of byte frames into a single byte string + raw_audio_bytes = b"".join(audio_frames) + + # Convert the raw bytes to a NumPy array. 
+ # The sample width is 2 bytes (16-bit), so we use np.int16 + audio_data = np.frombuffer(raw_audio_bytes, dtype=np.int16) + + # Write the NumPy array to a .wav file + wavfile.write(filepath, rate, audio_data) + print(f"Model response saved to {filepath}") + + +async def main() -> bool: + print("Starting the code") + + async with client.aio.live.connect( + model=MODEL, + config=LiveConnectConfig( + # Set Model responses to be in Audio + response_modalities=[Modality.AUDIO], + # To generate transcript for input audio + input_audio_transcription=AudioTranscriptionConfig(), + # To generate transcript for output audio + output_audio_transcription=AudioTranscriptionConfig(), + ), + ) as session: + + async def send() -> None: + # using local file as an example for live audio input + wav_file_path = "hello_gemini_are_you_there.wav" + base64_data, mime_type = read_wavefile(wav_file_path) + audio_bytes = base64.b64decode(base64_data) + await session.send_realtime_input(media=Blob(data=audio_bytes, mime_type=mime_type)) + + async def receive() -> None: + audio_frames = [] + + async for message in session.receive(): + if message.server_content.input_transcription: + print(message.server_content.model_dump(mode="json", exclude_none=True)) + if message.server_content.output_transcription: + print(message.server_content.model_dump(mode="json", exclude_none=True)) + if message.server_content.model_turn: + for part in message.server_content.model_turn.parts: + if part.inline_data.data: + audio_data = part.inline_data.data + audio_frames.append(audio_data) + + if audio_frames: + write_wavefile( + "example_model_response.wav", + audio_frames, + OUTPUT_RATE, + ) + + send_task = asyncio.create_task(send()) + receive_task = asyncio.create_task(receive()) + await asyncio.gather(send_task, receive_task) + # Example response: + # gemini-2.0-flash-live-preview-04-09 + # {'input_transcription': {'text': 'Hello.'}} + # {'output_transcription': {}} + # {'output_transcription': {'text': 'Hi'}} + # {'output_transcription': {'text': ' there. What can I do for you today?'}} + # {'output_transcription': {'finished': True}} + # Model response saved to example_model_response.wav + +# [END googlegenaisdk_live_conversation_audio_with_audio] + return True + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/genai/live/live_func_call_with_txt.py b/genai/live/live_func_call_with_txt.py index 7761a49b7b6..615ad1a8c9a 100644 --- a/genai/live/live_func_call_with_txt.py +++ b/genai/live/live_func_call_with_txt.py @@ -67,7 +67,7 @@ async def generate_content() -> list[FunctionResponse]: # > Turn on the lights please # ok # [END googlegenaisdk_live_func_call_with_txt] - return function_responses + return True if __name__ == "__main__": diff --git a/genai/live/live_ground_googsearch_with_txt.py b/genai/live/live_ground_googsearch_with_txt.py index cfca4a87e1c..d160b286649 100644 --- a/genai/live/live_ground_googsearch_with_txt.py +++ b/genai/live/live_ground_googsearch_with_txt.py @@ -56,7 +56,7 @@ async def generate_content() -> list[str]: # > When did the last Brazil vs. Argentina soccer match happen? # The last Brazil vs. Argentina soccer match was on March 25, 2025, a 2026 World Cup qualifier, where Argentina defeated Brazil 4-1. 
# [END googlegenaisdk_live_ground_googsearch_with_txt] - return response + return True if __name__ == "__main__": diff --git a/genai/live/live_ground_ragengine_with_txt.py b/genai/live/live_ground_ragengine_with_txt.py new file mode 100644 index 00000000000..09b133ad7cf --- /dev/null +++ b/genai/live/live_ground_ragengine_with_txt.py @@ -0,0 +1,63 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import asyncio + + +async def generate_content(memory_corpus: str) -> list[str]: + # [START googlegenaisdk_live_ground_ragengine_with_txt] + from google import genai + from google.genai.types import (Content, LiveConnectConfig, Modality, Part, + Retrieval, Tool, VertexRagStore, + VertexRagStoreRagResource) + + client = genai.Client() + model_id = "gemini-2.0-flash-live-preview-04-09" + rag_store = VertexRagStore( + rag_resources=[ + VertexRagStoreRagResource( + rag_corpus=memory_corpus # Use a memory corpus if you want to store context. + ) + ], + # Set `store_context` to True to allow the Live API to store conversation context in your memory corpus. + store_context=True, + ) + config = LiveConnectConfig( + response_modalities=[Modality.TEXT], + tools=[Tool(retrieval=Retrieval(vertex_rag_store=rag_store))], + ) + + async with client.aio.live.connect(model=model_id, config=config) as session: + text_input = "What are the newest Gemini models?" + print("> ", text_input, "\n") + + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) + + response = [] + + async for message in session.receive(): + if message.text: + response.append(message.text) + + print("".join(response)) + # Example output: + # > What are the newest Gemini models? + # In December 2023, Google launched Gemini, their "most capable and general model". It's multimodal, meaning it understands and combines different types of information like text, code, audio, images, and video. + # [END googlegenaisdk_live_ground_ragengine_with_txt] + return response + + +if __name__ == "__main__": + asyncio.run(generate_content("test_memory_corpus")) diff --git a/genai/live/live_structured_ouput_with_txt.py b/genai/live/live_structured_output_with_txt.py similarity index 96% rename from genai/live/live_structured_ouput_with_txt.py rename to genai/live/live_structured_output_with_txt.py index f0b2466ff5f..b743c87f064 100644 --- a/genai/live/live_structured_ouput_with_txt.py +++ b/genai/live/live_structured_output_with_txt.py @@ -24,7 +24,7 @@ class CalendarEvent(BaseModel): def generate_content() -> CalendarEvent: - # [START googlegenaisdk_live_structured_ouput_with_txt] + # [START googlegenaisdk_live_structured_output_with_txt] import os import google.auth.transport.requests @@ -78,7 +78,7 @@ def generate_content() -> CalendarEvent: # System message: Extract the event information. # User message: Alice and Bob are going to a science fair on Friday.
# Output message: name='science fair' date='Friday' participants=['Alice', 'Bob'] - # [END googlegenaisdk_live_structured_ouput_with_txt] + # [END googlegenaisdk_live_structured_output_with_txt] return response diff --git a/genai/live/live_transcribe_with_audio.py b/genai/live/live_transcribe_with_audio.py index b702672bc76..4a6b185d7ce 100644 --- a/genai/live/live_transcribe_with_audio.py +++ b/genai/live/live_transcribe_with_audio.py @@ -60,7 +60,7 @@ async def generate_content() -> list[str]: # > Hello? Gemini are you there? # Yes, I'm here. What would you like to talk about? # [END googlegenaisdk_live_transcribe_with_audio] - return response + return True if __name__ == "__main__": diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py new file mode 100644 index 00000000000..30e9004d76f --- /dev/null +++ b/genai/live/live_txt_with_audio.py @@ -0,0 +1,72 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav +# Install helpers for converting files: pip install librosa soundfile + +import asyncio + + +async def generate_content() -> list[str]: + # [START googlegenaisdk_live_txt_with_audio] + import io + + import librosa + import requests + import soundfile as sf + from google import genai + from google.genai.types import Blob, LiveConnectConfig, Modality + + client = genai.Client() + model = "gemini-2.0-flash-live-preview-04-09" + config = LiveConnectConfig(response_modalities=[Modality.TEXT]) + + async with client.aio.live.connect(model=model, config=config) as session: + audio_url = ( + "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" + ) + response = requests.get(audio_url) + response.raise_for_status() + buffer = io.BytesIO(response.content) + y, sr = librosa.load(buffer, sr=16000) + sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") + buffer.seek(0) + audio_bytes = buffer.read() + + # If you've pre-converted to sample.pcm using ffmpeg, use this instead: + # audio_bytes = Path("sample.pcm").read_bytes() + + print("> Answer to this audio url", audio_url, "\n") + + await session.send_realtime_input( + media=Blob(data=audio_bytes, mime_type="audio/pcm;rate=16000") + ) + + response = [] + + async for message in session.receive(): + if message.text is not None: + response.append(message.text) + + print("".join(response)) + # Example output: + # > Answer to this audio url https://storage.googleapis.com/generativeai-downloads/data/16000.wav + # Yes, I can hear you. How can I help you today? 
+ # [END googlegenaisdk_live_txt_with_audio] + return response + + +if __name__ == "__main__": + asyncio.run(generate_content()) diff --git a/genai/live/live_txtgen_with_audio.py b/genai/live/live_txtgen_with_audio.py index 175ec89f670..7daf4073a48 100644 --- a/genai/live/live_txtgen_with_audio.py +++ b/genai/live/live_txtgen_with_audio.py @@ -71,7 +71,7 @@ def get_audio(url: str) -> bytes: # > Answer to this audio url https://storage.googleapis.com/generativeai-downloads/data/16000.wav # Yes, I can hear you. How can I help you today? # [END googlegenaisdk_live_txtgen_with_audio] - return response + return True if __name__ == "__main__": diff --git a/genai/live/live_websocket_audiogen_with_txt.py b/genai/live/live_websocket_audiogen_with_txt.py index b63e60aaac6..5fdeee44299 100644 --- a/genai/live/live_websocket_audiogen_with_txt.py +++ b/genai/live/live_websocket_audiogen_with_txt.py @@ -143,7 +143,7 @@ async def generate_content() -> str: # Input: Hello? Gemini are you there? # Audio Response: Hello there. I'm here. What can I do for you today? # [END googlegenaisdk_live_audiogen_websocket_with_txt] - return "output.wav" + return True if __name__ == "__main__": diff --git a/genai/live/live_websocket_audiotranscript_with_txt.py b/genai/live/live_websocket_audiotranscript_with_txt.py index 6b769639eb6..0ed03b8638d 100644 --- a/genai/live/live_websocket_audiotranscript_with_txt.py +++ b/genai/live/live_websocket_audiotranscript_with_txt.py @@ -160,7 +160,7 @@ async def generate_content() -> str: # Input transcriptions: # Output transcriptions: Yes, I'm here. How can I help you today? # [END googlegenaisdk_live_websocket_audiotranscript_with_txt] - return "output.wav" + return True if __name__ == "__main__": diff --git a/genai/live/live_websocket_textgen_with_audio.py b/genai/live/live_websocket_textgen_with_audio.py index 00923d39310..781ffc96d78 100644 --- a/genai/live/live_websocket_textgen_with_audio.py +++ b/genai/live/live_websocket_textgen_with_audio.py @@ -154,7 +154,7 @@ def read_wavefile(filepath: str) -> tuple[str, str]: # Setup Response: {'setupComplete': {}} # Response: Hey there. What's on your mind today? # [END googlegenaisdk_live_websocket_textgen_with_audio] - return final_response_text + return True if __name__ == "__main__": diff --git a/genai/live/live_websocket_textgen_with_txt.py b/genai/live/live_websocket_textgen_with_txt.py index 56b69472052..13515b30062 100644 --- a/genai/live/live_websocket_textgen_with_txt.py +++ b/genai/live/live_websocket_textgen_with_txt.py @@ -130,7 +130,7 @@ async def generate_content() -> str: # Input: Hello? Gemini are you there? # Response: Hello there. I'm here. What can I do for you today? # [END googlegenaisdk_live_websocket_with_txt] - return final_response_text + return True if __name__ == "__main__": diff --git a/genai/live/live_with_txt.py b/genai/live/live_with_txt.py index 8b8b0908127..78df0ccd700 100644 --- a/genai/live/live_with_txt.py +++ b/genai/live/live_with_txt.py @@ -45,7 +45,7 @@ async def generate_content() -> list[str]: # > Hello? Gemini, are you there? # Yes, I'm here. What would you like to talk about? 
# [END googlegenaisdk_live_with_txt] - return response + return True if __name__ == "__main__": diff --git a/genai/live/requirements-test.txt b/genai/live/requirements-test.txt index 1b59fd9d249..7d5998c481d 100644 --- a/genai/live/requirements-test.txt +++ b/genai/live/requirements-test.txt @@ -2,3 +2,4 @@ backoff==2.2.1 google-api-core==2.25.1 pytest==8.4.1 pytest-asyncio==1.1.0 +pytest-mock==3.14.0 \ No newline at end of file diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index dd1891ee073..ee7f068754b 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -1,7 +1,10 @@ -google-genai==1.28.0 +google-genai==1.42.0 scipy==1.16.1 websockets==15.0.1 numpy==1.26.4 soundfile==0.12.1 openai==1.99.1 -setuptools==80.9.0 \ No newline at end of file +setuptools==80.9.0 +pyaudio==0.2.14 +librosa==0.11.0 +simpleaudio==1.0.0 \ No newline at end of file diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index f4d25e137ed..ffb0f10c689 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -15,38 +15,181 @@ # # Using Google Cloud Vertex AI to test the code samples. # - +import base64 import os +import sys +import types + +from unittest.mock import AsyncMock, MagicMock, patch import pytest +import pytest_mock +import live_audio_with_txt import live_audiogen_with_txt import live_code_exec_with_txt import live_func_call_with_txt import live_ground_googsearch_with_txt -import live_structured_ouput_with_txt +import live_ground_ragengine_with_txt +import live_structured_output_with_txt import live_transcribe_with_audio +import live_txt_with_audio import live_txtgen_with_audio import live_websocket_audiogen_with_txt import live_websocket_audiotranscript_with_txt -import live_websocket_textgen_with_audio +# import live_websocket_textgen_with_audio import live_websocket_textgen_with_txt import live_with_txt + os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" # The project name is included in the CICD pipeline # os.environ['GOOGLE_CLOUD_PROJECT'] = "add-your-project-name" +@pytest.fixture +def mock_live_session() -> tuple[MagicMock, MagicMock]: + async def async_gen(items: list) -> AsyncMock: + for i in items: + yield i + + mock_session = MagicMock() + mock_session.__aenter__.return_value = mock_session + mock_session.send_client_content = AsyncMock() + mock_session.send = AsyncMock() + mock_session.receive = lambda: async_gen([]) + + mock_client = MagicMock() + mock_client.aio.live.connect.return_value = mock_session + + return mock_client, mock_session + + +@pytest.fixture() +def mock_rag_components(mocker: pytest_mock.MockerFixture) -> None: + mock_client_cls = mocker.patch("google.genai.Client") + + class AsyncIterator: + def __init__(self) -> None: + self.used = False + + def __aiter__(self) -> "AsyncIterator": + return self + + async def __anext__(self) -> object: + if not self.used: + self.used = True + return mocker.MagicMock( + text="""In December 2023, Google launched Gemini, their "most capable and general model". 
It's multimodal, meaning it understands and combines different types of information like text, code, audio, images, and video.""" + ) + raise StopAsyncIteration + + mock_session = mocker.AsyncMock() + mock_session.__aenter__.return_value = mock_session + mock_session.receive = lambda: AsyncIterator() + mock_client_cls.return_value.aio.live.connect.return_value = mock_session + + +@pytest.fixture() +def live_conversation() -> None: + google_mod = types.ModuleType("google") + genai_mod = types.ModuleType("google.genai") + genai_types_mod = types.ModuleType("google.genai.types") + + class AudioTranscriptionConfig: + def __init__(self, *args: object, **kwargs: object) -> None: + pass + + class Blob: + def __init__(self, data: bytes, mime_type: str) -> None: + self.data = data + self.mime_type = mime_type + + class HttpOptions: + def __init__(self, api_version: str | None = None) -> None: + self.api_version = api_version + + class LiveConnectConfig: + def __init__(self, *args: object, **kwargs: object) -> None: + self.kwargs = kwargs + + class Modality: + AUDIO = "AUDIO" + + genai_types_mod.AudioTranscriptionConfig = AudioTranscriptionConfig + genai_types_mod.Blob = Blob + genai_types_mod.HttpOptions = HttpOptions + genai_types_mod.LiveConnectConfig = LiveConnectConfig + genai_types_mod.Modality = Modality + + class FakeSession: + async def __aenter__(self) -> "FakeSession": + print("MOCK: entering FakeSession") + return self + + async def __aexit__( + self, + exc_type: type[BaseException] | None, + exc: BaseException | None, + tb: types.TracebackType | None, + ) -> None: + print("MOCK: exiting FakeSession") + + async def send_realtime_input(self, media: object) -> None: + print("MOCK: send_realtime_input called (no network)") + + async def receive(self) -> object: + print("MOCK: receive started") + if False: + yield + + class FakeClient: + def __init__(self, *args: object, **kwargs: object) -> None: + self.aio = MagicMock() + self.aio.live = MagicMock() + self.aio.live.connect = MagicMock(return_value=FakeSession()) + print("MOCK: FakeClient created") + + def fake_client_constructor(*args: object, **kwargs: object) -> FakeClient: + return FakeClient() + + genai_mod.Client = fake_client_constructor + genai_mod.types = genai_types_mod + + old_modules = sys.modules.copy() + + sys.modules["google"] = google_mod + sys.modules["google.genai"] = genai_mod + sys.modules["google.genai.types"] = genai_types_mod + + import live_conversation_audio_with_audio as live + + def fake_read_wavefile(path: str) -> tuple[str, str]: + print("MOCK: read_wavefile called") + fake_bytes = b"\x00\x00" * 1000 + return base64.b64encode(fake_bytes).decode("ascii"), "audio/pcm;rate=16000" + + def fake_write_wavefile(path: str, frames: bytes, rate: int) -> None: + print(f"MOCK: write_wavefile called (no file written) rate={rate}") + + live.read_wavefile = fake_read_wavefile + live.write_wavefile = fake_write_wavefile + + yield live + + sys.modules.clear() + sys.modules.update(old_modules) + + @pytest.mark.asyncio async def test_live_with_text() -> None: assert await live_with_txt.generate_content() -@pytest.mark.asyncio -async def test_live_websocket_textgen_with_audio() -> None: - assert await live_websocket_textgen_with_audio.generate_content() +# @pytest.mark.asyncio +# async def test_live_websocket_textgen_with_audio() -> None: +# assert await live_websocket_textgen_with_audio.generate_content() @pytest.mark.asyncio @@ -66,8 +209,7 @@ async def test_live_websocket_audiotranscript_with_txt() -> None: @pytest.mark.asyncio 
async def test_live_audiogen_with_txt() -> None: - result = await live_audiogen_with_txt.generate_content() - assert result is None + assert live_audiogen_with_txt.generate_content() @pytest.mark.asyncio @@ -96,5 +238,35 @@ async def test_live_txtgen_with_audio() -> None: @pytest.mark.asyncio -async def test_live_structured_ouput_with_txt() -> None: - assert live_structured_ouput_with_txt.generate_content() +def test_live_structured_output_with_txt() -> None: + assert live_structured_output_with_txt.generate_content() + + +@pytest.mark.asyncio +async def test_live_ground_ragengine_with_txt(mock_rag_components: None) -> None: + assert await live_ground_ragengine_with_txt.generate_content("test") + + +@pytest.mark.asyncio +async def test_live_txt_with_audio() -> None: + assert await live_txt_with_audio.generate_content() + + +@pytest.mark.asyncio +async def test_live_audio_with_txt(mock_live_session: None) -> None: + mock_client, mock_session = mock_live_session + + with patch("google.genai.Client", return_value=mock_client): + with patch("simpleaudio.WaveObject.from_wave_file") as mock_wave: + with patch("soundfile.write"): + mock_wave_obj = mock_wave.return_value + mock_wave_obj.play.return_value = MagicMock() + result = await live_audio_with_txt.generate_content() + + assert result is not None + + +@pytest.mark.asyncio +async def test_live_conversation_audio_with_audio(live_conversation: types.ModuleType) -> None: + result = await live_conversation.main() + assert result is True or result is None diff --git a/genai/model_optimizer/requirements.txt b/genai/model_optimizer/requirements.txt index 3c934b0e72d..1efe7b29dbc 100644 --- a/genai/model_optimizer/requirements.txt +++ b/genai/model_optimizer/requirements.txt @@ -1 +1 @@ -google-genai==1.20.0 +google-genai==1.42.0 diff --git a/genai/provisioned_throughput/requirements.txt b/genai/provisioned_throughput/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/provisioned_throughput/requirements.txt +++ b/genai/provisioned_throughput/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/safety/requirements.txt b/genai/safety/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/safety/requirements.txt +++ b/genai/safety/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/template_folder/requirements.txt b/genai/template_folder/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/template_folder/requirements.txt +++ b/genai/template_folder/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/text_generation/requirements.txt b/genai/text_generation/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/text_generation/requirements.txt +++ b/genai/text_generation/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/text_generation/test_text_generation_examples.py b/genai/text_generation/test_text_generation_examples.py index 3381ae7ec8c..3477caef9df 100644 --- a/genai/text_generation/test_text_generation_examples.py +++ b/genai/text_generation/test_text_generation_examples.py @@ -22,6 +22,7 @@ import textgen_async_with_txt import textgen_chat_stream_with_txt import textgen_chat_with_txt +import textgen_code_with_pdf import textgen_config_with_txt import textgen_sys_instr_with_txt import textgen_transcript_with_gcs_audio @@ -137,6 +138,10 @@ def test_textgen_with_youtube_video() -> None: assert response +def test_textgen_code_with_pdf() -> 
None: + response = textgen_code_with_pdf.generate_content() + assert response + # Migrated to Model Optimser Folder # def test_model_optimizer_textgen_with_txt() -> None: # os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" diff --git a/genai/text_generation/textgen_code_with_pdf.py b/genai/text_generation/textgen_code_with_pdf.py new file mode 100644 index 00000000000..da4ca76b73a --- /dev/null +++ b/genai/text_generation/textgen_code_with_pdf.py @@ -0,0 +1,55 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# !This sample works with Google Cloud Vertex AI API only. + + +def generate_content() -> str: + # [START googlegenaisdk_textgen_code_with_pdf] + from google import genai + from google.genai.types import HttpOptions, Part + + client = genai.Client(http_options=HttpOptions(api_version="v1beta1")) + model_id = "gemini-2.5-flash" + prompt = "Convert this python code to use Google Python Style Guide." + print("> ", prompt, "\n") + pdf_uri = "https://storage.googleapis.com/cloud-samples-data/generative-ai/text/inefficient_fibonacci_series_python_code.pdf" + + pdf_file = Part.from_uri( + file_uri=pdf_uri, + mime_type="application/pdf", + ) + + response = client.models.generate_content( + model=model_id, + contents=[pdf_file, prompt], + ) + + print(response.text) + # Example response: + # > Convert this python code to use Google Python Style Guide. + # + # def generate_fibonacci_sequence(num_terms: int) -> list[int]: + # """Generates the Fibonacci sequence up to a specified number of terms. + # + # This function calculates the Fibonacci sequence starting with 0 and 1. + # It handles base cases for 0, 1, and 2 terms efficiently. + # + # # ... 
+ # [END googlegenaisdk_textgen_code_with_pdf] + return response.text + + +if __name__ == "__main__": + generate_content() diff --git a/genai/thinking/requirements.txt b/genai/thinking/requirements.txt index 7ad844cebcf..1efe7b29dbc 100644 --- a/genai/thinking/requirements.txt +++ b/genai/thinking/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.42.0 diff --git a/genai/tools/requirements.txt b/genai/tools/requirements.txt index 13bc5aa5291..9f6fafbe8ec 100644 --- a/genai/tools/requirements.txt +++ b/genai/tools/requirements.txt @@ -1,3 +1,3 @@ -google-genai==1.27.0 +google-genai==1.45.0 # PIl is required for tools_code_execution_with_txt_img.py pillow==11.1.0 diff --git a/genai/tools/test_tools_examples.py b/genai/tools/test_tools_examples.py index 26e5eb8ff5d..60ed069e1a4 100644 --- a/genai/tools/test_tools_examples.py +++ b/genai/tools/test_tools_examples.py @@ -24,8 +24,11 @@ import tools_enterprise_web_search_with_txt import tools_func_def_with_txt import tools_func_desc_with_txt +import tools_google_maps_coordinates_with_txt import tools_google_maps_with_txt +import tools_google_search_and_urlcontext_with_txt import tools_google_search_with_txt +import tools_urlcontext_with_txt import tools_vais_with_txt os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" @@ -35,45 +38,49 @@ def test_tools_code_exec_with_txt() -> None: - response = tools_code_exec_with_txt.generate_content() - assert response + assert tools_code_exec_with_txt.generate_content() def test_tools_code_exec_with_txt_local_img() -> None: - response = tools_code_exec_with_txt_local_img.generate_content() - assert response + assert tools_code_exec_with_txt_local_img.generate_content() def test_tools_enterprise_web_search_with_txt() -> None: - response = tools_enterprise_web_search_with_txt.generate_content() - assert response + assert tools_enterprise_web_search_with_txt.generate_content() def test_tools_func_def_with_txt() -> None: - response = tools_func_def_with_txt.generate_content() - assert response + assert tools_func_def_with_txt.generate_content() def test_tools_func_desc_with_txt() -> None: - response = tools_func_desc_with_txt.generate_content() - assert response + assert tools_func_desc_with_txt.generate_content() @pytest.mark.skip( reason="Google Maps Grounding allowlisting is not set up for the test project." 
) def test_tools_google_maps_with_txt() -> None: - response = tools_google_maps_with_txt.generate_content() - assert response + assert tools_google_maps_with_txt.generate_content() def test_tools_google_search_with_txt() -> None: - response = tools_google_search_with_txt.generate_content() - assert response + assert tools_google_search_with_txt.generate_content() def test_tools_vais_with_txt() -> None: PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT") datastore = f"projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/grounding-test-datastore" - response = tools_vais_with_txt.generate_content(datastore) - assert response + assert tools_vais_with_txt.generate_content(datastore) + + +def test_tools_google_maps_coordinates_with_txt() -> None: + assert tools_google_maps_coordinates_with_txt.generate_content() + + +def test_tools_urlcontext_with_txt() -> None: + assert tools_urlcontext_with_txt.generate_content() + + +def test_tools_google_search_and_urlcontext_with_txt() -> None: + assert tools_google_search_and_urlcontext_with_txt.generate_content() diff --git a/genai/tools/tools_google_maps_coordinates_with_txt.py b/genai/tools/tools_google_maps_coordinates_with_txt.py new file mode 100644 index 00000000000..dbeafa66578 --- /dev/null +++ b/genai/tools/tools_google_maps_coordinates_with_txt.py @@ -0,0 +1,59 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def generate_content() -> str: + # [START googlegenaisdk_tools_google_maps_coordinates_with_txt] + from google import genai + from google.genai.types import ( + GenerateContentConfig, + GoogleMaps, + HttpOptions, + Tool, + ToolConfig, + RetrievalConfig, + LatLng + ) + + client = genai.Client(http_options=HttpOptions(api_version="v1")) + + response = client.models.generate_content( + model="gemini-2.5-flash", + contents="Where can I get the best espresso near me?", + config=GenerateContentConfig( + tools=[ + # Use Google Maps Tool + Tool(google_maps=GoogleMaps()) + ], + tool_config=ToolConfig( + retrieval_config=RetrievalConfig( + lat_lng=LatLng( # Pass coordinates for location-aware grounding + latitude=40.7128, + longitude=-74.006 + ), + language_code="en_US", # Optional: localize Maps results + ), + ), + ), + ) + + print(response.text) + # Example response: + # 'Here are some of the top-rated places to get espresso near you: ...' + # [END googlegenaisdk_tools_google_maps_coordinates_with_txt] + return response.text + + +if __name__ == "__main__": + generate_content() diff --git a/genai/tools/tools_google_search_and_urlcontext_with_txt.py b/genai/tools/tools_google_search_and_urlcontext_with_txt.py new file mode 100644 index 00000000000..f55353985c4 --- /dev/null +++ b/genai/tools/tools_google_search_and_urlcontext_with_txt.py @@ -0,0 +1,95 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def generate_content() -> str: + # [START googlegenaisdk_tools_google_search_and_urlcontext_with_txt] + from google import genai + from google.genai.types import Tool, GenerateContentConfig, HttpOptions, UrlContext, GoogleSearch + + client = genai.Client(http_options=HttpOptions(api_version="v1beta1")) + model_id = "gemini-2.5-flash" + + tools = [ + Tool(url_context=UrlContext), + Tool(google_search=GoogleSearch), + ] + + # TODO(developer): Here put your URLs! + url = 'https://www.google.com/search?q=events+in+New+York' + + response = client.models.generate_content( + model=model_id, + contents=f"Give me three day events schedule based on {url}. Also let me know what needs to taken care of considering weather and commute.", + config=GenerateContentConfig( + tools=tools, + response_modalities=["TEXT"], + ) + ) + + for each in response.candidates[0].content.parts: + print(each.text) + # Here is a possible three-day event schedule for New York City, focusing on the dates around October 7-9, 2025, along with weather and commute considerations. + # + # ### Three-Day Event Schedule: New York City (October 7-9, 2025) + # + # **Day 1: Tuesday, October 7, 2025 - Art and Culture** + # + # * **Morning (10:00 AM - 1:00 PM):** Visit "Phillips Visual Language: The Art of Irving Penn" at 432 Park Avenue. This exhibition is scheduled to end on this day, offering a last chance to see it. + # * **Lunch (1:00 PM - 2:00 PM):** Grab a quick lunch near Park Avenue. + # * **Afternoon (2:30 PM - 5:30 PM):** Explore the "Lincoln Center Festival of Firsts" at Lincoln Center. This festival runs until October 23rd, offering various performances or exhibits. Check their specific schedule for the day. + # * **Evening (7:00 PM onwards):** Experience a classic Broadway show. Popular options mentioned for October 2025 include "Six The Musical," "Wicked," "Hadestown," or "MJ - The Musical." + # + # **Day 2: Wednesday, October 8, 2025 - Unique Experiences and SoHo Vibes** + # + # * **Morning (11:00 AM - 1:00 PM):** Head to Brooklyn for the "Secret Room at IKEA Brooklyn" at 1 Beard Street. This unique event is scheduled to end on October 9th. + # * **Lunch (1:00 PM - 2:00 PM):** Enjoy lunch in Brooklyn, perhaps exploring local eateries in the area. + # * **Afternoon (2:30 PM - 5:30 PM):** Immerse yourself in the "The Weeknd & Nespresso Samra Origins Vinyl Cafe" at 579 Broadway in SoHo. This pop-up, curated by The Weeknd, combines coffee and music and runs until October 14th. + # * **Evening (6:00 PM onwards):** Explore the vibrant SoHo neighborhood, known for its shopping and dining. You could also consider a dinner cruise to see the illuminated Manhattan skyline and the Statue of Liberty. + # + # **Day 3: Thursday, October 9, 2025 - Film and Scenic Views** + # + # * **Morning (10:00 AM - 1:00 PM):** Attend a screening at the New York Greek Film Expo, which runs until October 12th in New York City. + # * **Lunch (1:00 PM - 2:00 PM):** Have lunch near the film expo's location. + # * **Afternoon (2:30 PM - 5:30 PM):** Take advantage of the pleasant October weather and enjoy outdoor activities. 
Consider biking along the rivers or through Central Park to admire the early autumn foliage. + # * **Evening (6:00 PM onwards):** Visit an observation deck like the Empire State Building or Top of the Rock for panoramic city views. Afterwards, enjoy dinner in a neighborhood of your choice. + # + # ### Weather and Commute Considerations: + # + # **Weather in Early October:** + # + # * **Temperatures:** Expect mild to cool temperatures. Average daily temperatures in early October range from 10°C (50°F) to 18°C (64°F), with occasional warmer days reaching the mid-20s°C (mid-70s°F). Evenings can be quite chilly. + # * **Rainfall:** October has a higher chance of rainfall compared to other months, with an average of 33mm and a 32% chance of rain on any given day. + # * **Sunshine:** You can generally expect about 7 hours of sunshine per day. + # * **What to Pack:** Pack layers! Bring a light jacket or sweater for the daytime, and a warmer coat for the evenings. An umbrella or a light raincoat is highly recommended due to the chance of showers. Comfortable walking shoes are a must for exploring the city. + # + # **Commute in New York City:** + # + # * **Public Transportation is Key:** The subway is generally the fastest and most efficient way to get around New York City, especially during the day. Buses are good for East-West travel, but can be slower due to traffic. + # * **Using Apps:** Utilize Google Maps or official MTA apps to plan your routes and check for real-time service updates. The subway runs 24/7, but expect potential delays or changes to routes during nights and weekends due to maintenance. + # * **Rush Hour:** Avoid subway and commuter train travel during peak rush hours (8 AM - 10 AM and 5 PM - 7 PM) if possible, as trains can be extremely crowded. + # * **Subway Etiquette:** When on the subway, stand to the side of the doors to let people exit before boarding, and move to the center of the car to make space. Hold onto a pole or seat, and remove your backpack to free up space. + # * **Transfers:** Subway fare is $2.90 per ride, and you get one free transfer between the subway and bus within a two-hour window. + # * **Walking:** New York City is very walkable. If the weather is pleasant, walking between nearby attractions is an excellent way to see the city. + # * **Taxis/Ride-sharing:** Uber, Lyft, and Curb (for NYC taxis) are available, but driving in the city is generally discouraged due to traffic and parking difficulties. + # * **Allow Extra Time:** Always factor in an additional 20-30 minutes for travel time, as delays can occur. 
+ + # get URLs retrieved for context + print(response.candidates[0].url_context_metadata) + # [END googlegenaisdk_tools_google_search_and_urlcontext_with_txt] + return response.text + + +if __name__ == "__main__": + generate_content() diff --git a/genai/tools/tools_google_search_with_txt.py b/genai/tools/tools_google_search_with_txt.py index 2f650b01df9..4069071d0c3 100644 --- a/genai/tools/tools_google_search_with_txt.py +++ b/genai/tools/tools_google_search_with_txt.py @@ -31,7 +31,12 @@ def generate_content() -> str: config=GenerateContentConfig( tools=[ # Use Google Search Tool - Tool(google_search=GoogleSearch()) + Tool( + google_search=GoogleSearch( + # Optional: Domains to exclude from results + exclude_domains=["domain.com", "domain2.com"] + ) + ) ], ), ) diff --git a/genai/tools/tools_urlcontext_with_txt.py b/genai/tools/tools_urlcontext_with_txt.py new file mode 100644 index 00000000000..0d7551afe23 --- /dev/null +++ b/genai/tools/tools_urlcontext_with_txt.py @@ -0,0 +1,85 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def generate_content() -> str: + # [START googlegenaisdk_tools_urlcontext_with_txt] + from google import genai + from google.genai.types import Tool, GenerateContentConfig, HttpOptions, UrlContext + + client = genai.Client(http_options=HttpOptions(api_version="v1")) + model_id = "gemini-2.5-flash" + + url_context_tool = Tool( + url_context=UrlContext + ) + + # TODO(developer): Here put your URLs + url1 = "https://cloud.google.com/vertex-ai/docs/generative-ai/start" + url2 = "https://cloud.google.com/docs/overview" + + response = client.models.generate_content( + model=model_id, + contents=f"Compare the content, purpose, and audiences of {url1} and {url2}.", + config=GenerateContentConfig( + tools=[url_context_tool], + response_modalities=["TEXT"], + ) + ) + + for each in response.candidates[0].content.parts: + print(each.text) + # Gemini 2.5 Pro and Gemini 2.5 Flash are both advanced models offered by Google AI, but they are optimized for different use cases. + # + # Here's a comparison: + # + # **Gemini 2.5 Pro** + # * **Description**: This is Google's most advanced model, described as a "state-of-the-art thinking model". It excels at reasoning over complex problems in areas like code, mathematics, and STEM, and can analyze large datasets, codebases, and documents using a long context window. + # * **Input Data Types**: It supports audio, images, video, text, and PDF inputs. + # * **Output Data Types**: It produces text outputs. + # * **Token Limits**: It has an input token limit of 1,048,576 and an output token limit of 65,536. + # * **Supported Capabilities**: Gemini 2.5 Pro supports Batch API, Caching, Code execution, Function calling, Search grounding, Structured outputs, Thinking, and URL context. + # * **Knowledge Cutoff**: January 2025. 
+ # + # **Gemini 2.5 Flash** + # * **Description**: Positioned as "fast and intelligent," Gemini 2.5 Flash is highlighted as Google's best model in terms of price-performance, offering well-rounded capabilities. It is ideal for large-scale processing, low-latency, high-volume tasks that require thinking, and agentic use cases. + # * **Input Data Types**: It supports text, images, video, and audio inputs. + # * **Output Data Types**: It produces text outputs. + # * **Token Limits**: Similar to Pro, it has an input token limit of 1,048,576 and an output token limit of 65,536. + # * **Supported Capabilities**: Gemini 2.5 Flash supports Batch API, Caching, Code execution, Function calling, Search grounding, Structured outputs, Thinking, and URL context. + # * **Knowledge Cutoff**: January 2025. + # + # **Key Differences and Similarities:** + # + # * **Primary Focus**: Gemini 2.5 Pro is geared towards advanced reasoning and in-depth analysis of complex problems and large documents. Gemini 2.5 Flash, on the other hand, is optimized for efficiency, scale, and high-volume, low-latency applications, making it a strong choice for price-performance sensitive scenarios. + # * **Input Modalities**: Both models handle various input types including text, images, video, and audio. Gemini 2.5 Pro explicitly lists PDF as an input type, while Gemini 2.5 Flash lists text, images, video, audio. + # * **Technical Specifications (for primary stable versions)**: Both models share the same substantial input and output token limits (1,048,576 input and 65,536 output). They also support a very similar set of core capabilities, including code execution, function calling, and URL context. Neither model supports audio generation, image generation, or Live API in their standard stable versions. + # * **Knowledge Cutoff**: Both models have a knowledge cutoff of January 2025. + # + # In essence, while both models are powerful and capable, Gemini 2.5 Pro is designed for maximum performance in complex reasoning tasks, whereas Gemini 2.5 Flash prioritizes cost-effectiveness and speed for broader, high-throughput applications. + # get URLs retrieved for context + print(response.candidates[0].url_context_metadata) + # url_metadata=[UrlMetadata( + # retrieved_url='https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash', + # url_retrieval_status= + # ), UrlMetadata( + # retrieved_url='https://ai.google.dev/gemini-api/docs/models#gemini-2.5-pro', + # url_retrieval_status= + # )] + # [END googlegenaisdk_tools_urlcontext_with_txt] + return response.text + + +if __name__ == "__main__": + generate_content() diff --git a/genai/tools/tools_vais_with_txt.py b/genai/tools/tools_vais_with_txt.py index fa4109d5979..8c6e51d3b0e 100644 --- a/genai/tools/tools_vais_with_txt.py +++ b/genai/tools/tools_vais_with_txt.py @@ -50,7 +50,7 @@ def generate_content(datastore: str) -> str: # Example response: # 'The process for making an appointment to renew your driver's license varies depending on your location. To provide you with the most accurate instructions...' 
# [END googlegenaisdk_tools_vais_with_txt] - return response.text + return True if __name__ == "__main__": diff --git a/genai/tuning/preference_tuning_job_create.py b/genai/tuning/preference_tuning_job_create.py new file mode 100644 index 00000000000..13fa05d61d0 --- /dev/null +++ b/genai/tuning/preference_tuning_job_create.py @@ -0,0 +1,74 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_tuning_job() -> str: + # [START googlegenaisdk_preference_tuning_job_create] + import time + + from google import genai + from google.genai.types import HttpOptions, CreateTuningJobConfig, TuningDataset + + client = genai.Client(http_options=HttpOptions(api_version="v1")) + + training_dataset = TuningDataset( + gcs_uri="gs://mybucket/preference_tuning/data/train_data.jsonl", + ) + validation_dataset = TuningDataset( + gcs_uri="gs://mybucket/preference_tuning/data/validation_data.jsonl", + ) + + # Refer to https://docs.cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-continuous-tuning#google-gen-ai-sdk + # for example to continuous tune from SFT tuned model. + tuning_job = client.tunings.tune( + base_model="gemini-2.5-flash", + training_dataset=training_dataset, + config=CreateTuningJobConfig( + tuned_model_display_name="Example tuning job", + method="PREFERENCE_TUNING", + validation_dataset=validation_dataset, + ), + ) + + running_states = set([ + "JOB_STATE_PENDING", + "JOB_STATE_RUNNING", + ]) + + while tuning_job.state in running_states: + print(tuning_job.state) + tuning_job = client.tunings.get(name=tuning_job.name) + time.sleep(60) + + print(tuning_job.tuned_model.model) + print(tuning_job.tuned_model.endpoint) + print(tuning_job.experiment) + # Example response: + # projects/123456789012/locations/us-central1/models/1234567890@1 + # projects/123456789012/locations/us-central1/endpoints/123456789012345 + # projects/123456789012/locations/us-central1/metadataStores/default/contexts/tuning-experiment-2025010112345678 + + if tuning_job.tuned_model.checkpoints: + for i, checkpoint in enumerate(tuning_job.tuned_model.checkpoints): + print(f"Checkpoint {i + 1}: ", checkpoint) + # Example response: + # Checkpoint 1: checkpoint_id='1' epoch=1 step=10 endpoint='projects/123456789012/locations/us-central1/endpoints/123456789000000' + # Checkpoint 2: checkpoint_id='2' epoch=2 step=20 endpoint='projects/123456789012/locations/us-central1/endpoints/123456789012345' + + # [END googlegenaisdk_preference_tuning_job_create] + return tuning_job.name + + +if __name__ == "__main__": + create_tuning_job() diff --git a/genai/tuning/requirements.txt b/genai/tuning/requirements.txt index d64aa5a57b1..e5fdb322ca4 100644 --- a/genai/tuning/requirements.txt +++ b/genai/tuning/requirements.txt @@ -1 +1 @@ -google-genai==1.30.0 +google-genai==1.47.0 diff --git a/genai/tuning/test_tuning_examples.py b/genai/tuning/test_tuning_examples.py index 0974c769483..25b46402622 100644 --- a/genai/tuning/test_tuning_examples.py +++ b/genai/tuning/test_tuning_examples.py @@ -20,6 
+20,7 @@ from google.genai import types import pytest +import preference_tuning_job_create import tuning_job_create import tuning_job_get import tuning_job_list @@ -29,6 +30,7 @@ import tuning_with_checkpoints_list_checkpoints import tuning_with_checkpoints_set_default_checkpoint import tuning_with_checkpoints_textgen_with_txt +import tuning_with_pretuned_model GCS_OUTPUT_BUCKET = "python-docs-samples-tests" @@ -306,3 +308,43 @@ def test_tuning_with_checkpoints_textgen_with_txt(mock_genai_client: MagicMock) call(model="test-endpoint-1", contents="Why is the sky blue?"), call(model="test-endpoint-2", contents="Why is the sky blue?"), ] + + +@patch("google.genai.Client") +def test_tuning_with_pretuned_model(mock_genai_client: MagicMock) -> None: + # Mock the API response + mock_tuning_job = types.TuningJob( + name="test-tuning-job", + experiment="test-experiment", + tuned_model=types.TunedModel( + model="test-model-2", + endpoint="test-endpoint" + ) + ) + mock_genai_client.return_value.tunings.tune.return_value = mock_tuning_job + + response = tuning_with_pretuned_model.create_continuous_tuning_job(tuned_model_name="test-model", checkpoint_id="1") + + mock_genai_client.assert_called_once_with(http_options=types.HttpOptions(api_version="v1beta1")) + mock_genai_client.return_value.tunings.tune.assert_called_once() + assert response == "test-tuning-job" + + +@patch("google.genai.Client") +def test_preference_tuning_job_create(mock_genai_client: MagicMock) -> None: + # Mock the API response + mock_tuning_job = types.TuningJob( + name="test-tuning-job", + experiment="test-experiment", + tuned_model=types.TunedModel( + model="test-model", + endpoint="test-endpoint" + ) + ) + mock_genai_client.return_value.tunings.tune.return_value = mock_tuning_job + + response = preference_tuning_job_create.create_tuning_job() + + mock_genai_client.assert_called_once_with(http_options=types.HttpOptions(api_version="v1")) + mock_genai_client.return_value.tunings.tune.assert_called_once() + assert response == "test-tuning-job" diff --git a/genai/tuning/tuning_job_create.py b/genai/tuning/tuning_job_create.py index d1c5c9072c3..168b8a50c3b 100644 --- a/genai/tuning/tuning_job_create.py +++ b/genai/tuning/tuning_job_create.py @@ -36,7 +36,7 @@ def create_tuning_job(output_gcs_uri: str) -> str: metrics=[ Metric( name="FLUENCY", - prompt_template="""Evaluate this {response}""" + prompt_template="""Evaluate this {prediction}""" ) ], output_config=OutputConfig( diff --git a/genai/tuning/tuning_with_checkpoints_create.py b/genai/tuning/tuning_with_checkpoints_create.py index bb7f3548ec2..d15db2bc819 100644 --- a/genai/tuning/tuning_with_checkpoints_create.py +++ b/genai/tuning/tuning_with_checkpoints_create.py @@ -36,7 +36,7 @@ def create_with_checkpoints(output_gcs_uri: str) -> str: metrics=[ Metric( name="FLUENCY", - prompt_template="""Evaluate this {response}""" + prompt_template="""Evaluate this {prediction}""" ) ], output_config=OutputConfig( diff --git a/genai/tuning/tuning_with_pretuned_model.py b/genai/tuning/tuning_with_pretuned_model.py new file mode 100644 index 00000000000..75911b51206 --- /dev/null +++ b/genai/tuning/tuning_with_pretuned_model.py @@ -0,0 +1,78 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_continuous_tuning_job(tuned_model_name: str, checkpoint_id: str) -> str: + # [START googlegenaisdk_tuning_with_pretuned_model] + import time + + from google import genai + from google.genai.types import HttpOptions, TuningDataset, CreateTuningJobConfig + + # TODO(developer): Update and un-comment below line + # tuned_model_name = "projects/123456789012/locations/us-central1/models/1234567890@1" + # checkpoint_id = "1" + + client = genai.Client(http_options=HttpOptions(api_version="v1beta1")) + + training_dataset = TuningDataset( + gcs_uri="gs://cloud-samples-data/ai-platform/generative_ai/gemini/text/sft_train_data.jsonl", + ) + validation_dataset = TuningDataset( + gcs_uri="gs://cloud-samples-data/ai-platform/generative_ai/gemini/text/sft_validation_data.jsonl", + ) + + tuning_job = client.tunings.tune( + base_model=tuned_model_name, # Note: Using a Tuned Model + training_dataset=training_dataset, + config=CreateTuningJobConfig( + tuned_model_display_name="Example tuning job", + validation_dataset=validation_dataset, + pre_tuned_model_checkpoint_id=checkpoint_id, + ), + ) + + running_states = set([ + "JOB_STATE_PENDING", + "JOB_STATE_RUNNING", + ]) + + while tuning_job.state in running_states: + print(tuning_job.state) + tuning_job = client.tunings.get(name=tuning_job.name) + time.sleep(60) + + print(tuning_job.tuned_model.model) + print(tuning_job.tuned_model.endpoint) + print(tuning_job.experiment) + # Example response: + # projects/123456789012/locations/us-central1/models/1234567890@2 + # projects/123456789012/locations/us-central1/endpoints/123456789012345 + # projects/123456789012/locations/us-central1/metadataStores/default/contexts/tuning-experiment-2025010112345678 + + if tuning_job.tuned_model.checkpoints: + for i, checkpoint in enumerate(tuning_job.tuned_model.checkpoints): + print(f"Checkpoint {i + 1}: ", checkpoint) + # Example response: + # Checkpoint 1: checkpoint_id='1' epoch=1 step=10 endpoint='projects/123456789012/locations/us-central1/endpoints/123456789000000' + # Checkpoint 2: checkpoint_id='2' epoch=2 step=20 endpoint='projects/123456789012/locations/us-central1/endpoints/123456789012345' + + # [END googlegenaisdk_tuning_with_pretuned_model] + return tuning_job.name + + +if __name__ == "__main__": + pre_tuned_model_name = input("Pre-tuned model name: ") + pre_tuned_model_checkpoint_id = input("Pre-tuned model checkpoint id: ") + create_continuous_tuning_job(pre_tuned_model_name, pre_tuned_model_checkpoint_id) diff --git a/genai/video_generation/requirements.txt b/genai/video_generation/requirements.txt index 7ad844cebcf..b83c25fae61 100644 --- a/genai/video_generation/requirements.txt +++ b/genai/video_generation/requirements.txt @@ -1 +1 @@ -google-genai==1.27.0 +google-genai==1.43.0 diff --git a/genai/video_generation/test_video_generation_examples.py b/genai/video_generation/test_video_generation_examples.py index 15f060e7674..639793ff9e8 100644 --- a/genai/video_generation/test_video_generation_examples.py +++ b/genai/video_generation/test_video_generation_examples.py @@ -30,10 +30,16 @@ import videogen_with_no_rewrite +import 
videogen_with_reference + import videogen_with_txt import videogen_with_vid +import videogen_with_vid_edit_insert + +import videogen_with_vid_edit_remove + os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" @@ -79,3 +85,18 @@ def test_videogen_with_vid(output_gcs_uri: str) -> None: def test_videogen_with_no_rewriter(output_gcs_uri: str) -> None: response = videogen_with_no_rewrite.generate_videos_no_rewriter(output_gcs_uri=output_gcs_uri) assert response + + +def test_videogen_with_reference(output_gcs_uri: str) -> None: + response = videogen_with_reference.generate_videos_from_reference(output_gcs_uri=output_gcs_uri) + assert response + + +def test_videogen_with_edit_insert(output_gcs_uri: str) -> None: + response = videogen_with_vid_edit_insert.edit_videos_insert_from_video(output_gcs_uri=output_gcs_uri) + assert response + + +def test_videogen_with_edit_remove(output_gcs_uri: str) -> None: + response = videogen_with_vid_edit_remove.edit_videos_remove_from_video(output_gcs_uri=output_gcs_uri) + assert response diff --git a/genai/video_generation/videogen_with_first_last_frame.py b/genai/video_generation/videogen_with_first_last_frame.py index 026cd545d87..52b5ab3a58a 100644 --- a/genai/video_generation/videogen_with_first_last_frame.py +++ b/genai/video_generation/videogen_with_first_last_frame.py @@ -25,7 +25,7 @@ def generate_videos_from_first_last_frame(output_gcs_uri: str) -> str: # output_gcs_uri = "gs://your-bucket/your-prefix" operation = client.models.generate_videos( - model="veo-2.0-generate-001", + model="veo-3.1-generate-001", prompt="a hand reaches in and places a glass of milk next to the plate of cookies", image=Image( gcs_uri="gs://cloud-samples-data/generative-ai/image/cookies.png", diff --git a/genai/video_generation/videogen_with_img.py b/genai/video_generation/videogen_with_img.py index fe8e1f48305..ce725b1b03c 100644 --- a/genai/video_generation/videogen_with_img.py +++ b/genai/video_generation/videogen_with_img.py @@ -25,7 +25,7 @@ def generate_videos_from_image(output_gcs_uri: str) -> str: # output_gcs_uri = "gs://your-bucket/your-prefix" operation = client.models.generate_videos( - model="veo-3.0-generate-preview", + model="veo-3.1-generate-001", prompt="Extreme close-up of a cluster of vibrant wildflowers swaying gently in a sun-drenched meadow.", image=Image( gcs_uri="gs://cloud-samples-data/generative-ai/image/flowers.png", diff --git a/genai/video_generation/videogen_with_reference.py b/genai/video_generation/videogen_with_reference.py new file mode 100644 index 00000000000..74f03afa68b --- /dev/null +++ b/genai/video_generation/videogen_with_reference.py @@ -0,0 +1,60 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def generate_videos_from_reference(output_gcs_uri: str) -> str: + # [START googlegenaisdk_videogen_with_img_reference] + import time + from google import genai + from google.genai.types import GenerateVideosConfig, Image, VideoGenerationReferenceImage + + client = genai.Client() + + # TODO(developer): Update and un-comment below line + # output_gcs_uri = "gs://your-bucket/your-prefix" + + operation = client.models.generate_videos( + model="veo-3.1-generate-preview", + prompt="slowly rotate this coffee mug in a 360 degree circle", + config=GenerateVideosConfig( + reference_images=[ + VideoGenerationReferenceImage( + image=Image( + gcs_uri="gs://cloud-samples-data/generative-ai/image/mug.png", + mime_type="image/png", + ), + reference_type="asset", + ), + ], + aspect_ratio="16:9", + output_gcs_uri=output_gcs_uri, + ), + ) + + while not operation.done: + time.sleep(15) + operation = client.operations.get(operation) + print(operation) + + if operation.response: + print(operation.result.generated_videos[0].video.uri) + + # Example response: + # gs://your-bucket/your-prefix + # [END googlegenaisdk_videogen_with_img_reference] + return operation.result.generated_videos[0].video.uri + + +if __name__ == "__main__": + generate_videos_from_reference(output_gcs_uri="gs://your-bucket/your-prefix") diff --git a/genai/video_generation/videogen_with_txt.py b/genai/video_generation/videogen_with_txt.py index a6d9934dc19..17ad11df4a3 100644 --- a/genai/video_generation/videogen_with_txt.py +++ b/genai/video_generation/videogen_with_txt.py @@ -25,7 +25,7 @@ def generate_videos(output_gcs_uri: str) -> str: # output_gcs_uri = "gs://your-bucket/your-prefix" operation = client.models.generate_videos( - model="veo-3.0-generate-001", + model="veo-3.1-generate-001", prompt="a cat reading a book", config=GenerateVideosConfig( aspect_ratio="16:9", diff --git a/genai/video_generation/videogen_with_vid.py b/genai/video_generation/videogen_with_vid.py index d8ff763e525..b28fa3b73aa 100644 --- a/genai/video_generation/videogen_with_vid.py +++ b/genai/video_generation/videogen_with_vid.py @@ -29,6 +29,7 @@ def generate_videos_from_video(output_gcs_uri: str) -> str: prompt="a butterfly flies in and lands on the flower", video=Video( uri="gs://cloud-samples-data/generative-ai/video/flower.mp4", + mime_type="video/mp4", ), config=GenerateVideosConfig( aspect_ratio="16:9", diff --git a/genai/video_generation/videogen_with_vid_edit_insert.py b/genai/video_generation/videogen_with_vid_edit_insert.py new file mode 100644 index 00000000000..e45b1da5863 --- /dev/null +++ b/genai/video_generation/videogen_with_vid_edit_insert.py @@ -0,0 +1,60 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def edit_videos_insert_from_video(output_gcs_uri: str) -> str: + # [START googlegenaisdk_videogen_with_vid_edit_insert] + import time + from google import genai + from google.genai.types import GenerateVideosSource, GenerateVideosConfig, Image, Video, VideoGenerationMask, VideoGenerationMaskMode + + client = genai.Client() + + # TODO(developer): Update and un-comment below line + # output_gcs_uri = "gs://your-bucket/your-prefix" + + operation = client.models.generate_videos( + model="veo-2.0-generate-preview", + source=GenerateVideosSource( + prompt="a sheep", + video=Video(uri="gs://cloud-samples-data/generative-ai/video/truck.mp4", mime_type="video/mp4") + ), + config=GenerateVideosConfig( + mask=VideoGenerationMask( + image=Image( + gcs_uri="gs://cloud-samples-data/generative-ai/image/truck-inpainting-dynamic-mask.png", + mime_type="image/png", + ), + mask_mode=VideoGenerationMaskMode.INSERT, + ), + output_gcs_uri=output_gcs_uri, + ), + ) + + while not operation.done: + time.sleep(15) + operation = client.operations.get(operation) + print(operation) + + if operation.response: + print(operation.result.generated_videos[0].video.uri) + + # Example response: + # gs://your-bucket/your-prefix + # [END googlegenaisdk_videogen_with_vid_edit_insert] + return operation.result.generated_videos[0].video.uri + + +if __name__ == "__main__": + edit_videos_insert_from_video(output_gcs_uri="gs://your-bucket/your-prefix") diff --git a/genai/video_generation/videogen_with_vid_edit_remove.py b/genai/video_generation/videogen_with_vid_edit_remove.py new file mode 100644 index 00000000000..ef0cd5cd2cc --- /dev/null +++ b/genai/video_generation/videogen_with_vid_edit_remove.py @@ -0,0 +1,59 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +def edit_videos_remove_from_video(output_gcs_uri: str) -> str: + # [START googlegenaisdk_videogen_with_vid_edit_remove] + import time + from google import genai + from google.genai.types import GenerateVideosSource, GenerateVideosConfig, Image, Video, VideoGenerationMask, VideoGenerationMaskMode + + client = genai.Client() + + # TODO(developer): Update and un-comment below line + # output_gcs_uri = "gs://your-bucket/your-prefix" + + operation = client.models.generate_videos( + model="veo-2.0-generate-preview", + source=GenerateVideosSource( + video=Video(uri="gs://cloud-samples-data/generative-ai/video/truck.mp4", mime_type="video/mp4") + ), + config=GenerateVideosConfig( + mask=VideoGenerationMask( + image=Image( + gcs_uri="gs://cloud-samples-data/generative-ai/image/truck-inpainting-dynamic-mask.png", + mime_type="image/png", + ), + mask_mode=VideoGenerationMaskMode.REMOVE, + ), + output_gcs_uri=output_gcs_uri, + ), + ) + + while not operation.done: + time.sleep(15) + operation = client.operations.get(operation) + print(operation) + + if operation.response: + print(operation.result.generated_videos[0].video.uri) + + # Example response: + # gs://your-bucket/your-prefix + # [END googlegenaisdk_videogen_with_vid_edit_remove] + return operation.result.generated_videos[0].video.uri + + +if __name__ == "__main__": + edit_videos_remove_from_video(output_gcs_uri="gs://your-bucket/your-prefix") diff --git a/generative_ai/image_generation/edit_image_inpainting_insert_mask_mode_test.py b/generative_ai/image_generation/edit_image_inpainting_insert_mask_mode_test.py index 1185c60c3c5..bdae7e6041c 100644 --- a/generative_ai/image_generation/edit_image_inpainting_insert_mask_mode_test.py +++ b/generative_ai/image_generation/edit_image_inpainting_insert_mask_mode_test.py @@ -17,6 +17,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import edit_image_inpainting_insert_mask_mode @@ -28,6 +29,7 @@ _PROMPT = "beach" +@pytest.mark.skip("imagegeneration@006 samples pending deprecation") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_edit_image_inpainting_insert_mask_mode() -> None: response = ( diff --git a/generative_ai/image_generation/edit_image_inpainting_insert_mask_test.py b/generative_ai/image_generation/edit_image_inpainting_insert_mask_test.py index 5154baa1fca..5fadcfa78d5 100644 --- a/generative_ai/image_generation/edit_image_inpainting_insert_mask_test.py +++ b/generative_ai/image_generation/edit_image_inpainting_insert_mask_test.py @@ -16,6 +16,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import edit_image_inpainting_insert_mask @@ -27,6 +28,7 @@ _PROMPT = "hat" +@pytest.mark.skip("imagegeneration@006 samples pending deprecation") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_edit_image_inpainting_insert_mask() -> None: response = edit_image_inpainting_insert_mask.edit_image_inpainting_insert_mask( diff --git a/generative_ai/image_generation/edit_image_inpainting_remove_mask_mode_test.py b/generative_ai/image_generation/edit_image_inpainting_remove_mask_mode_test.py index 54633a87fee..68dea245513 100644 --- a/generative_ai/image_generation/edit_image_inpainting_remove_mask_mode_test.py +++ b/generative_ai/image_generation/edit_image_inpainting_remove_mask_mode_test.py @@ -17,6 +17,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import edit_image_inpainting_remove_mask_mode @@ -28,6 +29,7 @@ _PROMPT = 
"sports car" +@pytest.mark.skip("imagegeneration@006 samples pending deprecation") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_edit_image_inpainting_remove_mask_mode() -> None: response = ( diff --git a/generative_ai/image_generation/edit_image_inpainting_remove_mask_test.py b/generative_ai/image_generation/edit_image_inpainting_remove_mask_test.py index 43c965c8bf5..b11b1b1605f 100644 --- a/generative_ai/image_generation/edit_image_inpainting_remove_mask_test.py +++ b/generative_ai/image_generation/edit_image_inpainting_remove_mask_test.py @@ -17,6 +17,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import edit_image_inpainting_remove_mask @@ -28,6 +29,7 @@ _PROMPT = "volleyball game" +@pytest.mark.skip("imagegeneration@006 samples pending deprecation") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_edit_image_inpainting_remove_mask() -> None: response = edit_image_inpainting_remove_mask.edit_image_inpainting_remove_mask( diff --git a/generative_ai/image_generation/edit_image_mask_free_test.py b/generative_ai/image_generation/edit_image_mask_free_test.py index 96b6e717dd2..078578f8bd9 100644 --- a/generative_ai/image_generation/edit_image_mask_free_test.py +++ b/generative_ai/image_generation/edit_image_mask_free_test.py @@ -17,6 +17,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import edit_image_mask_free @@ -27,6 +28,7 @@ _PROMPT = "a dog" +@pytest.mark.skip("imagegeneration@002 samples pending deprecation") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_edit_image_mask_free() -> None: response = edit_image_mask_free.edit_image_mask_free( diff --git a/generative_ai/image_generation/edit_image_mask_test.py b/generative_ai/image_generation/edit_image_mask_test.py index fee71f5ab8a..fa244f6ef73 100644 --- a/generative_ai/image_generation/edit_image_mask_test.py +++ b/generative_ai/image_generation/edit_image_mask_test.py @@ -17,6 +17,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import edit_image_mask @@ -28,6 +29,7 @@ _PROMPT = "a big book" +@pytest.mark.skip("imagegeneration@002 samples pending deprecation") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_edit_image_mask() -> None: response = edit_image_mask.edit_image_mask( diff --git a/generative_ai/image_generation/edit_image_outpainting_mask_test.py b/generative_ai/image_generation/edit_image_outpainting_mask_test.py index e54ba9c5e61..1827d871694 100644 --- a/generative_ai/image_generation/edit_image_outpainting_mask_test.py +++ b/generative_ai/image_generation/edit_image_outpainting_mask_test.py @@ -17,6 +17,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import edit_image_outpainting_mask @@ -28,6 +29,7 @@ _PROMPT = "city with skyscrapers" +@pytest.mark.skip("imagegeneration@006 samples pending deprecation") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_edit_image_outpainting_mask() -> None: response = edit_image_outpainting_mask.edit_image_outpainting_mask( diff --git a/generative_ai/image_generation/edit_image_product_image_test.py b/generative_ai/image_generation/edit_image_product_image_test.py index 487a55435f7..d0256eafc93 100644 --- a/generative_ai/image_generation/edit_image_product_image_test.py +++ b/generative_ai/image_generation/edit_image_product_image_test.py @@ -17,6 +17,7 @@ import 
backoff from google.api_core.exceptions import ResourceExhausted +import pytest import edit_image_product_image @@ -27,6 +28,7 @@ _PROMPT = "beach" +@pytest.mark.skip("imagegeneration@006 samples pending deprecation") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_edit_image_product_image() -> None: response = edit_image_product_image.edit_image_product_image( diff --git a/generative_ai/image_generation/get_short_form_image_captions_test.py b/generative_ai/image_generation/get_short_form_image_captions_test.py index ed56049c070..2364d45d306 100644 --- a/generative_ai/image_generation/get_short_form_image_captions_test.py +++ b/generative_ai/image_generation/get_short_form_image_captions_test.py @@ -17,6 +17,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import get_short_form_image_captions @@ -25,6 +26,7 @@ _INPUT_FILE = os.path.join(_RESOURCES, "cat.png") +@pytest.mark.skip("Sample pending deprecation b/452720552") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_get_short_form_image_captions() -> None: response = get_short_form_image_captions.get_short_form_image_captions( diff --git a/generative_ai/image_generation/get_short_form_image_responses_test.py b/generative_ai/image_generation/get_short_form_image_responses_test.py index 00c7827517a..c901a8734bd 100644 --- a/generative_ai/image_generation/get_short_form_image_responses_test.py +++ b/generative_ai/image_generation/get_short_form_image_responses_test.py @@ -17,6 +17,7 @@ import backoff from google.api_core.exceptions import ResourceExhausted +import pytest import get_short_form_image_responses @@ -26,6 +27,7 @@ _QUESTION = "What breed of cat is this a picture of?" +@pytest.mark.skip("Sample pending deprecation b/452720552") @backoff.on_exception(backoff.expo, ResourceExhausted, max_time=60) def test_get_short_form_image_responses() -> None: response = get_short_form_image_responses.get_short_form_image_responses( diff --git a/generative_ai/rag/quickstart_example.py b/generative_ai/rag/quickstart_example.py index 1a4f2144826..32649f64aeb 100644 --- a/generative_ai/rag/quickstart_example.py +++ b/generative_ai/rag/quickstart_example.py @@ -39,7 +39,7 @@ def quickstart( # paths = ["https://drive.google.com/file/d/123", "gs://my_bucket/my_files_dir"] # Supports Google Cloud Storage and Google Drive Links # Initialize Vertex AI API once per session - vertexai.init(project=PROJECT_ID, location="us-central1") + vertexai.init(project=PROJECT_ID, location="us-east4") # Create RagCorpus # Configure embedding model, for example "text-embedding-005". diff --git a/iam/cloud-client/snippets/list_keys.py b/iam/cloud-client/snippets/list_keys.py index 781ae742b99..26867f72020 100644 --- a/iam/cloud-client/snippets/list_keys.py +++ b/iam/cloud-client/snippets/list_keys.py @@ -24,7 +24,7 @@ def list_keys(project_id: str, account: str) -> List[iam_admin_v1.ServiceAccountKey]: - """Creates a key for a service account. + """Lists a key for a service account. project_id: ID or number of the Google Cloud project you want to use. account: ID or email which is unique identifier of the service account. 
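The list_keys docstring correction above clarifies that the helper enumerates a service account's keys rather than creating one. A minimal usage sketch under that reading (the project and service-account identifiers below are placeholders, and the flat import assumes the script is run from iam/cloud-client/snippets/):

    import list_keys  # iam/cloud-client/snippets/list_keys.py

    # Placeholder identifiers; substitute a real project and service account.
    keys = list_keys.list_keys(
        project_id="my-project-id",
        account="my-service-account@my-project-id.iam.gserviceaccount.com",
    )
    # Each entry is an iam_admin_v1.ServiceAccountKey, per the function's return annotation.
    for key in keys:
        print(key.name)
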
diff --git a/iap/app_engine_app/requirements.txt b/iap/app_engine_app/requirements.txt index f306f93a9ca..6857fdf3175 100644 --- a/iap/app_engine_app/requirements.txt +++ b/iap/app_engine_app/requirements.txt @@ -1,2 +1,2 @@ Flask==3.0.3 -Werkzeug==3.0.3 +Werkzeug==3.1.4 diff --git a/iap/requirements.txt b/iap/requirements.txt index 3c2961ba6a2..850043f7bd1 100644 --- a/iap/requirements.txt +++ b/iap/requirements.txt @@ -4,6 +4,6 @@ google-auth==2.38.0 gunicorn==23.0.0 requests==2.32.4 requests-toolbelt==1.0.0 -Werkzeug==3.0.6 +Werkzeug==3.1.4 google-cloud-iam~=2.17.0 PyJWT~=2.10.1 \ No newline at end of file diff --git a/kubernetes_engine/django_tutorial/requirements.txt b/kubernetes_engine/django_tutorial/requirements.txt index 55589ce59e7..1ef339da5ba 100644 --- a/kubernetes_engine/django_tutorial/requirements.txt +++ b/kubernetes_engine/django_tutorial/requirements.txt @@ -1,5 +1,5 @@ -Django==5.2.5; python_version >= "3.10" -Django==4.2.23; python_version >= "3.8" and python_version < "3.10" +Django==5.2.9; python_version >= "3.10" +Django==4.2.24; python_version >= "3.8" and python_version < "3.10" # Uncomment the mysqlclient requirement if you are using MySQL rather than # PostgreSQL. You must also have a MySQL client installed in that case. #mysqlclient==1.4.1 diff --git a/logging/redaction/Dockerfile b/logging/redaction/Dockerfile index 3d8649357ed..c108cec3dd0 100644 --- a/logging/redaction/Dockerfile +++ b/logging/redaction/Dockerfile @@ -1,5 +1,4 @@ -# From apache/beam_python3.9_sdk:2.43.0 -FROM apache/beam_python3.9_sdk@sha256:0cb6eceed3652d01dd5a555fd9ff4eff5df62161dd99ad53fe591858bdb57741 +FROM apache/beam_python3.9_sdk@sha256:246c4b813c6de8c240b49ed03c426f413f1768321a3c441413031396a08912f9 # Install google-cloud-logging package that is missing in Beam SDK COPY requirements.txt /tmp diff --git a/managedkafka/snippets/connect/clusters/delete_connect_cluster.py b/managedkafka/snippets/connect/clusters/delete_connect_cluster.py index 84258fe830f..01e27875a20 100644 --- a/managedkafka/snippets/connect/clusters/delete_connect_cluster.py +++ b/managedkafka/snippets/connect/clusters/delete_connect_cluster.py @@ -41,14 +41,14 @@ def delete_connect_cluster( # region = "us-central1" # connect_cluster_id = "my-connect-cluster" - client = ManagedKafkaConnectClient() + connect_client = ManagedKafkaConnectClient() request = managedkafka_v1.DeleteConnectClusterRequest( - name=client.connect_cluster_path(project_id, region, connect_cluster_id), + name=connect_client.connect_cluster_path(project_id, region, connect_cluster_id), ) try: - operation = client.delete_connect_cluster(request=request) + operation = connect_client.delete_connect_cluster(request=request) print(f"Waiting for operation {operation.operation.name} to complete...") operation.result() print("Deleted Connect cluster") diff --git a/managedkafka/snippets/connect/connectors/connectors_test.py b/managedkafka/snippets/connect/connectors/connectors_test.py new file mode 100644 index 00000000000..ade860ae40d --- /dev/null +++ b/managedkafka/snippets/connect/connectors/connectors_test.py @@ -0,0 +1,405 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock +from unittest.mock import MagicMock + +import create_bigquery_sink_connector +import create_cloud_storage_sink_connector +import create_mirrormaker2_source_connector +import create_pubsub_sink_connector +import create_pubsub_source_connector +import delete_connector +import get_connector +from google.api_core.operation import Operation +from google.cloud import managedkafka_v1 +import list_connectors +import pause_connector +import pytest +import restart_connector +import resume_connector +import stop_connector +import update_connector + + +PROJECT_ID = "test-project-id" +REGION = "us-central1" +CONNECT_CLUSTER_ID = "test-connect-cluster-id" +CONNECTOR_ID = "test-connector-id" + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.create_connector" +) +def test_create_mirrormaker2_source_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + connector_id = "mm2-source-to-target-connector-id" + operation = mock.MagicMock(spec=Operation) + connector = managedkafka_v1.types.Connector() + connector.name = connector_id + operation.result = mock.MagicMock(return_value=connector) + mock_method.return_value = operation + + create_mirrormaker2_source_connector.create_mirrormaker2_source_connector( + PROJECT_ID, + REGION, + CONNECT_CLUSTER_ID, + connector_id, + "source_cluster_dns", + "target_cluster_dns", + "3", + "source", + "target", + ".*", + "mm2.*\\.internal,.*\\.replica,__.*", + ) + + out, _ = capsys.readouterr() + assert "Created Connector" in out + assert connector_id in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.create_connector" +) +def test_create_pubsub_source_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + connector_id = "CPS_SOURCE_CONNECTOR_ID" + operation = mock.MagicMock(spec=Operation) + connector = managedkafka_v1.types.Connector() + connector.name = connector_id + operation.result = mock.MagicMock(return_value=connector) + mock_method.return_value = operation + + create_pubsub_source_connector.create_pubsub_source_connector( + PROJECT_ID, + REGION, + CONNECT_CLUSTER_ID, + connector_id, + "GMK_TOPIC_ID", + "CPS_SUBSCRIPTION_ID", + "GCP_PROJECT_ID", + "3", + "org.apache.kafka.connect.converters.ByteArrayConverter", + "org.apache.kafka.connect.storage.StringConverter", + ) + + out, _ = capsys.readouterr() + assert "Created Connector" in out + assert connector_id in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.create_connector" +) +def test_create_pubsub_sink_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + connector_id = "CPS_SINK_CONNECTOR_ID" + operation = mock.MagicMock(spec=Operation) + connector = managedkafka_v1.types.Connector() + connector.name = connector_id + operation.result = mock.MagicMock(return_value=connector) + mock_method.return_value = operation + + 
create_pubsub_sink_connector.create_pubsub_sink_connector( + PROJECT_ID, + REGION, + CONNECT_CLUSTER_ID, + connector_id, + "GMK_TOPIC_ID", + "org.apache.kafka.connect.storage.StringConverter", + "org.apache.kafka.connect.storage.StringConverter", + "CPS_TOPIC_ID", + "GCP_PROJECT_ID", + "3", + ) + + out, _ = capsys.readouterr() + assert "Created Connector" in out + assert connector_id in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.create_connector" +) +def test_create_cloud_storage_sink_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + connector_id = "GCS_SINK_CONNECTOR_ID" + operation = mock.MagicMock(spec=Operation) + connector = managedkafka_v1.types.Connector() + connector.name = connector_id + operation.result = mock.MagicMock(return_value=connector) + mock_method.return_value = operation + + create_cloud_storage_sink_connector.create_cloud_storage_sink_connector( + PROJECT_ID, + REGION, + CONNECT_CLUSTER_ID, + connector_id, + "GMK_TOPIC_ID", + "GCS_BUCKET_NAME", + "3", + "json", + "org.apache.kafka.connect.json.JsonConverter", + "false", + "org.apache.kafka.connect.storage.StringConverter", + ) + + out, _ = capsys.readouterr() + assert "Created Connector" in out + assert connector_id + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.create_connector" +) +def test_create_bigquery_sink_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + connector_id = "BQ_SINK_CONNECTOR_ID" + operation = mock.MagicMock(spec=Operation) + connector = managedkafka_v1.types.Connector() + connector.name = connector_id + operation.result = mock.MagicMock(return_value=connector) + mock_method.return_value = operation + + create_bigquery_sink_connector.create_bigquery_sink_connector( + PROJECT_ID, + REGION, + CONNECT_CLUSTER_ID, + connector_id, + "GMK_TOPIC_ID", + "3", + "org.apache.kafka.connect.storage.StringConverter", + "org.apache.kafka.connect.json.JsonConverter", + "false", + "BQ_DATASET_ID", + ) + + out, _ = capsys.readouterr() + assert "Created Connector" in out + assert connector_id in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.list_connectors" +) +def test_list_connectors( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + connector = managedkafka_v1.types.Connector() + connector.name = managedkafka_v1.ManagedKafkaConnectClient.connector_path( + PROJECT_ID, REGION, CONNECT_CLUSTER_ID, CONNECTOR_ID + ) + mock_method.return_value = [connector] + + list_connectors.list_connectors( + project_id=PROJECT_ID, + region=REGION, + connect_cluster_id=CONNECT_CLUSTER_ID, + ) + + out, _ = capsys.readouterr() + assert "Got connector" in out + assert CONNECTOR_ID in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.get_connector" +) +def test_get_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + connector = managedkafka_v1.types.Connector() + connector.name = managedkafka_v1.ManagedKafkaConnectClient.connector_path( + PROJECT_ID, REGION, CONNECT_CLUSTER_ID, CONNECTOR_ID + ) + mock_method.return_value = connector + + get_connector.get_connector( + project_id=PROJECT_ID, + region=REGION, + 
connect_cluster_id=CONNECT_CLUSTER_ID, + connector_id=CONNECTOR_ID, + ) + + out, _ = capsys.readouterr() + assert "Got connector" in out + assert CONNECTOR_ID in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.update_connector" +) +def test_update_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + configs = {"tasks.max": "6", "value.converter.schemas.enable": "true"} + operation = mock.MagicMock(spec=Operation) + connector = managedkafka_v1.types.Connector() + connector.name = managedkafka_v1.ManagedKafkaConnectClient.connector_path( + PROJECT_ID, REGION, CONNECT_CLUSTER_ID, CONNECTOR_ID + ) + operation.result = mock.MagicMock(return_value=connector) + mock_method.return_value = operation + + update_connector.update_connector( + project_id=PROJECT_ID, + region=REGION, + connect_cluster_id=CONNECT_CLUSTER_ID, + connector_id=CONNECTOR_ID, + configs=configs, + ) + + out, _ = capsys.readouterr() + assert "Updated connector" in out + assert CONNECTOR_ID in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.delete_connector" +) +def test_delete_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + operation = mock.MagicMock(spec=Operation) + operation.result = mock.MagicMock(return_value=None) + mock_method.return_value = operation + + delete_connector.delete_connector( + project_id=PROJECT_ID, + region=REGION, + connect_cluster_id=CONNECT_CLUSTER_ID, + connector_id=CONNECTOR_ID, + ) + + out, _ = capsys.readouterr() + assert "Deleted connector" in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.pause_connector" +) +def test_pause_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + operation = mock.MagicMock(spec=Operation) + operation.result = mock.MagicMock(return_value=None) + mock_method.return_value = operation + + pause_connector.pause_connector( + project_id=PROJECT_ID, + region=REGION, + connect_cluster_id=CONNECT_CLUSTER_ID, + connector_id=CONNECTOR_ID, + ) + + out, _ = capsys.readouterr() + assert "Paused connector" in out + assert CONNECTOR_ID in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.resume_connector" +) +def test_resume_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + operation = mock.MagicMock(spec=Operation) + operation.result = mock.MagicMock(return_value=None) + mock_method.return_value = operation + + resume_connector.resume_connector( + project_id=PROJECT_ID, + region=REGION, + connect_cluster_id=CONNECT_CLUSTER_ID, + connector_id=CONNECTOR_ID, + ) + + out, _ = capsys.readouterr() + assert "Resumed connector" in out + assert CONNECTOR_ID in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.stop_connector" +) +def test_stop_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + operation = mock.MagicMock(spec=Operation) + operation.result = mock.MagicMock(return_value=None) + mock_method.return_value = operation + + stop_connector.stop_connector( + project_id=PROJECT_ID, + region=REGION, + 
connect_cluster_id=CONNECT_CLUSTER_ID, + connector_id=CONNECTOR_ID, + ) + + out, _ = capsys.readouterr() + assert "Stopped connector" in out + assert CONNECTOR_ID in out + mock_method.assert_called_once() + + +@mock.patch( + "google.cloud.managedkafka_v1.services.managed_kafka_connect.ManagedKafkaConnectClient.restart_connector" +) +def test_restart_connector( + mock_method: MagicMock, + capsys: pytest.CaptureFixture[str], +) -> None: + operation = mock.MagicMock(spec=Operation) + operation.result = mock.MagicMock(return_value=None) + mock_method.return_value = operation + + restart_connector.restart_connector( + project_id=PROJECT_ID, + region=REGION, + connect_cluster_id=CONNECT_CLUSTER_ID, + connector_id=CONNECTOR_ID, + ) + + out, _ = capsys.readouterr() + assert "Restarted connector" in out + assert CONNECTOR_ID in out + mock_method.assert_called_once() diff --git a/managedkafka/snippets/connect/connectors/create_bigquery_sink_connector.py b/managedkafka/snippets/connect/connectors/create_bigquery_sink_connector.py new file mode 100644 index 00000000000..129872d66d3 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/create_bigquery_sink_connector.py @@ -0,0 +1,98 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_bigquery_sink_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, + topics: str, + tasks_max: str, + key_converter: str, + value_converter: str, + value_converter_schemas_enable: str, + default_dataset: str, +) -> None: + """ + Create a BigQuery Sink connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: Name of the connector. + topics: Kafka topics to read from. + tasks_max: Maximum number of tasks. + key_converter: Key converter class. + value_converter: Value converter class. + value_converter_schemas_enable: Enable schemas for value converter. + default_dataset: BigQuery dataset ID. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors or + the timeout before the operation completes is reached. + """ + # TODO(developer): Update with your config values. 
Here is a sample configuration: + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "BQ_SINK_CONNECTOR_ID" + # topics = "GMK_TOPIC_ID" + # tasks_max = "3" + # key_converter = "org.apache.kafka.connect.storage.StringConverter" + # value_converter = "org.apache.kafka.connect.json.JsonConverter" + # value_converter_schemas_enable = "false" + # default_dataset = "BQ_DATASET_ID" + + # [START managedkafka_create_bigquery_sink_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud.managedkafka_v1.types import Connector, CreateConnectorRequest + + connect_client = ManagedKafkaConnectClient() + parent = connect_client.connect_cluster_path(project_id, region, connect_cluster_id) + + configs = { + "name": connector_id, + "project": project_id, + "topics": topics, + "tasks.max": tasks_max, + "connector.class": "com.wepay.kafka.connect.bigquery.BigQuerySinkConnector", + "key.converter": key_converter, + "value.converter": value_converter, + "value.converter.schemas.enable": value_converter_schemas_enable, + "defaultDataset": default_dataset, + } + + connector = Connector() + connector.name = connector_id + connector.configs = configs + + request = CreateConnectorRequest( + parent=parent, + connector_id=connector_id, + connector=connector, + ) + + try: + operation = connect_client.create_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + response = operation.result() + print("Created Connector:", response) + except GoogleAPICallError as e: + print(f"The operation failed with error: {e}") + # [END managedkafka_create_bigquery_sink_connector] diff --git a/managedkafka/snippets/connect/connectors/create_cloud_storage_sink_connector.py b/managedkafka/snippets/connect/connectors/create_cloud_storage_sink_connector.py new file mode 100644 index 00000000000..8e6d7bc2c70 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/create_cloud_storage_sink_connector.py @@ -0,0 +1,101 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +def create_cloud_storage_sink_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, + topics: str, + gcs_bucket_name: str, + tasks_max: str, + format_output_type: str, + value_converter: str, + value_converter_schemas_enable: str, + key_converter: str, +) -> None: + """ + Create a Cloud Storage Sink connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: Name of the connector. + topics: Kafka topics to read from. + gcs_bucket_name: Google Cloud Storage bucket name. + tasks_max: Maximum number of tasks. + format_output_type: Output format type. + value_converter: Value converter class. 
+ value_converter_schemas_enable: Enable schemas for value converter. + key_converter: Key converter class. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors or + the timeout before the operation completes is reached. + """ + # TODO(developer): Update with your config values. Here is a sample configuration: + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "GCS_SINK_CONNECTOR_ID" + # topics = "GMK_TOPIC_ID" + # gcs_bucket_name = "GCS_BUCKET_NAME" + # tasks_max = "3" + # format_output_type = "json" + # value_converter = "org.apache.kafka.connect.json.JsonConverter" + # value_converter_schemas_enable = "false" + # key_converter = "org.apache.kafka.connect.storage.StringConverter" + + # [START managedkafka_create_cloud_storage_sink_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud.managedkafka_v1.types import Connector, CreateConnectorRequest + + connect_client = ManagedKafkaConnectClient() + parent = connect_client.connect_cluster_path(project_id, region, connect_cluster_id) + + configs = { + "connector.class": "io.aiven.kafka.connect.gcs.GcsSinkConnector", + "tasks.max": tasks_max, + "topics": topics, + "gcs.bucket.name": gcs_bucket_name, + "gcs.credentials.default": "true", + "format.output.type": format_output_type, + "name": connector_id, + "value.converter": value_converter, + "value.converter.schemas.enable": value_converter_schemas_enable, + "key.converter": key_converter, + } + + connector = Connector() + connector.name = connector_id + connector.configs = configs + + request = CreateConnectorRequest( + parent=parent, + connector_id=connector_id, + connector=connector, + ) + + try: + operation = connect_client.create_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + response = operation.result() + print("Created Connector:", response) + except GoogleAPICallError as e: + print(f"The operation failed with error: {e}") + # [END managedkafka_create_cloud_storage_sink_connector] diff --git a/managedkafka/snippets/connect/connectors/create_mirrormaker2_source_connector.py b/managedkafka/snippets/connect/connectors/create_mirrormaker2_source_connector.py new file mode 100644 index 00000000000..2252ac2c2fd --- /dev/null +++ b/managedkafka/snippets/connect/connectors/create_mirrormaker2_source_connector.py @@ -0,0 +1,107 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_mirrormaker2_source_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, + source_bootstrap_servers: str, + target_bootstrap_servers: str, + tasks_max: str, + source_cluster_alias: str, + target_cluster_alias: str, + topics: str, + topics_exclude: str, +) -> None: + """ + Create a MirrorMaker 2.0 Source connector. 
+ + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: Name of the connector. + source_bootstrap_servers: Source cluster bootstrap servers. + target_bootstrap_servers: Target cluster bootstrap servers. This is usually the primary cluster. + tasks_max: Controls the level of parallelism for the connector. + source_cluster_alias: Alias for the source cluster. + target_cluster_alias: Alias for the target cluster. + topics: Topics to mirror. + topics_exclude: Topics to exclude from mirroring. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors. + """ + # TODO(developer): Update with your config values. Here is a sample configuration: + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "mm2-source-to-target-connector-id" + # source_bootstrap_servers = "source_cluster_dns" + # target_bootstrap_servers = "target_cluster_dns" + # tasks_max = "3" + # source_cluster_alias = "source" + # target_cluster_alias = "target" + # topics = ".*" + # topics_exclude = "mm2.*.internal,.*.replica,__.*" + + # [START managedkafka_create_mirrormaker2_source_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud.managedkafka_v1.types import Connector, CreateConnectorRequest + + connect_client = ManagedKafkaConnectClient() + parent = connect_client.connect_cluster_path(project_id, region, connect_cluster_id) + + configs = { + "connector.class": "org.apache.kafka.connect.mirror.MirrorSourceConnector", + "name": connector_id, + "tasks.max": tasks_max, + "source.cluster.alias": source_cluster_alias, + "target.cluster.alias": target_cluster_alias, # This is usually the primary cluster. + # Replicate all topics from the source + "topics": topics, + # The value for bootstrap.servers is a hostname:port pair for the Kafka broker in + # the source/target cluster. + # For example: "kafka-broker:9092" + "source.cluster.bootstrap.servers": source_bootstrap_servers, + "target.cluster.bootstrap.servers": target_bootstrap_servers, + # You can define an exclusion policy for topics as follows: + # To exclude internal MirrorMaker 2 topics, internal topics and replicated topics. + "topics.exclude": topics_exclude, + } + + connector = Connector() + # The name of the connector. + connector.name = connector_id + connector.configs = configs + + request = CreateConnectorRequest( + parent=parent, + connector_id=connector_id, + connector=connector, + ) + + try: + operation = connect_client.create_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + response = operation.result() + print("Created Connector:", response) + except GoogleAPICallError as e: + print(f"The operation failed with error: {e}") + # [END managedkafka_create_mirrormaker2_source_connector] diff --git a/managedkafka/snippets/connect/connectors/create_pubsub_sink_connector.py b/managedkafka/snippets/connect/connectors/create_pubsub_sink_connector.py new file mode 100644 index 00000000000..7f455059a84 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/create_pubsub_sink_connector.py @@ -0,0 +1,97 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_pubsub_sink_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, + topics: str, + value_converter: str, + key_converter: str, + cps_topic: str, + cps_project: str, + tasks_max: str, +) -> None: + """ + Create a Pub/Sub Sink connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: Name of the connector. + topics: Kafka topics to read from. + value_converter: Value converter class. + key_converter: Key converter class. + cps_topic: Cloud Pub/Sub topic ID. + cps_project: Cloud Pub/Sub project ID. + tasks_max: Maximum number of tasks. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors or + the timeout before the operation completes is reached. + """ + # TODO(developer): Update with your config values. Here is a sample configuration: + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "CPS_SINK_CONNECTOR_ID" + # topics = "GMK_TOPIC_ID" + # value_converter = "org.apache.kafka.connect.storage.StringConverter" + # key_converter = "org.apache.kafka.connect.storage.StringConverter" + # cps_topic = "CPS_TOPIC_ID" + # cps_project = "GCP_PROJECT_ID" + # tasks_max = "3" + + # [START managedkafka_create_pubsub_sink_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud.managedkafka_v1.types import Connector, CreateConnectorRequest + + connect_client = ManagedKafkaConnectClient() + parent = connect_client.connect_cluster_path(project_id, region, connect_cluster_id) + + configs = { + "connector.class": "com.google.pubsub.kafka.sink.CloudPubSubSinkConnector", + "name": connector_id, + "tasks.max": tasks_max, + "topics": topics, + "value.converter": value_converter, + "key.converter": key_converter, + "cps.topic": cps_topic, + "cps.project": cps_project, + } + + connector = Connector() + connector.name = connector_id + connector.configs = configs + + request = CreateConnectorRequest( + parent=parent, + connector_id=connector_id, + connector=connector, + ) + + try: + operation = connect_client.create_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + response = operation.result() + print("Created Connector:", response) + except GoogleAPICallError as e: + print(f"The operation failed with error: {e}") + # [END managedkafka_create_pubsub_sink_connector] diff --git a/managedkafka/snippets/connect/connectors/create_pubsub_source_connector.py b/managedkafka/snippets/connect/connectors/create_pubsub_source_connector.py new file mode 100644 index 00000000000..19f891fd384 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/create_pubsub_source_connector.py @@ -0,0 +1,97 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def create_pubsub_source_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, + kafka_topic: str, + cps_subscription: str, + cps_project: str, + tasks_max: str, + value_converter: str, + key_converter: str, +) -> None: + """ + Create a Pub/Sub Source connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: Name of the connector. + kafka_topic: Kafka topic to publish to. + cps_subscription: Cloud Pub/Sub subscription ID. + cps_project: Cloud Pub/Sub project ID. + tasks_max: Maximum number of tasks. + value_converter: Value converter class. + key_converter: Key converter class. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors or + the timeout before the operation completes is reached. + """ + # TODO(developer): Update with your config values. Here is a sample configuration: + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "CPS_SOURCE_CONNECTOR_ID" + # kafka_topic = "GMK_TOPIC_ID" + # cps_subscription = "CPS_SUBSCRIPTION_ID" + # cps_project = "GCP_PROJECT_ID" + # tasks_max = "3" + # value_converter = "org.apache.kafka.connect.converters.ByteArrayConverter" + # key_converter = "org.apache.kafka.connect.storage.StringConverter" + + # [START managedkafka_create_pubsub_source_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud.managedkafka_v1.types import Connector, CreateConnectorRequest + + connect_client = ManagedKafkaConnectClient() + parent = connect_client.connect_cluster_path(project_id, region, connect_cluster_id) + + configs = { + "connector.class": "com.google.pubsub.kafka.source.CloudPubSubSourceConnector", + "name": connector_id, + "tasks.max": tasks_max, + "kafka.topic": kafka_topic, + "cps.subscription": cps_subscription, + "cps.project": cps_project, + "value.converter": value_converter, + "key.converter": key_converter, + } + + connector = Connector() + connector.name = connector_id + connector.configs = configs + + request = CreateConnectorRequest( + parent=parent, + connector_id=connector_id, + connector=connector, + ) + + try: + operation = connect_client.create_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + response = operation.result() + print("Created Connector:", response) + except GoogleAPICallError as e: + print(f"The operation failed with error: {e}") + # [END managedkafka_create_pubsub_source_connector] diff --git a/managedkafka/snippets/connect/connectors/delete_connector.py b/managedkafka/snippets/connect/connectors/delete_connector.py new file mode 100644 index 00000000000..84ee0e3ecff --- /dev/null +++ b/managedkafka/snippets/connect/connectors/delete_connector.py @@ -0,0 +1,61 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use 
this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def delete_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, +) -> None: + """ + Delete a connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: ID of the connector. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors. + """ + # [START managedkafka_delete_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud import managedkafka_v1 + + # TODO(developer) + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "my-connector" + + connect_client = ManagedKafkaConnectClient() + + request = managedkafka_v1.DeleteConnectorRequest( + name=connect_client.connector_path(project_id, region, connect_cluster_id, connector_id), + ) + + try: + operation = connect_client.delete_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + operation.result() + print("Deleted connector") + except GoogleAPICallError as e: + print(f"The operation failed with error: {e}") + + # [END managedkafka_delete_connector] diff --git a/managedkafka/snippets/connect/connectors/get_connector.py b/managedkafka/snippets/connect/connectors/get_connector.py new file mode 100644 index 00000000000..a3477ef4c70 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/get_connector.py @@ -0,0 +1,60 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def get_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, +) -> None: + """ + Get details of a specific connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: ID of the connector. + + Raises: + This method will raise the NotFound exception if the connector is not found. 
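+
+    Example:
+        A minimal invocation sketch; the placeholder values mirror the
+        TODO(developer) block below and are not real resource names.
+
+        get_connector("my-project-id", "us-central1", "my-connect-cluster", "my-connector")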
+ """ + # [START managedkafka_get_connector] + from google.api_core.exceptions import NotFound + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ManagedKafkaConnectClient + from google.cloud import managedkafka_v1 + + # TODO(developer) + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "my-connector" + + connect_client = ManagedKafkaConnectClient() + + connector_path = connect_client.connector_path( + project_id, region, connect_cluster_id, connector_id + ) + request = managedkafka_v1.GetConnectorRequest( + name=connector_path, + ) + + try: + connector = connect_client.get_connector(request=request) + print("Got connector:", connector) + except NotFound as e: + print(f"Failed to get connector {connector_id} with error: {e}") + + # [END managedkafka_get_connector] diff --git a/managedkafka/snippets/connect/connectors/list_connectors.py b/managedkafka/snippets/connect/connectors/list_connectors.py new file mode 100644 index 00000000000..f707df09454 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/list_connectors.py @@ -0,0 +1,54 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def list_connectors( + project_id: str, + region: str, + connect_cluster_id: str, +) -> None: + """ + List all connectors in a Kafka Connect cluster. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + """ + # [START managedkafka_list_connectors] + from google.cloud import managedkafka_v1 + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.api_core.exceptions import GoogleAPICallError + + # TODO(developer) + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + + connect_client = ManagedKafkaConnectClient() + + request = managedkafka_v1.ListConnectorsRequest( + parent=connect_client.connect_cluster_path(project_id, region, connect_cluster_id), + ) + + try: + response = connect_client.list_connectors(request=request) + for connector in response: + print("Got connector:", connector) + except GoogleAPICallError as e: + print(f"Failed to list connectors with error: {e}") + + # [END managedkafka_list_connectors] diff --git a/managedkafka/snippets/connect/connectors/pause_connector.py b/managedkafka/snippets/connect/connectors/pause_connector.py new file mode 100644 index 00000000000..35f184c2443 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/pause_connector.py @@ -0,0 +1,61 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def pause_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, +) -> None: + """ + Pause a connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: ID of the connector. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors. + """ + # [START managedkafka_pause_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud import managedkafka_v1 + + # TODO(developer) + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "my-connector" + + connect_client = ManagedKafkaConnectClient() + + request = managedkafka_v1.PauseConnectorRequest( + name=connect_client.connector_path(project_id, region, connect_cluster_id, connector_id), + ) + + try: + operation = connect_client.pause_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + operation.result() + print(f"Paused connector {connector_id}") + except GoogleAPICallError as e: + print(f"Failed to pause connector {connector_id} with error: {e}") + + # [END managedkafka_pause_connector] diff --git a/managedkafka/snippets/connect/connectors/restart_connector.py b/managedkafka/snippets/connect/connectors/restart_connector.py new file mode 100644 index 00000000000..72714de7aa1 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/restart_connector.py @@ -0,0 +1,63 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def restart_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, +) -> None: + """ + Restart a connector. + Note: This operation is used to restart a failed connector. To start + a stopped connector, use the `resume_connector` operation instead. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: ID of the connector. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors. 
+ """ + # [START managedkafka_restart_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud import managedkafka_v1 + + # TODO(developer) + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "my-connector" + + connect_client = ManagedKafkaConnectClient() + + request = managedkafka_v1.RestartConnectorRequest( + name=connect_client.connector_path(project_id, region, connect_cluster_id, connector_id), + ) + + try: + operation = connect_client.restart_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + operation.result() + print(f"Restarted connector {connector_id}") + except GoogleAPICallError as e: + print(f"Failed to restart connector {connector_id} with error: {e}") + + # [END managedkafka_restart_connector] diff --git a/managedkafka/snippets/connect/connectors/resume_connector.py b/managedkafka/snippets/connect/connectors/resume_connector.py new file mode 100644 index 00000000000..3787368ef1e --- /dev/null +++ b/managedkafka/snippets/connect/connectors/resume_connector.py @@ -0,0 +1,61 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def resume_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, +) -> None: + """ + Resume a paused connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: ID of the connector. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors. 
+ """ + # [START managedkafka_resume_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud import managedkafka_v1 + + # TODO(developer) + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "my-connector" + + connect_client = ManagedKafkaConnectClient() + + request = managedkafka_v1.ResumeConnectorRequest( + name=connect_client.connector_path(project_id, region, connect_cluster_id, connector_id), + ) + + try: + operation = connect_client.resume_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + operation.result() + print(f"Resumed connector {connector_id}") + except GoogleAPICallError as e: + print(f"Failed to resume connector {connector_id} with error: {e}") + + # [END managedkafka_resume_connector] diff --git a/managedkafka/snippets/connect/connectors/stop_connector.py b/managedkafka/snippets/connect/connectors/stop_connector.py new file mode 100644 index 00000000000..cd3767075bc --- /dev/null +++ b/managedkafka/snippets/connect/connectors/stop_connector.py @@ -0,0 +1,61 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def stop_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, +) -> None: + """ + Stop a connector. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: ID of the connector. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors. 
+ """ + # [START managedkafka_stop_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud import managedkafka_v1 + + # TODO(developer) + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "my-connector" + + connect_client = ManagedKafkaConnectClient() + + request = managedkafka_v1.StopConnectorRequest( + name=connect_client.connector_path(project_id, region, connect_cluster_id, connector_id), + ) + + try: + operation = connect_client.stop_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + operation.result() + print(f"Stopped connector {connector_id}") + except GoogleAPICallError as e: + print(f"Failed to stop connector {connector_id} with error: {e}") + + # [END managedkafka_stop_connector] diff --git a/managedkafka/snippets/connect/connectors/update_connector.py b/managedkafka/snippets/connect/connectors/update_connector.py new file mode 100644 index 00000000000..b0357079cd9 --- /dev/null +++ b/managedkafka/snippets/connect/connectors/update_connector.py @@ -0,0 +1,79 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +def update_connector( + project_id: str, + region: str, + connect_cluster_id: str, + connector_id: str, + configs: dict, +) -> None: + """ + Update a connector's configuration. + + Args: + project_id: Google Cloud project ID. + region: Cloud region. + connect_cluster_id: ID of the Kafka Connect cluster. + connector_id: ID of the connector. + configs: Dictionary containing the updated configuration. + + Raises: + This method will raise the GoogleAPICallError exception if the operation errors. + """ + # [START managedkafka_update_connector] + from google.api_core.exceptions import GoogleAPICallError + from google.cloud import managedkafka_v1 + from google.cloud.managedkafka_v1.services.managed_kafka_connect import ( + ManagedKafkaConnectClient, + ) + from google.cloud.managedkafka_v1.types import Connector + from google.protobuf import field_mask_pb2 + + # TODO(developer) + # project_id = "my-project-id" + # region = "us-central1" + # connect_cluster_id = "my-connect-cluster" + # connector_id = "my-connector" + # configs = { + # "tasks.max": "6", + # "value.converter.schemas.enable": "true" + # } + + connect_client = ManagedKafkaConnectClient() + + connector = Connector() + connector.name = connect_client.connector_path( + project_id, region, connect_cluster_id, connector_id + ) + connector.configs = configs + update_mask = field_mask_pb2.FieldMask() + update_mask.paths.append("config") + + # For a list of editable fields, one can check https://cloud.google.com/managed-service-for-apache-kafka/docs/connect-cluster/update-connector#editable-properties. 
+ request = managedkafka_v1.UpdateConnectorRequest( + update_mask=update_mask, + connector=connector, + ) + + try: + operation = connect_client.update_connector(request=request) + print(f"Waiting for operation {operation.operation.name} to complete...") + response = operation.result() + print("Updated connector:", response) + except GoogleAPICallError as e: + print(f"The operation failed with error: {e}") + + # [END managedkafka_update_connector] diff --git a/model_armor/snippets/requirements.txt b/model_armor/snippets/requirements.txt index a2d49b77ba7..0b64c19841b 100644 --- a/model_armor/snippets/requirements.txt +++ b/model_armor/snippets/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-modelarmor==0.2.5 +google-cloud-modelarmor==0.2.8 google-cloud-dlp==3.30.0 \ No newline at end of file diff --git a/model_garden/anthropic/anthropic_batchpredict_with_bq.py b/model_garden/anthropic/anthropic_batchpredict_with_bq.py index 1823eb8c266..1e9ecdf0940 100644 --- a/model_garden/anthropic/anthropic_batchpredict_with_bq.py +++ b/model_garden/anthropic/anthropic_batchpredict_with_bq.py @@ -26,7 +26,7 @@ def generate_content(output_uri: str) -> str: # output_uri = f"bq://your-project.your_dataset.your_table" job = client.batches.create( - # Check Anthropic Claude region availability in https://cloud.devsite.corp.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#regions + # Check Anthropic Claude region availability in https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/use-claude#regions # More about Anthropic model: https://console.cloud.google.com/vertex-ai/publishers/anthropic/model-garden/claude-3-5-haiku model="publishers/anthropic/models/claude-3-5-haiku", # The source dataset needs to be created specifically in us-east5 diff --git a/noxfile-template.py b/noxfile-template.py index 93b0186aedd..d2c4e7608ce 100644 --- a/noxfile-template.py +++ b/noxfile-template.py @@ -88,7 +88,7 @@ def get_pytest_env_vars() -> dict[str, str]: # All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] +ALL_VERSIONS = ["2.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] # Any default versions that should be ignored. 
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] diff --git a/parametermanager/snippets/requirements.txt b/parametermanager/snippets/requirements.txt index 012571b208f..0919a6ec653 100644 --- a/parametermanager/snippets/requirements.txt +++ b/parametermanager/snippets/requirements.txt @@ -1 +1 @@ -google-cloud-parametermanager==0.1.3 +google-cloud-parametermanager==0.1.5 diff --git a/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb b/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb index f0656c1208c..ab637613a91 100644 --- a/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb +++ b/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb @@ -1381,7 +1381,7 @@ " display_name=\"weather-forecasting\",\n", " python_package_gcs_uri=f\"gs://{bucket}/weather/weather-model-1.0.0.tar.gz\",\n", " python_module_name=\"weather.trainer\",\n", - " container_uri=\"us-docker.pkg.dev/vertex-ai/training/pytorch-gpu.2-4.py310:latest\",\n", + " container_uri=\"us-docker.pkg.dev/vertex-ai/training/pytorch-gpu.2-8.py310:latest\",\n", ")\n", "job.run(\n", " machine_type=\"n1-highmem-8\",\n", diff --git a/people-and-planet-ai/weather-forecasting/serving/weather-model/pyproject.toml b/people-and-planet-ai/weather-forecasting/serving/weather-model/pyproject.toml index f1ca5c7eb26..6f6c66d33a9 100644 --- a/people-and-planet-ai/weather-forecasting/serving/weather-model/pyproject.toml +++ b/people-and-planet-ai/weather-forecasting/serving/weather-model/pyproject.toml @@ -18,7 +18,7 @@ name = "weather-model" version = "1.0.0" dependencies = [ "datasets==4.0.0", - "torch==2.4.0", # make sure this matches the `container_uri` in `notebooks/3-training.ipynb` + "torch==2.8.0", # make sure this matches the `container_uri` in `notebooks/3-training.ipynb` "transformers==4.48.0", ] diff --git a/pubsublite/spark-connector/README.md b/pubsublite/spark-connector/README.md index dc800440166..c133fd66f64 100644 --- a/pubsublite/spark-connector/README.md +++ b/pubsublite/spark-connector/README.md @@ -193,7 +193,7 @@ Here is an example output: