diff --git a/.github/header-checker-lint.yml b/.github/header-checker-lint.yml index 6fe78aa7..1743b487 100644 --- a/.github/header-checker-lint.yml +++ b/.github/header-checker-lint.yml @@ -1,15 +1,21 @@ -{"allowedCopyrightHolders": ["Google LLC"], - "allowedLicenses": ["Apache-2.0", "MIT", "BSD-3"], - "ignoreFiles": ["**/requirements.txt", "**/requirements-test.txt", "**/__init__.py", "samples/**/constraints.txt", "samples/**/constraints-test.txt"], - "sourceFileExtensions": [ - "ts", - "js", - "java", - "sh", - "Dockerfile", - "yaml", - "py", - "html", - "txt" - ] -} \ No newline at end of file +allowedCopyrightHolders: + - "Google LLC" +allowedLicenses: + - "Apache-2.0" +sourceFileExtensions: + - "yaml" + - "yml" + - "sh" + - "proto" + - "Dockerfile" + - "py" + - "html" + - "text" +ignoreFiles: + - ".github/release-please.yml" + - ".github/release-trigger.yml" + - ".github/header-checker-lint.yml" + - ".github/sync-repo-settings.yaml" + - ".kokoro/**" + - "**/requirements.txt" + - "**/requirements-test.txt" diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 7b866f5b..2352d4b8 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -15,12 +15,16 @@ name: Lint on: pull_request: - paths-ignore: + paths-ignore: # Changes to the paths list need to be reflected in lint_fallback.yml - "*.md" + - ".kokoro/**" + - ".github/**" pull_request_target: types: [labeled] paths-ignore: - "*.md" + - ".kokoro/**" + - ".github/**" jobs: lint: @@ -50,9 +54,13 @@ jobs: } catch (e) { console.log('Failed to remove label. Another job may have already removed it!'); } - + - name: Checkout Repository uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + token: ${{ secrets.GITHUB_TOKEN }} - name: Setup Python uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 diff --git a/.github/workflows/lint_fallback.yml b/.github/workflows/lint_fallback.yml new file mode 100644 index 00000000..04841b9f --- /dev/null +++ b/.github/workflows/lint_fallback.yml @@ -0,0 +1,30 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Lint +on: + pull_request: + paths: # These paths are the inverse of lint.yml + - "*.md" + - ".kokoro/**" + - ".github/**" + +jobs: + lint: + runs-on: ubuntu-latest + permissions: + contents: none + + steps: + - run: echo "No tests required." diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 908ab451..9c05aeb6 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -97,39 +97,39 @@ colorlog==6.7.0 \ # via # gcp-docuploader # nox -cryptography==42.0.0 \ - --hash=sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b \ - --hash=sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd \ - --hash=sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94 \ - --hash=sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221 \ - --hash=sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e \ - --hash=sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513 \ - --hash=sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d \ - --hash=sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc \ - --hash=sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0 \ - --hash=sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2 \ - --hash=sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87 \ - --hash=sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01 \ - --hash=sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0 \ - --hash=sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4 \ - --hash=sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b \ - --hash=sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81 \ - --hash=sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3 \ - --hash=sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4 \ - --hash=sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf \ - --hash=sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec \ - --hash=sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce \ - --hash=sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0 \ - --hash=sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f \ - --hash=sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f \ - --hash=sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3 \ - --hash=sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689 \ - --hash=sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08 \ - --hash=sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139 \ - --hash=sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434 \ - --hash=sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17 \ - --hash=sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8 \ - --hash=sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440 +cryptography==42.0.4 \ + --hash=sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b \ + --hash=sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce \ + --hash=sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88 \ + --hash=sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7 \ + --hash=sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20 \ + --hash=sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9 \ + --hash=sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff \ + --hash=sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1 \ + --hash=sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764 \ + --hash=sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b \ + --hash=sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298 \ + --hash=sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1 \ + --hash=sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824 \ + --hash=sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257 \ + --hash=sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a \ + --hash=sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129 \ + --hash=sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb \ + --hash=sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929 \ + --hash=sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854 \ + --hash=sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52 \ + --hash=sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923 \ + --hash=sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885 \ + --hash=sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0 \ + --hash=sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd \ + --hash=sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2 \ + --hash=sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18 \ + --hash=sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b \ + --hash=sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992 \ + --hash=sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74 \ + --hash=sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660 \ + --hash=sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925 \ + --hash=sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449 # via # gcp-releasetool # secretstorage @@ -273,7 +273,9 @@ jaraco-classes==3.3.0 \ jeepney==0.8.0 \ --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \ --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755 - # via secretstorage + # via + # keyring + # secretstorage jinja2==3.1.3 \ --hash=sha256:7d6d50dd97d52cbc355597bd845fabfbac3f551e1f99619e39a35ce8c370b5fa \ --hash=sha256:ac8bd6544d4bb2c9792bf3a159e80bba8fda7f07e81bc3aed565432d5925ba90 @@ -490,7 +492,9 @@ rsa==4.9 \ secretstorage==3.3.3 \ --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \ --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99 - # via -r requirements.in + # via + # -r requirements.in + # keyring six==1.16.0 \ --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \ --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 diff --git a/CHANGELOG.md b/CHANGELOG.md index bfaa0a68..8a2bdb1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,17 @@ # Changelog +## [0.2.1](https://github.com/googleapis/langchain-google-spanner-python/compare/v0.2.0...v0.2.1) (2024-03-06) + + +### Bug Fixes + +* Update user agent ([#36](https://github.com/googleapis/langchain-google-spanner-python/issues/36)) ([a8f6f71](https://github.com/googleapis/langchain-google-spanner-python/commit/a8f6f71e9e2567f03d1428cf7c76304f4fa4aa8e)) + + +### Documentation + +* Update colabs ([#30](https://github.com/googleapis/langchain-google-spanner-python/issues/30)) ([af66f7c](https://github.com/googleapis/langchain-google-spanner-python/commit/af66f7c13b0e9a972718e57ce64ce73348035940)) + ## [0.2.0](https://github.com/googleapis/langchain-google-spanner-python/compare/v0.1.0...v0.2.0) (2024-02-29) diff --git a/docs/chat_message_history.ipynb b/docs/chat_message_history.ipynb index 91ca31fd..96cb3e14 100644 --- a/docs/chat_message_history.ipynb +++ b/docs/chat_message_history.ipynb @@ -6,10 +6,14 @@ "collapsed": false }, "source": [ - "# Cloud Spanner\n", - "> [Cloud Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution.\n", + "# Google Spanner\n", + "> [Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution.\n", "\n", - "This notebook goes over how to use `Spanner` to store chat message history with the `SpannerChatMessageHistory` class." + "This notebook goes over how to use `Spanner` to store chat message history with the `SpannerChatMessageHistory` class.\n", + "\n", + "Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-spanner-python/).\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-spanner-python/blob/main/samples/chat_message_history.ipynb)" ] }, { @@ -146,7 +150,7 @@ }, "outputs": [], "source": [ - "# enable Cloud SQL Admin API\n", + "# enable Spanner API\n", "!gcloud services enable spanner.googleapis.com" ] }, @@ -167,8 +171,8 @@ "id": "OMvzMWRrR6n7" }, "source": [ - "### Set Cloud SQL database values\n", - "Find your database values, in the [Cloud SQL Instances page](https://console.cloud.google.com/sql?_ga=2.223735448.2062268965.1707700487-2088871159.1707257687)." + "### Set Spanner database values\n", + "Find your database values, in the [Spanner Instances page](https://console.cloud.google.com/spanner)." ] }, { diff --git a/docs/document_loader.ipynb b/docs/document_loader.ipynb index 5d0c90c6..283e2874 100644 --- a/docs/document_loader.ipynb +++ b/docs/document_loader.ipynb @@ -4,12 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Spanner\n", + "# Google Spanner\n", "\n", - "> [Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution. Extend your database application to build AI-powered experiences leveraging Cloud SQL's Langchain integrations.\n", + "> [Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution.\n", "\n", "This notebook goes over how to use [Spanner](https://cloud.google.com/spanner) to [save, load and delete langchain documents](https://python.langchain.com/docs/modules/data_connection/document_loaders/) with `SpannerLoader` and `SpannerDocumentSaver`.\n", "\n", + "Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-spanner-python/).\n", + "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-spanner-python/blob/main/docs/document_loader.ipynb)" ] }, diff --git a/docs/vector_store.ipynb b/docs/vector_store.ipynb index 8e594305..76cd27c7 100644 --- a/docs/vector_store.ipynb +++ b/docs/vector_store.ipynb @@ -4,10 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Cloud Spanner\n", - "> [Cloud Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution.\n", + "# Google Spanner\n", + "> [Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution.\n", "\n", - "This notebook goes over how to use `Spanner` for Vector Search with `SpannerVectorStore` class." + "This notebook goes over how to use `Spanner` for Vector Search with `SpannerVectorStore` class.\n", + "\n", + "Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-spanner-python/).\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-spanner-python/blob/main/docs/vector_store.ipynb)" ] }, { @@ -135,7 +139,7 @@ "metadata": {}, "outputs": [], "source": [ - "# enable Cloud SQL Admin API\n", + "# enable Spanner API\n", "!gcloud services enable spanner.googleapis.com" ] }, @@ -150,8 +154,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Set Cloud Spanner database values\n", - "Find your database values, in the [Cloud Spanner Instances page](https://console.cloud.google.com/spanner?_ga=2.223735448.2062268965.1707700487-2088871159.1707257687)." + "### Set Spanner database values\n", + "Find your database values, in the [Spanner Instances page](https://console.cloud.google.com/spanner?_ga=2.223735448.2062268965.1707700487-2088871159.1707257687)." ] }, { diff --git a/samples/langchain_quick_start.ipynb b/samples/langchain_quick_start.ipynb index ff9194fb..2293bf45 100644 --- a/samples/langchain_quick_start.ipynb +++ b/samples/langchain_quick_start.ipynb @@ -34,7 +34,7 @@ "---\n", "# **Introduction**\n", "\n", - "In this codelab you will learn how to create an interactive generative AI application with Retrieval Augmented Generation using Spanner and LangChain. We will be creating an application grounded in a [Netflix Movie dataset](https://www.kaggle.com/datasets/shivamb/netflix-shows)." + "In this codelab, you'll learn how to create a powerful interactive generative AI application using Retrieval Augmented Generation powered by [Spanner](https://cloud.google.com/spanner) and [LangChain](https://www.langchain.com/). We will be creating an application grounded in a [Netflix Movie dataset](https://www.kaggle.com/datasets/shivamb/netflix-shows), allowing you to interact with movie data in exciting new ways." ] }, { @@ -107,7 +107,7 @@ "outputs": [], "source": [ "%pip install langchain-google-spanner\n", - "\n", + "# Install additional dependencies\n", "%pip install langchain langchain-google-vertexai" ] }, @@ -127,9 +127,7 @@ "metadata": {}, "outputs": [], "source": [ - "from google.colab import auth\n", - "\n", - "auth.authenticate_user()" + "!gcloud auth login" ] }, { @@ -367,7 +365,7 @@ "database = instance.database(database_id)\n", "table_name = \"netflix_titles\"\n", "\n", - "database.update_ddl(\n", + "operation = database.update_ddl(\n", " [\n", " \"\"\"CREATE TABLE IF NOT EXISTS {} (\n", " show_id STRING(MAX) NOT NULL,\n", @@ -386,7 +384,12 @@ " table_name\n", " )\n", " ]\n", - ")" + ")\n", + "\n", + "print(\"Waiting for operation to complete...\")\n", + "operation.result(240)\n", + "\n", + "print(\"Table created.\")" ] }, { @@ -654,7 +657,7 @@ "outputs": [], "source": [ "vector_store_rows = read_csv_from_gcs(\n", - " \"cloud-samples-data\", \"langchain/netflix_titles_computed_embeddings.csv\"\n", + " \"cloud-samples-data\", \"langchain/spanner/netflix_titles_embeddings.csv\"\n", ")\n", "\n", "for vector_store_row in vector_store_rows:\n", @@ -815,7 +818,7 @@ "\n", "# Intialize the embedding service\n", "embeddings_service = VertexAIEmbeddings(\n", - " model_name=\"textembedding-gecko@latest\", project=project_id\n", + " model_name=\"textembedding-gecko@003\", project=project_id\n", ")\n", "\n", "# Intialize the Vector Store\n", diff --git a/src/langchain_google_spanner/chat_message_history.py b/src/langchain_google_spanner/chat_message_history.py index 834df709..a4a8ba36 100644 --- a/src/langchain_google_spanner/chat_message_history.py +++ b/src/langchain_google_spanner/chat_message_history.py @@ -26,7 +26,7 @@ from .version import __version__ -USER_AGENT_CHAT = "langchain-google-spanner-python:chat_history" + __version__ +USER_AGENT_CHAT = "langchain-google-spanner-python:chat_history/" + __version__ OPERATION_TIMEOUT_SECONDS = 240 diff --git a/src/langchain_google_spanner/loader.py b/src/langchain_google_spanner/loader.py index e28f963e..a4a31b9b 100644 --- a/src/langchain_google_spanner/loader.py +++ b/src/langchain_google_spanner/loader.py @@ -25,8 +25,8 @@ from .version import __version__ -USER_AGENT_LOADER = "langchain-google-spanner-python:document_loader" + __version__ -USER_AGENT_SAVER = "langchain-google-spanner-python:document_saver" + __version__ +USER_AGENT_LOADER = "langchain-google-spanner-python:document_loader/" + __version__ +USER_AGENT_SAVER = "langchain-google-spanner-python:document_saver/" + __version__ OPERATION_TIMEOUT_SECONDS = 240 MUTATION_BATCH_SIZE = 1000 @@ -308,7 +308,10 @@ def add_documents(self, documents: List[Document]): db = self.client.instance(self.instance_id).database(self.database_id) values = [ _load_doc_to_row( - self._table_fields, doc, self.content_column, self.metadata_json_column + self._table_fields, + doc, + self.content_column, + self.metadata_json_column, ) for doc in documents ] diff --git a/src/langchain_google_spanner/vector_store.py b/src/langchain_google_spanner/vector_store.py index 46afe816..cf5990e8 100644 --- a/src/langchain_google_spanner/vector_store.py +++ b/src/langchain_google_spanner/vector_store.py @@ -15,35 +15,18 @@ from __future__ import annotations import datetime -import json import logging -import re -import uuid -import warnings from abc import ABC, abstractmethod from enum import Enum -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - Iterable, - List, - Optional, - Tuple, - Type, - Union, -) +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type, Union import numpy as np from google.cloud import spanner # type: ignore from google.cloud.spanner_admin_database_v1.types import DatabaseDialect from google.cloud.spanner_v1 import JsonObject, param_types -from google.cloud.spanner_v1.streamed import StreamedResultSet from langchain_community.vectorstores.utils import maximal_marginal_relevance from langchain_core.documents import Document from langchain_core.embeddings import Embeddings -from langchain_core.utils import get_from_dict_or_env from langchain_core.vectorstores import VectorStore from .version import __version__ @@ -55,7 +38,7 @@ EMBEDDING_COLUMN_NAME = "embedding" ADDITIONAL_METADATA_COLUMN_NAME = "metadata" -USER_AGENT_VECTOR_STORE = "langchain-google-spanner-python:vector_store" + __version__ +USER_AGENT_VECTOR_STORE = "langchain-google-spanner-python:vector_store/" + __version__ KNN_DISTANCE_SEARCH_QUERY_ALIAS = "distance" @@ -632,7 +615,8 @@ def _validate_table_schema(self, column_type_map, types, default_columns): if column_name not in self._columns_to_insert: if "NO" == column_config[2].upper(): raise Exception( - "Found not nullable constraint on column: {}.", column_name + "Found not nullable constraint on column: {}.", + column_name, ) def _select_relevance_score_fn(self) -> Callable[[float], float]: @@ -728,7 +712,10 @@ def _insert_data(self, records, columns_to_insert): ) def add_documents( - self, documents: List[Document], ids: Optional[List[str]] = None, **kwargs: Any + self, + documents: List[Document], + ids: Optional[List[str]] = None, + **kwargs: Any, ) -> List[str]: """ Add documents to the vector store. @@ -875,7 +862,7 @@ def _get_rows_by_similarity_search( sql_query = """ SELECT {select_column_names} {distance_function}({embedding_column}, {vector_embedding_placeholder}) AS {distance_alias} - FROM {table_name} + FROM {table_name} WHERE {filter} ORDER BY distance LIMIT {k_count}; @@ -929,7 +916,11 @@ def _get_documents_from_query_results( return documents def similarity_search( - self, query: str, k: int = 4, pre_filter: Optional[str] = None, **kwargs: Any + self, + query: str, + k: int = 4, + pre_filter: Optional[str] = None, + **kwargs: Any, ) -> List[Document]: """ Perform similarity search for a given query. @@ -949,7 +940,11 @@ def similarity_search( return [doc for doc, _ in documents] def similarity_search_with_score( - self, query: str, k: int = 4, pre_filter: Optional[str] = None, **kwargs: Any + self, + query: str, + k: int = 4, + pre_filter: Optional[str] = None, + **kwargs: Any, ) -> List[Tuple[Document, float]]: """ Perform similarity search for a given query with scores. diff --git a/src/langchain_google_spanner/version.py b/src/langchain_google_spanner/version.py index 20c5861b..fffa9d95 100644 --- a/src/langchain_google_spanner/version.py +++ b/src/langchain_google_spanner/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.2.0" +__version__ = "0.2.1" diff --git a/tests/integration/test_spanner_chat_message_history.py b/tests/integration/test_spanner_chat_message_history.py index 1e2f56d2..ad8e7307 100644 --- a/tests/integration/test_spanner_chat_message_history.py +++ b/tests/integration/test_spanner_chat_message_history.py @@ -37,25 +37,10 @@ def client() -> Client: @pytest.fixture(scope="module") def setup(client): for env in ["GOOGLE_DATABASE", "PG_DATABASE"]: - google_schema = f"""CREATE TABLE IF NOT EXISTS {table_name} ( - id STRING(36) DEFAULT (GENERATE_UUID()), - created_at TIMESTAMP NOT NULL OPTIONS (allow_commit_timestamp=true), - session_id STRING(MAX) NOT NULL, - message JSON NOT NULL, - ) PRIMARY KEY (session_id, created_at ASC, id)""" - - pg_schema = f"""CREATE TABLE IF NOT EXISTS {table_name} ( - id varchar(36) DEFAULT (spanner.generate_uuid()), - created_at SPANNER.COMMIT_TIMESTAMP NOT NULL, - session_id TEXT NOT NULL, - message JSONB NOT NULL, - PRIMARY KEY (session_id, created_at, id) - );""" database_id = os.environ.get(env) - ddl = pg_schema if env == "PG_DATABASE" else google_schema - database = client.instance(instance_id).database(database_id) - operation = database.update_ddl([ddl]) - operation.result(OPERATION_TIMEOUT_SECONDS) + SpannerChatMessageHistory.create_chat_history_table( + instance_id, database_id, table_name + ) yield for env in ["GOOGLE_DATABASE", "PG_DATABASE"]: database_id = os.environ.get(env) diff --git a/tests/integration/test_spanner_loader.py b/tests/integration/test_spanner_loader.py index 274c4fc4..6ebe0143 100644 --- a/tests/integration/test_spanner_loader.py +++ b/tests/integration/test_spanner_loader.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import json import os import pytest