diff --git a/CHANGELOG.md b/CHANGELOG.md index a7be2f02..4da9dd22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## [0.6.0](https://github.com/googleapis/langchain-google-spanner-python/compare/v0.5.0...v0.6.0) (2024-12-05) + + +### Features + +* Add Spanner Graph QA Chain ([#111](https://github.com/googleapis/langchain-google-spanner-python/issues/111)) ([e22abde](https://github.com/googleapis/langchain-google-spanner-python/commit/e22abde9a94625ee69f8975fc0950cedd11bc542)) + ## [0.5.0](https://github.com/googleapis/langchain-google-spanner-python/compare/v0.4.1...v0.5.0) (2024-11-25) diff --git a/README.rst b/README.rst index 238ebce2..cb047dd8 100644 --- a/README.rst +++ b/README.rst @@ -151,6 +151,34 @@ See the full `Spanner Graph Store`_ tutorial. .. _`Spanner Graph Store`: https://github.com/googleapis/langchain-google-spanner-python/blob/main/docs/graph_store.ipynb +Spanner Graph QA Chain Usage +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Use ``SpannerGraphQAChain`` for question answering over a graph stored in Spanner Graph. + +.. code:: python + + from langchain_google_spanner import SpannerGraphStore, SpannerGraphQAChain + from langchain_google_vertexai import ChatVertexAI + + + graph = SpannerGraphStore( + instance_id="my-instance", + database_id="my-database", + graph_name="my_graph", + ) + llm = ChatVertexAI() + chain = SpannerGraphQAChain.from_llm( + llm, + graph=graph, + allow_dangerous_requests=True + ) + chain.invoke("query=Where does Sarah's sibling live?") + +See the full `Spanner Graph QA Chain`_ tutorial. + +.. _`Spanner Graph QA Chain`: https://github.com/googleapis/langchain-google-spanner-python/blob/main/docs/graph_qa_chain.ipynb + Contributions ~~~~~~~~~~~~~ diff --git a/docs/graph_qa_chain.ipynb b/docs/graph_qa_chain.ipynb new file mode 100644 index 00000000..7746e5c8 --- /dev/null +++ b/docs/graph_qa_chain.ipynb @@ -0,0 +1,693 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + }, + "colab": { + "provenance": [], + "toc_visible": true + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Google Spanner\n", + "\n", + "> [Spanner](https://cloud.google.com/spanner) is a highly scalable database that combines unlimited scalability with relational semantics, such as secondary indexes, strong consistency, schemas, and SQL providing 99.999% availability in one easy solution.\n", + "\n", + "This notebook goes over how to use `Spanner` for GraphRAG with `SpannerGraphStore` and `SpannerGraphQAChain` class.\n", + "\n", + "Learn more about the package on [GitHub](https://github.com/googleapis/langchain-google-spanner-python/).\n", + "\n", + "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/googleapis/langchain-google-spanner-python/blob/main/docs/graph_qa_chain.ipynb)" + ], + "metadata": { + "id": "7VBkjcqNNxEd" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Before You Begin\n", + "\n", + "To run this notebook, you will need to do the following:\n", + "\n", + " * [Create a Google Cloud Project](https://developers.google.com/workspace/guides/create-project)\n", + " * [Enable the Cloud Spanner 
API](https://console.cloud.google.com/flows/enableapi?apiid=spanner.googleapis.com)\n", + " * [Create a Spanner instance](https://cloud.google.com/spanner/docs/create-manage-instances)\n", + " * [Create a Spanner database](https://cloud.google.com/spanner/docs/create-manage-databases)" + ], + "metadata": { + "id": "HEAGYTPgNydh" + } + }, + { + "cell_type": "markdown", + "source": [ + "### 🦜🔗 Library Installation\n", + "The integration lives in its own `langchain-google-spanner` package, so we need to install it." + ], + "metadata": { + "id": "cboPIg-yOcxS" + } + }, + { + "cell_type": "code", + "source": [ + "%pip install --upgrade --quiet langchain-google-spanner langchain-google-vertexai langchain-experimental json-repair pyvis" + ], + "metadata": { + "id": "AOWh6QKYVdDp" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "**Colab only:** Uncomment the following cell to restart the kernel or use the button to restart the kernel. For Vertex AI Workbench you can restart the terminal using the button on top." + ], + "metadata": { + "id": "M7MqpDhkOiP-" + } + }, + { + "cell_type": "code", + "source": [ + "# # Automatically restart kernel after installs so that your environment can access the new packages\n", + "import IPython\n", + "\n", + "app = IPython.Application.instance()\n", + "app.kernel.do_shutdown(True)" + ], + "metadata": { + "id": "xzgVZv0POj17" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### 🔐 Authentication\n", + "Authenticate to Google Cloud as the IAM user logged into this notebook in order to access your Google Cloud Project.\n", + "\n", + "* If you are using Colab to run this notebook, use the cell below and continue.\n", + "* If you are using Vertex AI Workbench, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env)." + ], + "metadata": { + "id": "zfIhwIryOls1" + } + }, + { + "cell_type": "code", + "source": [ + "from google.colab import auth\n", + "\n", + "auth.authenticate_user()" + ], + "metadata": { + "id": "EWOkHI7XOna2" + }, + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### ☁ Set Your Google Cloud Project\n", + "Set your Google Cloud project so that you can leverage Google Cloud resources within this notebook.\n", + "\n", + "If you don't know your project ID, try the following:\n", + "\n", + "* Run `gcloud config list`.\n", + "* Run `gcloud projects list`.\n", + "* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)." + ], + "metadata": { + "id": "6xHXneICOpsB" + } + }, + { + "cell_type": "code", + "source": [ + "# @markdown Please fill in the value below with your Google Cloud project ID and then run the cell.\n", + "\n", + "PROJECT_ID = \"google.com:cloud-spanner-demo\" # @param {type:\"string\"}\n", + "\n", + "# Set the project id\n", + "!gcloud config set project {PROJECT_ID}\n", + "%env GOOGLE_CLOUD_PROJECT={PROJECT_ID}" + ], + "metadata": { + "id": "hF0481BGOsS8" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### 💡 API Enablement\n", + "The `langchain-google-spanner` package requires that you [enable the Spanner API](https://console.cloud.google.com/flows/enableapi?apiid=spanner.googleapis.com) in your Google Cloud Project." 
+ ], + "metadata": { + "id": "4TiC0RbhOwUu" + } + }, + { + "cell_type": "code", + "source": [ + "# enable Spanner API\n", + "!gcloud services enable spanner.googleapis.com" + ], + "metadata": { + "id": "9f3fJd5eOyRr" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "You must also [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com)." + ], + "metadata": { + "id": "bT_S-jaEOW4P" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Basic Usage" + ], + "metadata": { + "id": "k5pxMMiMOzt7" + } + }, + { + "cell_type": "markdown", + "source": [ + "### Set Spanner database values\n", + "Find your database values in the [Spanner Instances page](https://console.cloud.google.com/spanner?_ga=2.223735448.2062268965.1707700487-2088871159.1707257687)." + ], + "metadata": { + "id": "mtDbLU5sO2iA" + } + }, + { + "cell_type": "code", + "source": [ + "# @title Set Your Values Here { display-mode: \"form\" }\n", + "INSTANCE = \"\" # @param {type: \"string\"}\n", + "DATABASE = \"\" # @param {type: \"string\"}\n", + "GRAPH_NAME = \"\" # @param {type: \"string\"}" + ], + "metadata": { + "id": "C-I8VTIcO442" + }, + "execution_count": 15, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "### SpannerGraphStore\n", + "\n", + "To initialize the `SpannerGraphStore` class you need to provide 3 required arguments; the other arguments are optional and only need to be passed if they differ from the defaults:\n", + "\n", + "1. a Spanner instance id;\n", + "2. a Spanner database id that belongs to the above instance id;\n", + "3. a Spanner graph name used to create a graph in the above database." + ], + "metadata": { + "id": "kpAv-tpcO_iL" + } + }, + { + "cell_type": "code", + "source": [ + "from langchain_google_spanner import SpannerGraphStore\n", + "\n", + "graph_store = SpannerGraphStore(\n", + " instance_id=INSTANCE,\n", + " database_id=DATABASE,\n", + " graph_name=GRAPH_NAME,\n", + ")" + ], + "metadata": { + "id": "u589YapWQFb8" + }, + "execution_count": 16, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "#### Add Graph Documents to Spanner Graph" + ], + "metadata": { + "id": "G7-Pe2ADQlNJ" + } + }, + { + "cell_type": "code", + "source": [ + "# @title Extract Nodes and Edges from text snippets\n", + "from langchain_core.documents import Document\n", + "from langchain_experimental.graph_transformers import LLMGraphTransformer\n", + "from langchain_google_vertexai import ChatVertexAI\n", + "from langchain_text_splitters import RecursiveCharacterTextSplitter\n", + "\n", + "text_snippets = [\n", + " # Text snippet for students graduating from Veritas University, Computer Science Dept 2017\n", + " \"\"\"\n", + "This was the graduation ceremony of 2017. A wave of jubilant graduates poured out of the\n", + "grand halls of Veritas University, their laughter echoing across the quad. Among them were\n", + "a cohort of exceptional students from the Computer Science department, a group that had\n", + "become known for their collaborative spirit and innovative ideas.\n", + "Leading the pack was Emily Davis, a coding whiz with a passion for cybersecurity, already\n", + "fielding offers from top tech firms. Beside her walked James Rodriguez, a quiet but\n", + "brilliant mind fascinated by artificial intelligence, dreaming of building machines that\n", + "could understand human emotions. 
Trailing slightly behind, deep in conversation, were\n", + "Sarah Chen and Michael Patel, both aspiring game developers, eager to bring their creative\n", + "visions to life. And then there was Aisha Khan, a social justice advocate who planned to\n", + "use her coding skills to address inequality through technology.\n", + "As they celebrated their achievements, these Veritas University Computer Science graduates\n", + "were ready to embark on diverse paths, each carrying the potential to shape the future of\n", + "technology in their own unique way.\n", + "\"\"\",\n", + " # Text snippet for students graduting from Oakhaven University, Computer Science Dept 2016\n", + " \"\"\"\n", + "The year was 2016, and a palpable buzz filled the air as the graduating class of Oakhaven\n", + "university from Computer science and Engineering department emerged from the Beckman\n", + "Auditorium. Among them was a group of exceptional students, renowned for their\n", + "intellectual curiosity and groundbreaking research.\n", + "At the forefront was Alice Johnson, a gifted programmer with a fascination for quantum\n", + "computing, already collaborating with leading researchers in the field. Beside her\n", + "strode David Kim, a brilliant theorist captivated by the intricacies of cryptography,\n", + "eager to contribute to the development of secure communication systems. Engaged in an\n", + "animated discussion were Maria Rodriguez and Robert Lee, both passionate about robotics\n", + "and determined to push the boundaries of artificial intelligence. And then there was\n", + "Chloe Brown, a visionary with a deep interest in bioinformatics, driven to unlock the\n", + "secrets of the human genome through computational analysis.\n", + "As they celebrated their accomplishments, these graduates, armed with their exceptional\n", + "skills and unwavering determination, were poised to make significant contributions to the world of computing and beyond.\n", + "\"\"\",\n", + " # Text snippet mentions the company Emily Davis founded.\n", + " # The snippet doesn't mention that she is an alumni of Veritas University\n", + " \"\"\"\n", + "Emily Davis, a name synonymous with cybersecurity innovation, turned that passion into a\n", + "thriving business. In the year 2022, Davis founded Ironclad Security, a company that's\n", + "rapidly changing the landscape of cybersecurity solutions.\n", + "\"\"\",\n", + " # Text snippet mentions the company Alice Johnson founded.\n", + " # The snippet doesn't mention that she is an alumni of Oakhaven University.\n", + " \"\"\"\n", + "Alice Johnson had a vision that extended far beyond the classroom. 
Driven by an insatiable\n", + "curiosity about the potential of quantum mechanics, she founded Entangled Solutions, a\n", + "company poised to revolutionize industries through the power of quantum technology.\n", + "Entangled Solutions distinguishes itself by focusing on practical applications of quantum\n", + "computing.\n", + "\"\"\",\n", + "]\n", + "\n", + "# Create splits for documents\n", + "documents = [Document(page_content=t) for t in text_snippets]\n", + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)\n", + "splits = text_splitter.split_documents(documents)\n", + "llm = ChatVertexAI(model=\"gemini-1.5-flash\", temperature=0)\n", + "llm_transformer = LLMGraphTransformer(\n", + " llm=llm,\n", + " allowed_nodes=[\"College\", \"Deparatment\", \"Person\", \"Year\", \"Company\"],\n", + " allowed_relationships=[\n", + " \"AlumniOf\",\n", + " \"StudiedInDepartment\",\n", + " \"PartOf\",\n", + " \"GraduatedInYear\",\n", + " \"Founded\",\n", + " ],\n", + " node_properties=[\n", + " \"description\",\n", + " ],\n", + ")\n", + "graph_documents = llm_transformer.convert_to_graph_documents(splits)" + ], + "metadata": { + "id": "fP7XNu3aPl5c" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Print extracted nodes and edges\n", + "for doc in graph_documents:\n", + " print(doc.source.page_content[:100])\n", + " print(doc.nodes)\n", + " print(doc.relationships)\n", + " print()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "OylyNyv-ZsT2", + "outputId": "e4253d98-ad63-4ea8-a5f1-0e3dac8f6632" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "This was the graduation ceremony of 2017. 
A wave of jubilant graduates poured out of the\n", + "grand halls\n", + "[Node(id='Veritas University', type='College', properties={'description': 'grand halls'}), Node(id='Computer Science', type='Deparatment', properties={}), Node(id='2017', type='Year', properties={}), Node(id='Emily Davis', type='Person', properties={'description': 'coding whiz with a passion for cybersecurity'}), Node(id='James Rodriguez', type='Person', properties={'description': 'quiet but brilliant mind fascinated by artificial intelligence'}), Node(id='Sarah Chen', type='Person', properties={'description': 'aspiring game developers'}), Node(id='Michael Patel', type='Person', properties={'description': 'aspiring game developers'}), Node(id='Aisha Khan', type='Person', properties={'description': 'social justice advocate'})]\n", + "[Relationship(source=Node(id='Emily Davis', type='Person', properties={}), target=Node(id='Veritas University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='James Rodriguez', type='Person', properties={}), target=Node(id='Veritas University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Sarah Chen', type='Person', properties={}), target=Node(id='Veritas University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Michael Patel', type='Person', properties={}), target=Node(id='Veritas University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Aisha Khan', type='Person', properties={}), target=Node(id='Veritas University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Emily Davis', type='Person', properties={}), target=Node(id='Computer Science', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='James Rodriguez', type='Person', properties={}), target=Node(id='Computer Science', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='Sarah Chen', type='Person', properties={}), target=Node(id='Computer Science', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='Michael Patel', type='Person', properties={}), target=Node(id='Computer Science', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='Aisha Khan', type='Person', properties={}), target=Node(id='Computer Science', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='Emily Davis', type='Person', properties={}), target=Node(id='2017', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='James Rodriguez', type='Person', properties={}), target=Node(id='2017', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='Sarah Chen', type='Person', properties={}), target=Node(id='2017', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='Michael Patel', type='Person', properties={}), target=Node(id='2017', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='Aisha Khan', type='Person', properties={}), target=Node(id='2017', type='Year', properties={}), type='GRADUATEDINYEAR', properties={})]\n", + "\n", + "visions to life. 
And then there was Aisha Khan, a social justice advocate who planned to\n", + "use her c\n", + "[Node(id='Veritas University', type='College', properties={}), Node(id='Computer Science', type='Deparatment', properties={}), Node(id='Aisha Khan', type='Person', properties={'description': 'social justice advocate'})]\n", + "[Relationship(source=Node(id='Aisha Khan', type='Person', properties={}), target=Node(id='Veritas University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Aisha Khan', type='Person', properties={}), target=Node(id='Computer Science', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={})]\n", + "\n", + "The year was 2016, and a palpable buzz filled the air as the graduating class of Oakhaven\n", + "university\n", + "[Node(id='Oakhaven University', type='College', properties={'description': 'Oakhaven university'}), Node(id='Computer Science And Engineering', type='Deparatment', properties={'description': 'Computer science and Engineering'}), Node(id='2016', type='Year', properties={'description': '2016'}), Node(id='Alice Johnson', type='Person', properties={'description': 'a gifted programmer with a fascination for quantum computing, already collaborating with leading researchers in the field'}), Node(id='David Kim', type='Person', properties={'description': 'a brilliant theorist captivated by the intricacies of cryptography, eager to contribute to the development of secure communication systems'}), Node(id='Maria Rodriguez', type='Person', properties={'description': 'passionate about robotics and determined to push the boundaries of artificial intelligence'}), Node(id='Robert Lee', type='Person', properties={'description': 'passionate about robotics and determined to push the boundaries of artificial intelligence'}), Node(id='Chloe Brown', type='Person', properties={'description': 'a visionary with a deep interest in bioinformatics, driven to unlock the secrets of the human genome through computational analysis'}), Node(id='Beckman Auditorium', type='Deparatment', properties={'description': 'Beckman Auditorium'})]\n", + "[Relationship(source=Node(id='Alice Johnson', type='Person', properties={}), target=Node(id='Oakhaven University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='David Kim', type='Person', properties={}), target=Node(id='Oakhaven University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Maria Rodriguez', type='Person', properties={}), target=Node(id='Oakhaven University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Robert Lee', type='Person', properties={}), target=Node(id='Oakhaven University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Chloe Brown', type='Person', properties={}), target=Node(id='Oakhaven University', type='College', properties={}), type='ALUMNIOF', properties={}), Relationship(source=Node(id='Computer Science And Engineering', type='Deparatment', properties={}), target=Node(id='Oakhaven University', type='College', properties={}), type='PARTOF', properties={}), Relationship(source=Node(id='Alice Johnson', type='Person', properties={}), target=Node(id='Computer Science And Engineering', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='David Kim', type='Person', properties={}), target=Node(id='Computer Science And 
Engineering', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='Maria Rodriguez', type='Person', properties={}), target=Node(id='Computer Science And Engineering', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='Robert Lee', type='Person', properties={}), target=Node(id='Computer Science And Engineering', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='Chloe Brown', type='Person', properties={}), target=Node(id='Computer Science And Engineering', type='Deparatment', properties={}), type='STUDIEDINDEPARTMENT', properties={}), Relationship(source=Node(id='Oakhaven University', type='College', properties={}), target=Node(id='2016', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='Alice Johnson', type='Person', properties={}), target=Node(id='2016', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='David Kim', type='Person', properties={}), target=Node(id='2016', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='Maria Rodriguez', type='Person', properties={}), target=Node(id='2016', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='Robert Lee', type='Person', properties={}), target=Node(id='2016', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='Chloe Brown', type='Person', properties={}), target=Node(id='2016', type='Year', properties={}), type='GRADUATEDINYEAR', properties={}), Relationship(source=Node(id='Oakhaven University', type='College', properties={}), target=Node(id='Beckman Auditorium', type='Deparatment', properties={}), type='PARTOF', properties={})]\n", + "\n", + "Chloe Brown, a visionary with a deep interest in bioinformatics, driven to unlock the\n", + "secrets of the\n", + "[Node(id='Chloe Brown', type='Person', properties={'description': 'a visionary with a deep interest in bioinformatics, driven to unlock the secrets of the human genome through computational analysis'})]\n", + "[]\n", + "\n", + "Emily Davis, a name synonymous with cybersecurity innovation, turned that passion into a\n", + "thriving bu\n", + "[Node(id='Emily Davis', type='Person', properties={'description': 'a name synonymous with cybersecurity innovation'}), Node(id='Ironclad Security', type='Company', properties={'description': \"a company that's rapidly changing the landscape of cybersecurity solutions\"}), Node(id='2022', type='Year', properties={})]\n", + "[Relationship(source=Node(id='Emily Davis', type='Person', properties={}), target=Node(id='Ironclad Security', type='Company', properties={}), type='FOUNDED', properties={}), Relationship(source=Node(id='Emily Davis', type='Person', properties={}), target=Node(id='2022', type='Year', properties={}), type='FOUNDED', properties={})]\n", + "\n", + "Alice Johnson had a vision that extended far beyond the classroom. 
Driven by an insatiable\n", + "curiosity\n", + "[Node(id='Alice Johnson', type='Person', properties={'description': 'Driven by an insatiable curiosity about the potential of quantum mechanics, she founded Entangled Solutions, a company poised to revolutionize industries through the power of quantum technology.'}), Node(id='Entangled Solutions', type='Company', properties={'description': 'Entangled Solutions distinguishes itself by focusing on practical applications of quantum computing.'})]\n", + "[Relationship(source=Node(id='Alice Johnson', type='Person', properties={}), target=Node(id='Entangled Solutions', type='Company', properties={}), type='FOUNDED', properties={})]\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Load the graph to Spanner Graph database\n", + "# Uncomment the line below, if you want to cleanup from\n", + "# previous iterations.\n", + "# BeWARE - THIS COULD REMOVE DATA FROM YOUR DATABASE !!!\n", + "# graph_store.cleanup()\n", + "\n", + "\n", + "for graph_document in graph_documents:\n", + " graph_store.add_graph_documents([graph_document])" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "lMXvOpRbZdau", + "outputId": "26647456-2316-46e3-de43-cfc9845a1050" + }, + "execution_count": 18, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Waiting for DDL operations to complete...\n", + "Insert nodes of type `College`...\n", + "Insert nodes of type `Deparatment`...\n", + "Insert nodes of type `Year`...\n", + "Insert nodes of type `Person`...\n", + "Insert edges of type `Person_ALUMNIOF_College`...\n", + "Insert edges of type `Person_STUDIEDINDEPARTMENT_Deparatment`...\n", + "Insert edges of type `Person_GRADUATEDINYEAR_Year`...\n", + "No schema change required...\n", + "Insert nodes of type `College`...\n", + "Insert nodes of type `Deparatment`...\n", + "Insert nodes of type `Person`...\n", + "Insert edges of type `Person_ALUMNIOF_College`...\n", + "Insert edges of type `Person_STUDIEDINDEPARTMENT_Deparatment`...\n", + "Waiting for DDL operations to complete...\n", + "Insert nodes of type `College`...\n", + "Insert nodes of type `Deparatment`...\n", + "Insert nodes of type `Year`...\n", + "Insert nodes of type `Person`...\n", + "Insert edges of type `Person_ALUMNIOF_College`...\n", + "Insert edges of type `Deparatment_PARTOF_College`...\n", + "Insert edges of type `Person_STUDIEDINDEPARTMENT_Deparatment`...\n", + "Insert edges of type `College_GRADUATEDINYEAR_Year`...\n", + "Insert edges of type `Person_GRADUATEDINYEAR_Year`...\n", + "Insert edges of type `College_PARTOF_Deparatment`...\n", + "No schema change required...\n", + "Insert nodes of type `Person`...\n", + "Waiting for DDL operations to complete...\n", + "Insert nodes of type `Person`...\n", + "Insert nodes of type `Company`...\n", + "Insert nodes of type `Year`...\n", + "Insert edges of type `Person_FOUNDED_Company`...\n", + "Insert edges of type `Person_FOUNDED_Year`...\n", + "No schema change required...\n", + "Insert nodes of type `Person`...\n", + "Insert nodes of type `Company`...\n", + "Insert edges of type `Person_FOUNDED_Company`...\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "### Initialize the Spanner Graph QA Chain\n", + "The Spanner Graph QA Chain takes two parameters, a SpannerGraphStore object and a language model." 
+ ], + "metadata": { + "id": "qlKwtdGN7kaT" + } + }, + { + "cell_type": "code", + "source": [ + "from google.cloud import spanner\n", + "from langchain_google_spanner import SpannerGraphQAChain\n", + "from langchain_google_vertexai import ChatVertexAI\n", + "from IPython.core.display import HTML\n", + "\n", + "# Initialize llm object\n", + "llm = ChatVertexAI(model=\"gemini-1.5-flash-002\", temperature=0)\n", + "\n", + "# Initialize GraphQAChain\n", + "chain = SpannerGraphQAChain.from_llm(\n", + " llm,\n", + " graph=graph_store,\n", + " allow_dangerous_requests=True,\n", + " verbose=True,\n", + " return_intermediate_steps=True,\n", + ")" + ], + "metadata": { + "id": "7yKDAD9s7t7O" + }, + "execution_count": 30, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# @title Run Spanner Graph QA Chain 1\n", + "question = \"Who are the alumni of the college id Veritas University ?\" # @param {type:\"string\"}\n", + "response = chain.invoke(\"query=\" + question)\n", + "response[\"result\"]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 264 + }, + "id": "ukKi9wtH_bF1", + "outputId": "61b66dcb-54cf-4620-a097-b4f0d732d1e3" + }, + "execution_count": 33, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new SpannerGraphQAChain chain...\u001b[0m\n", + "Executing gql:\n", + "\u001b[32;1m\u001b[1;3mGRAPH graph_demo_2\n", + "MATCH (p:Person)-[:ALUMNIOF]->(c:College {id: \"Veritas University\"})\n", + "RETURN p.id AS person_id, c.id AS college_id\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'person_id': 'Aisha Khan', 'college_id': 'Veritas University'}, {'person_id': 'Emily Davis', 'college_id': 'Veritas University'}, {'person_id': 'James Rodriguez', 'college_id': 'Veritas University'}, {'person_id': 'Michael Patel', 'college_id': 'Veritas University'}, {'person_id': 'Sarah Chen', 'college_id': 'Veritas University'}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'Aisha Khan, Emily Davis, James Rodriguez, Michael Patel, and Sarah Chen are alumni of Veritas University.\\n'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 33 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Run Spanner Graph QA Chain 2\n", + "question = \"List the companies, their founders and the college they attended.\" # @param {type:\"string\"}\n", + "response = chain.invoke(\"query=\" + question)\n", + "response[\"result\"]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 281 + }, + "outputId": "e47d9f63-6769-49bc-b3a3-412c10de5c8a", + "id": "lcBc4tG__7Rm" + }, + "execution_count": 34, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new SpannerGraphQAChain chain...\u001b[0m\n", + "Executing gql:\n", + "\u001b[32;1m\u001b[1;3mGRAPH graph_demo_2\n", + "MATCH (p:Person)-[:FOUNDED]->(c:Company), (p)-[:ALUMNIOF]->(cl:College)\n", + "RETURN c.id AS company_id, c.description AS company_description, p.id AS founder_id, p.description AS founder_description, cl.id AS college_id, cl.description AS college_description\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'company_id': 'Entangled Solutions', 'company_description': 'Entangled Solutions distinguishes itself by focusing on practical applications of quantum computing.', 'founder_id': 'Alice 
Johnson', 'founder_description': 'Driven by an insatiable curiosity about the potential of quantum mechanics, she founded Entangled Solutions, a company poised to revolutionize industries through the power of quantum technology.', 'college_id': 'Oakhaven University', 'college_description': 'Oakhaven university'}, {'company_id': 'Ironclad Security', 'company_description': \"a company that's rapidly changing the landscape of cybersecurity solutions\", 'founder_id': 'Emily Davis', 'founder_description': 'a name synonymous with cybersecurity innovation', 'college_id': 'Veritas University', 'college_description': 'grand halls'}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'Entangled Solutions, founded by Alice Johnson who attended Oakhaven University, focuses on practical applications of quantum computing. Ironclad Security, founded by Emily Davis who attended Veritas University, is rapidly changing the landscape of cybersecurity solutions.\\n'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 34 + } + ] + }, + { + "cell_type": "code", + "source": [ + "# @title Run Spanner Graph QA Chain 3\n", + "question = \"Which companies were founded by alumni of college id Veritas University ? Who were the founders ?\" # @param {type:\"string\"}\n", + "response = chain.invoke(\"query=\" + question)\n", + "response[\"result\"]" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 264 + }, + "outputId": "cb40179e-bcec-4399-df9d-a114e02b33f9", + "id": "e6djmq1NAGOM" + }, + "execution_count": 35, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new SpannerGraphQAChain chain...\u001b[0m\n", + "Executing gql:\n", + "\u001b[32;1m\u001b[1;3mGRAPH graph_demo_2\n", + "MATCH (c:College {id: \"Veritas University\"})<-[:ALUMNIOF]-(p:Person)-[:FOUNDED]->(co:Company)\n", + "RETURN co.id AS company_id, co.description AS company_description, p.id AS founder_id, p.description AS founder_description\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[{'company_id': 'Ironclad Security', 'company_description': \"a company that's rapidly changing the landscape of cybersecurity solutions\", 'founder_id': 'Emily Davis', 'founder_description': 'a name synonymous with cybersecurity innovation'}]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "\"Ironclad Security, a company that's rapidly changing the landscape of cybersecurity solutions, was founded by Emily Davis, a name synonymous with cybersecurity innovation.\\n\"" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 35 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "#### Clean up the graph\n", + "\n", + "> USE IT WITH CAUTION!\n", + "\n", + "Clean up all the nodes/edges in your graph and remove your graph definition." 
+ ], + "metadata": { + "id": "pM7TmfI0TEFy" + } + }, + { + "cell_type": "code", + "source": [ + "graph_store.cleanup()" + ], + "metadata": { + "id": "UQWq4-sITOgl" + }, + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/integration.cloudbuild.yaml b/integration.cloudbuild.yaml index 5e96b0e5..538325c4 100644 --- a/integration.cloudbuild.yaml +++ b/integration.cloudbuild.yaml @@ -33,7 +33,7 @@ steps: - "GOOGLE_DATABASE=${_GOOGLE_DATABASE}" - "PG_DATABASE=${_PG_DATABASE}" -timeout: "4800s" +timeout: "7200s" substitutions: _INSTANCE_ID: test-instance _GOOGLE_DATABASE: test-google-db diff --git a/pyproject.toml b/pyproject.toml index 86f2884c..b5c51315 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,8 @@ authors = [ dependencies = [ "langchain-core>=0.1.25, <1.0.0", "langchain-community>=0.0.18, <1.0.0", - "google-cloud-spanner>=3.41.0, <4.0.0" + "google-cloud-spanner>=3.41.0, <4.0.0", + "pydantic>=2.9.1, <3.0.0" ] classifiers = [ "Intended Audience :: Developers", @@ -41,7 +42,8 @@ test = [ "mypy==1.11.2", "pytest==8.3.3", "pytest-asyncio==0.24.0", - "pytest-cov==5.0.0" + "pytest-cov==5.0.0", + "langchain_google_vertexai==1.0.10" ] [build-system] diff --git a/requirements.txt b/requirements.txt index ec56696a..9e161792 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +google-cloud-spanner==3.49.1 langchain-core==0.3.9 langchain-community==0.3.1 -google-cloud-spanner==3.49.1 +pydantic==2.9.1 diff --git a/src/langchain_google_spanner/__init__.py b/src/langchain_google_spanner/__init__.py index fb19446e..28c2dd10 100644 --- a/src/langchain_google_spanner/__init__.py +++ b/src/langchain_google_spanner/__init__.py @@ -13,6 +13,7 @@ # limitations under the License. from langchain_google_spanner.chat_message_history import SpannerChatMessageHistory +from langchain_google_spanner.graph_qa import SpannerGraphQAChain from langchain_google_spanner.graph_store import SpannerGraphStore from langchain_google_spanner.vector_store import ( DistanceStrategy, @@ -32,6 +33,7 @@ "SpannerDocumentSaver", "SpannerLoader", "SpannerGraphStore", + "SpannerGraphQAChain", "TableColumn", "SecondaryIndex", "QueryParameters", diff --git a/src/langchain_google_spanner/graph_qa.py b/src/langchain_google_spanner/graph_qa.py new file mode 100644 index 00000000..ff399b47 --- /dev/null +++ b/src/langchain_google_spanner/graph_qa.py @@ -0,0 +1,387 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
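+"""Question-answering chain over Spanner Graph: generates a GQL query from a natural language question, optionally verifies and retries it against the graph schema, executes it, and summarizes the results."""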
+ +from __future__ import annotations + +import re +from typing import Any, Dict, List, Optional + +from langchain.chains.base import Chain +from langchain_core.callbacks import CallbackManagerForChainRun +from langchain_core.language_models import BaseLanguageModel +from langchain_core.output_parsers import JsonOutputParser, StrOutputParser +from langchain_core.prompts import BasePromptTemplate +from langchain_core.prompts.prompt import PromptTemplate +from langchain_core.runnables import RunnableSequence +from pydantic.v1 import BaseModel, Field + +from langchain_google_spanner.graph_store import SpannerGraphStore + +from .prompts import ( + DEFAULT_GQL_FIX_TEMPLATE, + DEFAULT_GQL_TEMPLATE, + DEFAULT_GQL_VERIFY_TEMPLATE, + SPANNERGRAPH_QA_TEMPLATE, +) + +GQL_GENERATION_PROMPT = PromptTemplate( + template=DEFAULT_GQL_TEMPLATE, + input_variables=["question", "schema"], +) + + +class VerifyGqlOutput(BaseModel): + input_gql: str + made_change: bool + explanation: str + verified_gql: str + + +verify_gql_output_parser = JsonOutputParser(pydantic_object=VerifyGqlOutput) + +GQL_VERIFY_PROMPT = PromptTemplate( + template=DEFAULT_GQL_VERIFY_TEMPLATE, + input_variables=["question", "generated_gql", "graph_schema"], + partial_variables={ + "format_instructions": verify_gql_output_parser.get_format_instructions() + }, +) + +GQL_FIX_PROMPT = PromptTemplate( + template=DEFAULT_GQL_FIX_TEMPLATE, + input_variables=["question", "generated_gql", "err_msg", "schema"], +) + +SPANNERGRAPH_QA_PROMPT = PromptTemplate( + template=SPANNERGRAPH_QA_TEMPLATE, + input_variables=["question", "graph_schema", "graph_query", "context"], +) + +INTERMEDIATE_STEPS_KEY = "intermediate_steps" + + +def fix_gql_syntax(query: str) -> str: + """Fixes the syntax of a GQL query. + Example 1: + Input: + MATCH (p:paper {id: 0})-[c:cites*8]->(p2:paper) + Output: + MATCH (p:paper {id: 0})-[c:cites]->{8}(p2:paper) + Example 2: + Input: + MATCH (p:paper {id: 0})-[c:cites*1..8]->(p2:paper) + Output: + MATCH (p:paper {id: 0})-[c:cites]->{1:8}(p2:paper) + + Args: + query: The input GQL query. + + Returns: + Possibly modified GQL query. + """ + + query = re.sub(r"-\[(.*?):(\w+)\*(\d+)\.\.(\d+)\]->", r"-[\1:\2]->{\3,\4}", query) + query = re.sub(r"-\[(.*?):(\w+)\*(\d+)\]->", r"-[\1:\2]->{\3}", query) + query = re.sub(r"<-\[(.*?):(\w+)\*(\d+)\.\.(\d+)\]-", r"<-[\1:\2]-{\3,\4}", query) + query = re.sub(r"<-\[(.*?):(\w+)\*(\d+)\]-", r"<-[\1:\2]-{\3}", query) + query = re.sub(r"-\[(.*?):(\w+)\*(\d+)\.\.(\d+)\]-", r"-[\1:\2]-{\3,\4}", query) + query = re.sub(r"-\[(.*?):(\w+)\*(\d+)\]-", r"-[\1:\2]-{\3}", query) + return query + + +def extract_gql(text: str) -> str: + """Extract GQL query from a text. + + Args: + text: Text to extract GQL query from. + + Returns: + GQL query extracted from the text. + """ + pattern = r"```(.*?)```" + matches = re.findall(pattern, text, re.DOTALL) + query = matches[0] if matches else text + return fix_gql_syntax(query) + + +class SpannerGraphQAChain(Chain): + """Chain for question-answering against a Spanner Graph database by + generating GQL statements from natural language questions. + + *Security note*: Make sure that the database connection uses credentials + that are narrowly-scoped to only include necessary permissions. + Failure to do so may result in data corruption or loss, since the calling + code may attempt commands that would result in deletion, mutation + of data if appropriately prompted or reading sensitive data if such + data is present in the database. 
+ The best way to guard against such negative outcomes is to (as + appropriate) + limit the permissions granted to the credentials used with this tool. + + See https://python.langchain.com/docs/security for more information. + """ + + graph: SpannerGraphStore = Field(exclude=True) + gql_generation_chain: RunnableSequence + gql_fix_chain: RunnableSequence + gql_verify_chain: RunnableSequence + qa_chain: RunnableSequence + max_gql_fix_retries: int = 1 + """ Number of retries to fix an errornous generated graph query.""" + top_k: int = 10 + """Restricts the number of results returned in the graph query.""" + return_intermediate_steps: bool = False + """Whether to return the intermediate steps along with the final answer.""" + verify_gql: bool = True + """Whether to have a stage in the chain to verify and fix the generated GQL.""" + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + allow_dangerous_requests: bool = False + """Forced user opt-in to acknowledge that the chain can make dangerous requests. + + *Security note*: Make sure that the database connection uses credentials + that are narrowly-scoped to only include necessary permissions. + Failure to do so may result in data corruption or loss, since the calling + code may attempt commands that would result in deletion, mutation + of data if appropriately prompted or reading sensitive data if such + data is present in the database. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this tool. + + See https://python.langchain.com/docs/security for more information. + """ + + def __init__(self, **kwargs: Any) -> None: + """Initialize the chain.""" + super().__init__(**kwargs) + if not self.allow_dangerous_requests: + raise ValueError( + "In order to use this chain, you must acknowledge that it can make " + "dangerous requests by setting `allow_dangerous_requests` to `True`." + "You must narrowly scope the permissions of the database connection " + "to only include necessary permissions. Failure to do so may result " + "in data corruption or loss or reading sensitive data if such data is " + "present in the database. " + "Only use this chain if you understand the risks and have taken the " + "necessary precautions. " + "See https://python.langchain.com/docs/security for more information." + ) + + @property + def input_keys(self) -> List[str]: + """Input keys. + + :meta private: + """ + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + """Output keys. + + :meta private: + """ + return [self.output_key] + + @classmethod + def from_llm( + cls, + llm: Optional[BaseLanguageModel] = None, + *, + qa_prompt: Optional[BasePromptTemplate] = None, + gql_prompt: Optional[BasePromptTemplate] = None, + gql_verify_prompt: Optional[BasePromptTemplate] = None, + gql_fix_prompt: Optional[BasePromptTemplate] = None, + qa_llm_kwargs: Optional[Dict[str, Any]] = None, + gql_llm_kwargs: Optional[Dict[str, Any]] = None, + gql_verify_llm_kwargs: Optional[Dict[str, Any]] = None, + gql_fix_llm_kwargs: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> SpannerGraphQAChain: + """Initialize from LLM.""" + if not llm: + raise ValueError("`llm` parameter must be provided") + if gql_prompt and gql_llm_kwargs: + raise ValueError( + "Specifying gql_prompt and gql_llm_kwargs together is" + " not allowed. Please pass prompt via gql_llm_kwargs." 
+ ) + if gql_fix_prompt and gql_fix_llm_kwargs: + raise ValueError( + "Specifying gql_fix_prompt and gql_fix_llm_kwargs together is" + " not allowed. Please pass prompt via gql_fix_llm_kwargs." + ) + if qa_prompt and qa_llm_kwargs: + raise ValueError( + "Specifying qa_prompt and qa_llm_kwargs together is" + " not allowed. Please pass prompt via qa_llm_kwargs." + ) + + use_qa_llm_kwargs = qa_llm_kwargs if qa_llm_kwargs is not None else {} + use_gql_llm_kwargs = gql_llm_kwargs if gql_llm_kwargs is not None else {} + use_gql_verify_llm_kwargs = ( + gql_verify_llm_kwargs if gql_verify_llm_kwargs is not None else {} + ) + use_gql_fix_llm_kwargs = ( + gql_fix_llm_kwargs if gql_fix_llm_kwargs is not None else {} + ) + + if "prompt" not in use_qa_llm_kwargs: + use_qa_llm_kwargs["prompt"] = ( + qa_prompt if qa_prompt is not None else SPANNERGRAPH_QA_PROMPT + ) + if "prompt" not in use_gql_llm_kwargs: + use_gql_llm_kwargs["prompt"] = ( + gql_prompt if gql_prompt is not None else GQL_GENERATION_PROMPT + ) + if "prompt" not in use_gql_verify_llm_kwargs: + use_gql_verify_llm_kwargs["prompt"] = ( + gql_verify_prompt + if gql_verify_prompt is not None + else GQL_VERIFY_PROMPT + ) + if "prompt" not in use_gql_fix_llm_kwargs: + use_gql_fix_llm_kwargs["prompt"] = ( + gql_fix_prompt if gql_fix_prompt is not None else GQL_FIX_PROMPT + ) + + gql_generation_chain = use_gql_llm_kwargs["prompt"] | llm | StrOutputParser() + gql_fix_chain = use_gql_fix_llm_kwargs["prompt"] | llm | StrOutputParser() + gql_verify_chain = ( + use_gql_verify_llm_kwargs["prompt"] | llm | verify_gql_output_parser + ) + qa_chain = use_qa_llm_kwargs["prompt"] | llm | StrOutputParser() + + return cls( + gql_generation_chain=gql_generation_chain, + gql_fix_chain=gql_fix_chain, + gql_verify_chain=gql_verify_chain, + qa_chain=qa_chain, + **kwargs, + ) + + def execute_query( + self, _run_manager: CallbackManagerForChainRun, gql_query: str + ) -> List[Any]: + try: + _run_manager.on_text("Executing gql:", end="\n", verbose=self.verbose) + _run_manager.on_text( + gql_query, color="green", end="\n", verbose=self.verbose + ) + return self.graph.query(gql_query)[: self.top_k] + except Exception as e: + raise ValueError(str(e)) + + def execute_with_retry( + self, + _run_manager: CallbackManagerForChainRun, + intermediate_steps: List, + question: str, + gql_query: str, + ) -> tuple[str, List[Any]]: + retries = 0 + while retries <= self.max_gql_fix_retries: + try: + intermediate_steps.append({"generated_query": gql_query}) + return gql_query, self.execute_query(_run_manager, gql_query) + except Exception as e: + err_msg = str(e) + self.log_invalid_query(_run_manager, gql_query, err_msg) + intermediate_steps.pop() + intermediate_steps.append({"query_failed_" + str(retries): gql_query}) + fix_chain_result = self.gql_fix_chain.invoke( + { + "question": question, + "err_msg": err_msg, + "generated_gql": gql_query, + "schema": self.graph.get_schema, + } + ) + gql_query = extract_gql(fix_chain_result) + finally: + retries += 1 + + raise ValueError("The generated gql query is invalid") + + def log_invalid_query( + self, + _run_manager: CallbackManagerForChainRun, + generated_gql: str, + err_msg: str, + ) -> None: + _run_manager.on_text("Invalid generated gql:", end="\n", verbose=self.verbose) + _run_manager.on_text(generated_gql, color="red", end="\n", verbose=self.verbose) + _run_manager.on_text( + "Query error: ", color="red", end="\n", verbose=self.verbose + ) + _run_manager.on_text(err_msg, color="red", end="\n", verbose=self.verbose) + + def _call( + 
self, + inputs: Dict[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, str]: + + intermediate_steps: List = [] + + """Generate gql statement, uses it to look up in db and answer question.""" + + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + question = inputs[self.input_key] + gen_response = self.gql_generation_chain.invoke( + {"question": question, "schema": self.graph.get_schema}, + ) + generated_gql = extract_gql(gen_response) + + if self.verify_gql: + verify_response = self.gql_verify_chain.invoke( + { + "question": question, + "generated_gql": generated_gql, + "graph_schema": self.graph.get_schema, + } + ) + verified_gql = fix_gql_syntax(verify_response["verified_gql"]) + intermediate_steps.append({"verified_gql": verified_gql}) + else: + verified_gql = generated_gql + + final_gql = "" + if verified_gql: + (final_gql, context) = self.execute_with_retry( + _run_manager, intermediate_steps, question, verified_gql + ) + if not final_gql: + raise ValueError("No GQL was generated.") + _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose) + _run_manager.on_text( + str(context), color="green", end="\n", verbose=self.verbose + ) + intermediate_steps.append({"context": context}) + else: + context = [] + + qa_result = self.qa_chain.invoke( + { + "question": question, + "graph_schema": self.graph.get_schema, + "graph_query": final_gql, + "context": str(context), + } + ) + chain_result: Dict[str, Any] = {self.output_key: qa_result} + if self.return_intermediate_steps: + chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps + + return chain_result diff --git a/src/langchain_google_spanner/graph_store.py b/src/langchain_google_spanner/graph_store.py index 20ec7466..d3e03a05 100644 --- a/src/langchain_google_spanner/graph_store.py +++ b/src/langchain_google_spanner/graph_store.py @@ -28,6 +28,7 @@ from .type_utils import TypeUtility MUTATION_BATCH_SIZE = 1000 +DEFAULT_DDL_TIMEOUT = 300 class NodeWrapper(object): @@ -834,7 +835,7 @@ def apply_ddls(self, ddls: List[str], options: Dict[str, Any] = {}) -> None: op = self.database.update_ddl(ddl_statements=ddls) print("Waiting for DDL operations to complete...") - return op.result(options.get("timeout", 60)) + return op.result(options.get("timeout", DEFAULT_DDL_TIMEOUT)) def insert_or_update( self, table: str, columns: List[str], values: List[List[Any]] diff --git a/src/langchain_google_spanner/prompts.py b/src/langchain_google_spanner/prompts.py new file mode 100644 index 00000000..29f82865 --- /dev/null +++ b/src/langchain_google_spanner/prompts.py @@ -0,0 +1,244 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +GQL_EXAMPLES = """ +The following query in backtick matches all persons in the graph FinGraph +whose birthday is before 1990-01-10 and +returns their name and birthday. 
+``` +GRAPH FinGraph +MATCH (p:Person WHERE p.birthday < '1990-01-10') +RETURN p.name as name, p.birthday as birthday; +``` + +The following query in backtick finds the owner of the account with the most +incoming transfers by chaining multiple graph linear statements together. +``` +GRAPH FinGraph +MATCH (:Account)-[:Transfers]->(account:Account) +RETURN account, COUNT(*) AS num_incoming_transfers +GROUP BY account +ORDER BY num_incoming_transfers DESC +LIMIT 1 + +NEXT + +MATCH (account:Account)<-[:Owns]-(owner:Person) +RETURN account.id AS account_id, owner.name AS owner_name, num_incoming_transfers; +``` + +The following query finds all the destination accounts one to three transfers +away from a source Account with id equal to 7. +``` +GRAPH FinGraph +MATCH (src:Account {{id: 7}})-[e:Transfers]->{{1, 3}}(dst:Account) +RETURN src.id AS src_account_id, dst.id AS dst_account_id; +``` +Carefully note the syntax in the example above for path quantification, +that it is `[e:Transfers]->{{1, 3}}` and NOT `[e:Transfers*1..3]->` +""" + +DEFAULT_GQL_TEMPLATE_PART0 = """ +Create a Spanner Graph GQL query for the question using the schema. +{gql_examples} +""" + +DEFAULT_GQL_TEMPLATE_PART1 = """ +Instructions: +Mention the name of the graph at the beginning. +Use only nodes and edge types, and properties included in the schema. +Do not use any node and edge type, or properties not included in the schema. +Always alias RETURN values. + +Question: {question} +Schema: {schema} + +Note: +Do not include any explanations or apologies. +Do not prefix query with `gql` +Do not include any backticks. +Start with GRAPH +Output only the query statement. +Do not output any query that tries to modify or delete data. +""" + +DEFAULT_GQL_TEMPLATE = ( + DEFAULT_GQL_TEMPLATE_PART0.format(gql_examples=GQL_EXAMPLES) + + DEFAULT_GQL_TEMPLATE_PART1 +) + +VERIFY_EXAMPLES = """ +Examples: +1. +question: Which movie has won the Oscar award in 1996? +generated_gql: + GRAPH moviedb + MATCH (m:movie)-[:own_award]->(a:award {{name:"Oscar", year:1996}}) + RETURN m.name + +graph_schema: +{{ +"Edges": {{ + "produced_by": "From movie nodes to producer nodes", + "acts": "From actor nodes to movie nodes", + "has_coacted_with": "From actor nodes to actor nodes", + "own_award": "From actor nodes to award nodes" + }} +}} + +The verified gql fixes the missing node 'actor': + MATCH (m:movie)<-[:acts]-(a:actor)-[:own_award]->(aw:award {{name:"Oscar", year:1996}}) + RETURN m.name + +2. +question: Which movies have been produced by production house ABC Movies? +generated_gql: + GRAPH moviedb + MATCH (p:producer {{name:"ABC Movies"}})-[:produced_by]->(m:movie) + RETURN p.name + +graph_schema: +{{ +"Edges": {{ + "produced_by": "From movie nodes to producer nodes", + "acts": "From actor nodes to movie nodes", + "references": "From movie nodes to movie nodes", + "own_award": "From actor nodes to award nodes" + }} +}} + +The verified gql fixes the edge direction: + GRAPH moviedb + MATCH (p:producer {{name:"ABC Movies"}})<-[:produced_by]-(m:movie) + RETURN m.name + +3. +question: Which movie references the movie "XYZ" via at most 3 hops? +graph_schema: +{{ +"Edges": {{ + "produced_by": "From movie nodes to producer nodes", + "acts": "From actor nodes to movie nodes", + "references": "From movie nodes to movie nodes", + "own_award": "From actor nodes to award nodes" + }} +}} + +generated_gql: + GRAPH moviedb + MATCH (m:movie)-[:references*1..3]->(:movie {{name="XYZ"}}) + RETURN m.name + +The path quantification syntax [:references*1..3] is wrong. 
+
The verified gql fixes the path quantification syntax: + GRAPH moviedb + MATCH (m:movie)-[:references]->{{1, 3}}(:movie {{name="XYZ"}}) + RETURN m.name +""" + +DEFAULT_GQL_VERIFY_TEMPLATE_PART0 = """ +Given a natural language question, Spanner Graph GQL graph query and a graph schema, +validate the query. + +{verify_examples} +""" + +DEFAULT_GQL_VERIFY_TEMPLATE_PART1 = """ +Instructions: +Add missing nodes and edges in the query if required. +Fix the path quantification syntax if required. +Carefully check the syntax. +Fix the query if required. There could be more than one correction. +Optimize if possible. +Do not make changes if not required. +Think in steps. Add the explanation in the output. + +Question: {question} +Input gql: {generated_gql} +Schema: {graph_schema} + +{format_instructions} +""" + +DEFAULT_GQL_VERIFY_TEMPLATE = ( + DEFAULT_GQL_VERIFY_TEMPLATE_PART0.format(verify_examples=VERIFY_EXAMPLES) + + DEFAULT_GQL_VERIFY_TEMPLATE_PART1 +) + +DEFAULT_GQL_FIX_TEMPLATE_PART0 = """ +We generated a Spanner Graph GQL query to answer a natural language question. +Question: {question} +However the generated Spanner Graph GQL query is not valid. ``` +Input gql: {generated_gql} +``` +The error obtained when executing the query is +``` +{err_msg} +``` +Give me a correct version of the query. +Do not generate the same query as the input gql. +""" + +DEFAULT_GQL_FIX_TEMPLATE_PART1 = """ +Examples of correct queries: +{gql_examples}""" + +DEFAULT_GQL_FIX_TEMPLATE_PART2 = """ +Instructions: +Mention the name of the graph at the beginning. +Use only nodes and edge types, and properties included in the schema. +Do not use any node and edge type, or properties not included in the schema. +Do not generate the same query as the input gql. +Schema: {schema} + +Note: +Do not include any explanations or apologies. +Do not prefix query with `gql` +Do not include any backticks. +Start with GRAPH +Output only the query statement. +Do not output any query that tries to modify or delete data. +""" + +DEFAULT_GQL_FIX_TEMPLATE = ( + DEFAULT_GQL_FIX_TEMPLATE_PART0 + + DEFAULT_GQL_FIX_TEMPLATE_PART1.format(gql_examples=GQL_EXAMPLES) + + DEFAULT_GQL_FIX_TEMPLATE_PART2 +) + +SPANNERGRAPH_QA_TEMPLATE = """ +You are a helpful AI assistant. +Create a human readable answer for the question. +You should only use the information provided in the context and not use your internal knowledge. +Don't add any information. +Here is an example: + +Question: Which funds own assets over 10M? +Context:[name:ABC Fund, name:Star fund] +Helpful Answer: ABC Fund and Star fund have assets over 10M. + +Follow this example when generating answers. +If the provided information is empty, say that you don't know the answer. 
+You are given the following information: +- `Question`: the natural language question from the user +- `Graph Schema`: contains the schema of the graph database +- `Graph Query`: A Spanner Graph GQL query equivalent of the question from the user used to extract context from the graph database +- `Context`: The response from the graph database as context +Information: +Question: {question} +Graph Schema: {graph_schema} +Graph Query: {graph_query} +Context: {context} + +Helpful Answer:""" diff --git a/src/langchain_google_spanner/version.py b/src/langchain_google_spanner/version.py index 4e1c4749..09fd186f 100644 --- a/src/langchain_google_spanner/version.py +++ b/src/langchain_google_spanner/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.5.0" +__version__ = "0.6.0" diff --git a/tests/integration/test_spanner_graph_qa.py b/tests/integration/test_spanner_graph_qa.py new file mode 100644 index 00000000..8bac7b87 --- /dev/null +++ b/tests/integration/test_spanner_graph_qa.py @@ -0,0 +1,215 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import random +import string + +import pytest +from google.cloud import spanner +from langchain.evaluation import load_evaluator +from langchain_community.graphs.graph_document import GraphDocument, Node, Relationship +from langchain_core.documents import Document +from langchain_google_vertexai import ChatVertexAI, VertexAIEmbeddings + +from langchain_google_spanner.graph_qa import SpannerGraphQAChain +from langchain_google_spanner.graph_store import SpannerGraphStore + +project_id = os.environ["PROJECT_ID"] +instance_id = os.environ["INSTANCE_ID"] +database_id = os.environ["GOOGLE_DATABASE"] + + +def random_string(num_char=3): + return "".join(random.choice(string.ascii_letters) for _ in range(num_char)) + + +def get_llm(): + llm = ChatVertexAI( + model="gemini-1.5-flash-002", + temperature=0, + ) + return llm + + +def get_evaluator(): + return load_evaluator( + "embedding_distance", + embeddings=VertexAIEmbeddings(model_name="text-embedding-004"), + ) + + +def get_spanner_graph(): + suffix = random_string(num_char=3) + graph_name = "test_graph{}".format(suffix) + graph = SpannerGraphStore( + instance_id=instance_id, + database_id=database_id, + graph_name=graph_name, + client=spanner.Client(project=project_id), + ) + return graph + + +def load_data(graph: SpannerGraphStore): + type_suffix = "_" + random_string(num_char=3) + graph_documents = [ + GraphDocument( + nodes=[ + Node( + id="Elias Thorne", + type="Person" + type_suffix, + properties={ + "name": "Elias Thorne", + "description": "lived in the desert", + }, + ), + Node( + id="Zephyr", + type="Animal" + type_suffix, + properties={"name": "Zephyr", "description": "pet falcon"}, + ), + Node( + id="Elara", + type="Person" + type_suffix, + properties={ + "name": "Elara", + "description": "resided in the capital city", + }, + ), + Node(id="Desert", type="Location" + 
type_suffix, properties={}), + Node(id="Capital City", type="Location" + type_suffix, properties={}), + ], + relationships=[ + Relationship( + source=Node( + id="Elias Thorne", type="Person" + type_suffix, properties={} + ), + target=Node( + id="Desert", type="Location" + type_suffix, properties={} + ), + type="LivesIn", + properties={}, + ), + Relationship( + source=Node( + id="Elias Thorne", type="Person" + type_suffix, properties={} + ), + target=Node( + id="Zephyr", type="Animal" + type_suffix, properties={} + ), + type="Owns", + properties={}, + ), + Relationship( + source=Node(id="Elara", type="Person" + type_suffix, properties={}), + target=Node( + id="Capital City", type="Location" + type_suffix, properties={} + ), + type="LivesIn", + properties={}, + ), + Relationship( + source=Node( + id="Elias Thorne", type="Person" + type_suffix, properties={} + ), + target=Node(id="Elara", type="Person" + type_suffix, properties={}), + type="Sibling", + properties={}, + ), + ], + source=Document( + metadata={}, + page_content=( + "Elias Thorne lived in the desert. He was a skilled craftsman" + " who worked with sandstone. Elias had a pet falcon named" + " Zephyr. His sister, Elara, resided in the capital city and" + " ran a spice shop. They rarely met due to the distance." + ), + ), + ) + ] + graph.add_graph_documents(graph_documents) + graph.refresh_schema() + + +class TestSpannerGraphQAChain: + + @pytest.fixture(scope="module") + def setup_db_load_data(self): + graph = get_spanner_graph() + load_data(graph) + yield graph + # teardown + print(graph.get_schema) + graph.cleanup() + + @pytest.fixture + def chain(self, setup_db_load_data): + graph = setup_db_load_data + return SpannerGraphQAChain.from_llm( + get_llm(), + graph=graph, + verbose=True, + return_intermediate_steps=True, + allow_dangerous_requests=True, + ) + + @pytest.fixture + def chain_without_opt_in(self, setup_db_load_data): + graph = setup_db_load_data + return SpannerGraphQAChain.from_llm( + get_llm(), + graph=graph, + verbose=True, + return_intermediate_steps=True, + ) + + def test_spanner_graph_qa_chain_1(self, chain): + question = "Where does Elias Thorne's sibling live?" + response = chain.invoke("query=" + question) + print(response) + + answer = response["result"] + assert ( + get_evaluator().evaluate_strings( + prediction=answer, + reference="Elias Thorne's sibling lives in Capital City.\n", + )["score"] + < 0.1 + ) + + def test_spanner_graph_qa_chain_no_answer(self, chain): + question = "Where does Sarah's sibling live?" + response = chain.invoke("query=" + question) + print(response) + + answer = response["result"] + assert ( + get_evaluator().evaluate_strings( + prediction=answer, + reference="I don't know the answer.\n", + )["score"] + < 0.1 + ) + + def test_spanner_graph_qa_chain_without_opt_in(self, setup_db_load_data): + with pytest.raises(ValueError): + graph = setup_db_load_data + SpannerGraphQAChain.from_llm( + get_llm(), + graph=graph, + verbose=True, + return_intermediate_steps=True, + )
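
For readers skimming the diff, the following is a minimal sketch (not part of the PR) of how the chain exercised by the integration tests above is typically driven, including the intermediate-step output the tests rely on. The project, instance, database, and graph names are placeholders, and a Vertex AI-enabled environment is assumed.

```python
# Minimal sketch mirroring the integration test fixture above; identifiers are placeholders.
from google.cloud import spanner
from langchain_google_spanner import SpannerGraphQAChain, SpannerGraphStore
from langchain_google_vertexai import ChatVertexAI

# Connect to an existing Spanner Graph (placeholder names).
graph = SpannerGraphStore(
    instance_id="my-instance",
    database_id="my-database",
    graph_name="my_graph",
    client=spanner.Client(project="my-project"),
)

chain = SpannerGraphQAChain.from_llm(
    ChatVertexAI(model="gemini-1.5-flash-002", temperature=0),
    graph=graph,
    verbose=True,
    return_intermediate_steps=True,
    # Opting in is required; omitting this raises ValueError (see the last test above).
    allow_dangerous_requests=True,
)

# The tests pass the question prefixed with "query=".
response = chain.invoke("query=Where does Elias Thorne's sibling live?")
print(response["result"])  # e.g. "Elias Thorne's sibling lives in Capital City."
```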