Merged
119 changes: 119 additions & 0 deletions RAG/.gitignore
@@ -0,0 +1,119 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# Chroma vector store
.chroma
# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

# VS Code
.vscode/
*.code-workspace

# PyCharm
.idea/
*.iml

# macOS
.DS_Store

# Windows
Thumbs.db
115 changes: 115 additions & 0 deletions RAG/README.md
@@ -0,0 +1,115 @@
# Playground/RAG

Part of the [playground](https://github.com/commitBlob/playground) repository collection.

## LangGraph Agentic RAG

An intelligent question-answering system built with LangGraph that combines vector store retrieval and web search capabilities with sophisticated answer validation. The system uses an agent-based approach to dynamically decide the best information source and verify the quality of responses.

### Project Location

This project is located in the `RAG` directory of the playground repository:
```
playground/
└── RAG/                # This project
    ├── ingestion.py
    ├── main.py
    └── graph/
        └── ...
```

## Features

- **Smart Routing**: Automatically routes questions to either vector store or web search based on the question's content
- **Document Relevance Grading**: Evaluates retrieved documents for relevance to the question
- **Hallucination Detection**: Verifies that generated answers are grounded in the source documents
- **Answer Quality Assessment**: Ensures generated responses actually answer the user's question
- **Fallback Mechanisms**: Dynamically switches to web search when vector store results are insufficient
- **Flexible Architecture**: Built with LangGraph for clear state management and workflow control

## Architecture

The system uses a state-based graph architecture with several key components:

1. **Router**: Determines whether to use vector store or web search based on the question
2. **Retriever**: Fetches relevant documents from Pinecone vector store
3. **Document Grader**: Evaluates document relevance
4. **Generator**: Creates answers based on retrieved documents
5. **Answer Grader**: Validates answers for hallucinations and relevance
6. **Web Search**: Provides additional information when needed using Tavily Search
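
The control flow above can be sketched, independently of LangGraph and the LLM-backed chains, as plain Python with stub components. Everything here is illustrative only — the real system delegates each step to an LLM:

```python
# Hypothetical, LLM-free sketch of the routing/grading/fallback flow.

def route(question: str) -> str:
    # Stub router: vectorstore topics mirror the system prompt in router.py.
    topics = ("agent", "prompt engineering", "adversarial")
    return "vectorstore" if any(t in question.lower() for t in topics) else "websearch"

def retrieve(question: str) -> list[str]:
    # Stub retriever standing in for the Pinecone vector store.
    return ["Agent memory combines short-term context and long-term storage."]

def grade_documents(question: str, docs: list[str]) -> list[str]:
    # Stub grader: keep documents sharing a keyword with the question.
    words = set(question.lower().split())
    return [d for d in docs if words & set(d.lower().split())]

def web_search(question: str) -> list[str]:
    # Stub for the Tavily fallback.
    return [f"Web result for: {question}"]

def generate(question: str, docs: list[str]) -> str:
    return f"Answer based on {len(docs)} document(s)."

def answer(question: str) -> str:
    if route(question) == "vectorstore":
        docs = grade_documents(question, retrieve(question))
        if not docs:  # fallback: vector store results were insufficient
            docs = web_search(question)
    else:
        docs = web_search(question)
    return generate(question, docs)

print(answer("What is agent memory?"))
```

The real graph adds the hallucination and answer graders after generation, looping back when a check fails.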

## Setup

1. Clone the repository
2. Install dependencies:
```bash
pip install langchain langchainhub langchain-community langchain-openai langchain-tavily langchain-pinecone langgraph python-dotenv pytest
```

3. Create a `.env` file with the following keys:
```
OPENAI_API_KEY=your_openai_key
PINECONE_API_KEY=your_pinecone_key
TAVILY_API_KEY=your_tavily_key
# Optional (LangSmith tracing)
LANGSMITH_API_KEY=your_langsmith_key
LANGSMITH_TRACING=true
LANGSMITH_PROJECT_NAME=your_project_name
```
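
These keys are loaded at runtime with python-dotenv. As a rough illustration of the `KEY=value` format only (python-dotenv itself handles quoting, comments, and interpolation far more robustly), a minimal parser looks like:

```python
# Illustrative-only parser for the KEY=value lines shown above;
# in the project, python-dotenv's load_dotenv() does this properly.
def parse_env(text: str) -> dict[str, str]:
    env = {}
    for line in text.splitlines():
        line = line.strip()
        if line and not line.startswith("#") and "=" in line:
            key, _, value = line.partition("=")
            env[key.strip()] = value.strip()
    return env

required = {"OPENAI_API_KEY", "PINECONE_API_KEY", "TAVILY_API_KEY"}
sample = "OPENAI_API_KEY=sk-test\nPINECONE_API_KEY=pc-test\nTAVILY_API_KEY=tv-test"
assert not required - parse_env(sample).keys()
```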

## Usage

1. Run the ingestion script to populate the vector store:
```bash
python ingestion.py
```

2. Run the main application:
```bash
python main.py
```

Example query:
```python
from graph.graph import app

result = app.invoke(input={"question": "What is agent memory?"})
print(result)
```

## Project Structure

```
├── ingestion.py                  # Document ingestion and vector store setup
├── main.py                       # Main application entry point
├── graph/
│   ├── chains/                   # LangChain components
│   │   ├── answer_grader.py
│   │   ├── generation.py
│   │   ├── hallucination_grader.py
│   │   ├── retrieval_grader.py
│   │   └── router.py
│   ├── nodes/                    # Graph nodes implementation
│   │   ├── generate.py
│   │   ├── grade_documents.py
│   │   ├── retrieve.py
│   │   └── web_search.py
│   ├── tests/                    # Test cases
│   ├── consts.py                 # Node name constants
│   ├── graph.py                  # Main graph definition
│   └── state.py                  # State management
```

## Testing

Run the test suite:
```bash
pytest . -s -v
```
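
The chain tests call real LLM endpoints, so they need the API keys above. A common pattern for suites like this (a hypothetical sketch, not code from this PR) is to skip LLM-backed tests cleanly when credentials are absent:

```python
import os

import pytest

# Skip LLM-backed tests when no API key is configured, so the suite
# still passes in environments without credentials.
requires_openai = pytest.mark.skipif(
    not os.environ.get("OPENAI_API_KEY"),
    reason="OPENAI_API_KEY not set",
)

@requires_openai
def test_router_routes_agent_questions_to_vectorstore():
    # Import inside the test so collection works without the key.
    from graph.chains.router import question_router

    res = question_router.invoke({"question": "What is agent memory?"})
    assert res.datasource == "vectorstore"
```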

## Flow Visualization

The system generates a visual representation of the workflow graph in `graph_output.png`:

![LangGraph Workflow](graph_output.png)

This visualization shows the complete flow of the question-answering system, including routing decisions, document retrieval, grading steps, and generation paths.
Empty file added RAG/graph/__init__.py
Empty file.
Empty file added RAG/graph/chains/__init__.py
Empty file.
25 changes: 25 additions & 0 deletions RAG/graph/chains/answer_grader.py
@@ -0,0 +1,25 @@
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableSequence
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class GradeAnswer(BaseModel):
    binary_score: bool = Field(
        description="Answer addresses the question, 'yes' or 'no'"
    )

llm = ChatOpenAI(temperature=0)

structured_llm_grader = llm.with_structured_output(GradeAnswer)

system = """You are a grader assessing whether an answer addresses / resolves a question. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question."""
answer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "User question: \n\n {question} \n\n LLM generation: {generation}"),
    ]
)

answer_grader: RunnableSequence = answer_prompt | structured_llm_grader
8 changes: 8 additions & 0 deletions RAG/graph/chains/generation.py
@@ -0,0 +1,8 @@
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0)
prompt = hub.pull("rlm/rag-prompt")

generation_chain = prompt | llm | StrOutputParser()
24 changes: 24 additions & 0 deletions RAG/graph/chains/hallucination_grader.py
@@ -0,0 +1,24 @@
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableSequence
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field

llm = ChatOpenAI(temperature=0)

class GradeHallucinations(BaseModel):
    """Binary score for hallucination present in the generated answer."""

    binary_score: bool = Field(description="Answer is grounded in the facts, 'yes' or 'no'")

structured_llm_grader = llm.with_structured_output(GradeHallucinations)

system = """You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n
Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts."""
hallucination_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Set of facts: \n\n {documents} \n\n LLM generation: {generation}"),
    ]
)

hallucination_grader: RunnableSequence = hallucination_prompt | structured_llm_grader
25 changes: 25 additions & 0 deletions RAG/graph/chains/retrieval_grader.py
@@ -0,0 +1,25 @@
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(temperature=0)

class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    binary_score: str = Field(description="Documents are relevant to the question, 'yes' or 'no'")

structured_llm_grader = llm.with_structured_output(GradeDocuments)

system = """You are a grader assessing relevance of a retrieved document to a user question. \n
If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question."""

grade_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
    ]
)

retrieval_grader = grade_prompt | structured_llm_grader
29 changes: 29 additions & 0 deletions RAG/graph/chains/router.py
@@ -0,0 +1,29 @@
from typing import Literal

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field


class RouteQuery(BaseModel):
    """Route a user query to the most relevant datasource."""

    datasource: Literal["vectorstore", "websearch"] = Field(
        ...,
        description="Given a user question, choose to route it to web search or a vectorstore.",
    )

llm = ChatOpenAI(temperature=0)
structured_llm_router = llm.with_structured_output(RouteQuery)

system = """You are an expert at routing a user question to a vectorstore or web search.
The vectorstore contains documents related to agents, prompt engineering, and adversarial attacks.
Use the vectorstore for questions on these topics. For all else, use web-search."""
route_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system),
        ("human", "{question}"),
    ]
)

question_router = route_prompt | structured_llm_router
4 changes: 4 additions & 0 deletions RAG/graph/consts.py
@@ -0,0 +1,4 @@
RETRIEVE = "retrieve"
GRADE_DOCUMENTS = "grade_documents"
GENERATE = "generate"
WEBSEARCH = "websearch"