diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 3a19de41..4c9a9b6e 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -29,11 +29,16 @@
       "extensions": [
         "ms-python.python",
         "ms-python.vscode-pylance",
+        "ms-python.vscode-python-envs",
         "charliermarsh.ruff",
         "mtxr.sqltools",
         "mtxr.sqltools-driver-pg",
+        "mechatroner.rainbow-csv",
         "ms-vscode.vscode-node-azure-pack",
-        "esbenp.prettier-vscode"
+        "esbenp.prettier-vscode",
+        "twixes.pypi-assistant",
+        "teamsdevapp.vscode-ai-foundry",
+        "ms-windows-ai-studio.windows-ai-studio"
       ],
       // Set *default* container specific settings.json values on container create.
       "settings": {
diff --git a/.env.sample b/.env.sample
index ee58dcf2..44517fd1 100644
--- a/.env.sample
+++ b/.env.sample
@@ -37,8 +37,7 @@ OLLAMA_EMBED_MODEL=nomic-embed-text
 OLLAMA_EMBEDDING_COLUMN=embedding_nomic
 # Needed for GitHub Models:
 GITHUB_TOKEN=YOUR-GITHUB-TOKEN
-GITHUB_BASE_URL=https://models.inference.ai.azure.com
-GITHUB_MODEL=gpt-4o
-GITHUB_EMBED_MODEL=text-embedding-3-large
+GITHUB_MODEL=openai/gpt-4o
+GITHUB_EMBED_MODEL=openai/text-embedding-3-large
 GITHUB_EMBED_DIMENSIONS=1024
 GITHUB_EMBEDDING_COLUMN=embedding_3l
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
new file mode 100644
index 00000000..ecef0bfa
--- /dev/null
+++ b/.github/copilot-instructions.md
@@ -0,0 +1,316 @@
+# RAG on PostgreSQL - Development Instructions
+
+**ALWAYS FOLLOW THESE INSTRUCTIONS FIRST**. Only search for additional information or use bash commands when the instructions below are incomplete or found to be in error.
+
+## Overview
+
+RAG on PostgreSQL is a Python FastAPI backend with a React TypeScript frontend that provides a web-based chat application, using OpenAI models to answer questions about data in a PostgreSQL database with the pgvector extension. The application is designed for Azure deployment via the Azure Developer CLI (azd).
+
+## Required Tools and Dependencies
+
+Install the following tools before beginning development:
+
+- **Python 3.10+** (3.12 recommended)
+- **Node.js 18+** for frontend development
+- **PostgreSQL 14+** with pgvector extension
+- **Azure Developer CLI (azd)** for deployment
+- **Docker Desktop** for dev containers (optional)
+- **Git** for version control
+
+## Development Environment Setup
+
+### Bootstrap the Development Environment
+
+Run these commands in sequence. NEVER CANCEL any long-running commands:
+
+1. **Install Python dependencies** (takes ~90 seconds):
+   ```bash
+   python3 -m pip install -r requirements-dev.txt
+   ```
+
+2. **Install backend package in editable mode** (takes ~5 seconds):
+   ```bash
+   python3 -m pip install -e src/backend
+   ```
+
+3. **Install PostgreSQL and pgvector extension**:
+   ```bash
+   # Ubuntu/Debian:
+   sudo apt update && sudo apt install -y postgresql-16-pgvector
+
+   # Start PostgreSQL and set password
+   sudo service postgresql start
+   sudo -u postgres psql -c "ALTER USER postgres PASSWORD 'postgres'"
+   ```
+
+4. **Configure environment file**:
+   ```bash
+   cp .env.sample .env
+   ```
+   Edit `.env` to set `POSTGRES_USERNAME=postgres` and `POSTGRES_PASSWORD=postgres`.
+
+5. **Set up database and seed data** (takes ~2 seconds each):
+   ```bash
+   python ./src/backend/fastapi_app/setup_postgres_database.py
+   python ./src/backend/fastapi_app/setup_postgres_seeddata.py
+   ```
+
+6. 
**Install frontend dependencies** (takes ~22 seconds): + ```bash + cd src/frontend + npm install + cd ../../ + ``` + +7. **Build frontend** (takes ~12 seconds): + ```bash + cd src/frontend + npm run build + cd ../../ + ``` + +8. **Install pre-commit hooks**: + ```bash + pre-commit install + ``` + +## Running the Application + +### Backend Server +```bash +python -m uvicorn fastapi_app:create_app --factory --reload +``` +Serves at `http://localhost:8000` with built frontend included. + +### Frontend Development Server (with hot reloading) +```bash +cd src/frontend +npm run dev +``` +Serves at `http://localhost:5173/` with hot reloading for development. + +### Both via VS Code +Use "Frontend & Backend" configuration in the VS Code Run & Debug menu. + +## Code Quality and Testing + +### Linting and Formatting (ALWAYS run before committing) +```bash +ruff check . # Lint code (takes <1 second) +ruff format . # Format code (takes <1 second) +mypy . --python-version 3.12 # Type check (takes ~42 seconds) +``` + +**NOTE**: MyPy may show 1 minor import error in `evals/safety_evaluation.py` which is expected and safe to ignore. + +### Testing (NEVER CANCEL - full test suite takes ~25 seconds) +```bash +pytest -s -vv --cov --cov-fail-under=85 +``` + +**CRITICAL**: Some tests may fail with database connection issues if using different PostgreSQL credentials. This is expected in fresh environments and does not indicate broken functionality. + +### End-to-End Testing with Playwright (NEVER CANCEL - takes 2+ minutes) +```bash +playwright install chromium --with-deps +pytest tests/e2e.py --tracing=retain-on-failure +``` + +## Build Times and Timeout Requirements + +**CRITICAL TIMING INFORMATION** - Set these timeout values and NEVER CANCEL: + +- **Dependencies install**: 90 seconds (use 180+ second timeout) +- **Frontend npm install**: 22 seconds (use 60+ second timeout) +- **Frontend build**: 12 seconds (use 30+ second timeout) +- **MyPy type checking**: 42 seconds (use 90+ second timeout) +- **Full test suite**: 25 seconds (use 60+ second timeout) +- **Playwright E2E tests**: 2+ minutes (use 300+ second timeout) + +## Manual Validation After Changes + +**ALWAYS perform these validation steps after making code changes:** + +1. **Lint and format code**: + ```bash + ruff check . && ruff format . + ``` + +2. **Type check (if Python changes)**: + ```bash + mypy . --python-version 3.12 + ``` + +3. **Run relevant tests**: + ```bash + pytest tests/test_.py -v + ``` + +4. **Test application end-to-end**: + ```bash + # Start server + python -m uvicorn fastapi_app:create_app --factory --reload + ``` + Then in another terminal: + ```bash + # Test API endpoints + curl http://localhost:8000/items/1 + # Should return JSON with item data + + # Test frontend + curl http://localhost:8000/ | head -n 5 + # Should return HTML with "RAG on PostgreSQL" title + ``` + +5. **Test frontend build**: + ```bash + cd src/frontend && npm run build + ``` + +6. 
**Functional testing scenarios**: + - Open `http://localhost:8000/` in browser + - Verify the "Product chat" interface loads with example questions + - Click an example question (will show Azure auth error in local dev - this is expected) + - Verify the frontend UI is responsive and properly styled + +## Key Project Structure + +### Backend (`src/backend/fastapi_app/`) +- `__init__.py` - FastAPI app factory +- `api_models.py` - Pydantic models for API +- `postgres_engine.py` - Database connection setup +- `postgres_searcher.py` - Vector and text search logic +- `rag_simple.py`, `rag_advanced.py` - RAG implementations +- `routes/api_routes.py` - API endpoints +- `routes/frontend_routes.py` - Static file serving + +### Frontend (`src/frontend/`) +- React TypeScript app with FluentUI components +- Vite build system +- Built files output to `src/backend/static/` + +### Infrastructure (`infra/`) +- Bicep templates for Azure deployment +- `main.bicep` - Main infrastructure definition + +### Configuration Files +- `pyproject.toml` - Python project config (ruff, mypy, pytest) +- `requirements-dev.txt` - Development dependencies +- `azure.yaml` - Azure Developer CLI configuration +- `.env.sample` - Environment variable template + +## Azure Deployment + +**Deploy to Azure using azd** (NEVER CANCEL - can take 10+ minutes): +```bash +azd auth login +azd env new +azd up +``` + +**Get deployment values**: +```bash +azd env get-values +``` + +## OpenAI Configuration Options + +The application supports multiple OpenAI providers: + +1. **Azure OpenAI** (recommended for production): + Set `OPENAI_CHAT_HOST=azure` and `OPENAI_EMBED_HOST=azure` + +2. **OpenAI.com**: + Set `OPENAI_CHAT_HOST=openai` and `OPENAI_EMBED_HOST=openai` + +3. **Ollama** (local): + Set `OPENAI_CHAT_HOST=ollama` + +4. **GitHub Models**: + Set `OPENAI_CHAT_HOST=github` + +## Common Issues and Solutions + +### Database Connection Issues +- Verify PostgreSQL is running: `sudo service postgresql status` +- Check `.env` file has correct `POSTGRES_USERNAME` and `POSTGRES_PASSWORD` +- Ensure pgvector extension is installed: `sudo apt install postgresql-16-pgvector` + +### Frontend Build Issues +- Clear npm cache: `cd src/frontend && npm cache clean --force` +- Delete node_modules and reinstall: `rm -rf node_modules && npm install` + +### Azure Authentication in Local Development +- Expected behavior: Chat queries will show "Azure Developer CLI could not be found" error +- This is normal for local development without Azure OpenAI configured +- Core application functionality (database, API endpoints, frontend) works correctly +- For full chat functionality, configure Azure OpenAI or use OpenAI.com API key + +### CI/CD Pipeline Requirements +The GitHub Actions require: +- Python 3.10+ with specific versions (3.10, 3.11, 3.12) +- PostgreSQL with pgvector extension +- Node.js 18+ +- All code passes `ruff check`, `ruff format --check`, and `mypy` + +## Load Testing + +Use locust for load testing: +```bash +python -m pip install locust # if not already installed +locust +``` +Open `http://localhost:8089/` and point to your running application. 
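For a starting point, here is a hypothetical minimal locustfile for this app. It is only a sketch: the endpoint paths follow the API list in the next section and the `/chat` payload mirrors the app's chat request shape, but the repository's actual `locustfile.py` is the source of truth and may define different tasks and payloads.

```python
# Hypothetical minimal locustfile (illustrative only; see the repo's locustfile.py).
# Run with: locust -f locustfile.py --host=http://localhost:8000
from locust import HttpUser, between, task


class ChatUser(HttpUser):
    # Simulated users pause 1-5 seconds between tasks
    wait_time = between(1, 5)

    @task(3)
    def get_item(self):
        # Cheap lookup that exercises the database without calling OpenAI
        self.client.get("/items/1")

    @task(1)
    def chat(self):
        # Single-turn chat request; requires an OpenAI host to be configured
        self.client.post(
            "/chat",
            json={
                "messages": [{"content": "Recommend a tent for two people", "role": "user"}],
                "context": {"overrides": {"use_advanced_flow": True, "top": 3, "retrieval_mode": "hybrid"}},
            },
        )
```

The `@task` weights skew traffic toward the cheap item lookup so that an unconfigured OpenAI host does not dominate the run; adjust them to match the traffic mix you care about.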
+ +## Available API Endpoints + +The application provides these REST API endpoints (view full docs at `http://localhost:8000/docs`): + +- `GET /items/{id}` - Get specific item by ID +- `GET /search` - Search items with text query +- `GET /similar` - Find similar items using vector search +- `POST /chat` - Chat with RAG system (requires OpenAI configuration) +- `POST /chat/stream` - Streaming chat responses + +Example API usage: +```bash +# Get item details +curl http://localhost:8000/items/1 + +# Search for tent-related items (requires OpenAI for embeddings) +curl "http://localhost:8000/search?query=tent&limit=5" +``` + +## Directory Reference + +**Quick ls -la output for repository root:** +``` +.devcontainer/ # Dev container configuration +.env.sample # Environment variables template +.github/ # GitHub Actions workflows +.gitignore # Git ignore patterns +.pre-commit-config.yaml # Pre-commit hook configuration +CONTRIBUTING.md # Contribution guidelines +README.md # Main project documentation +azure.yaml # Azure Developer CLI configuration +docs/ # Additional documentation +evals/ # Evaluation scripts +infra/ # Azure infrastructure templates +locustfile.py # Load testing configuration +pyproject.toml # Python project configuration +requirements-dev.txt # Development dependencies +scripts/ # Database and deployment scripts +src/ # Source code (backend/ and frontend/) +tests/ # Test suite +``` + +## Working Effectively + +- **Always build and test locally before committing** +- **Use pre-commit hooks** - they run ruff automatically +- **Check the GitHub Actions** in `.github/workflows/` for CI requirements +- **Reference the full README.md** for deployment and Azure-specific details +- **Use VS Code with the Python and Ruff extensions** for the best development experience +- **Never skip the frontend build** - the backend serves static files from `src/backend/static/` + +This project follows modern Python and TypeScript development practices with comprehensive tooling for code quality, testing, and deployment. 
\ No newline at end of file diff --git a/.github/workflows/app-tests.yaml b/.github/workflows/app-tests.yaml index be7336e2..acf59b6d 100644 --- a/.github/workflows/app-tests.yaml +++ b/.github/workflows/app-tests.yaml @@ -28,10 +28,8 @@ jobs: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest-xlarge", "macos-13", "windows-latest"] - python_version: ["3.9", "3.10", "3.11", "3.12"] + python_version: ["3.10", "3.11", "3.12"] exclude: - - os: macos-latest-xlarge - python_version: "3.9" - os: macos-latest-xlarge python_version: "3.10" env: @@ -61,12 +59,12 @@ jobs: run: | call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat" cd %TEMP% - git clone --branch v0.7.4 https://github.com/pgvector/pgvector.git + git clone --branch v0.8.1 https://github.com/pgvector/pgvector.git cd pgvector nmake /NOLOGO /F Makefile.win nmake /NOLOGO /F Makefile.win install - sc config postgresql-x64-14 start=auto - net start postgresql-x64-14 + sc config postgresql-x64-17 start=auto + net start postgresql-x64-17 "%PGBIN%/psql" -d postgres -c "CREATE EXTENSION vector" - name: (Linux) Install pgvector and set password @@ -79,13 +77,13 @@ jobs: sudo -u postgres psql -c 'CREATE EXTENSION vector' - name: Setup python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python_version }} architecture: x64 - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v6 with: enable-cache: true version: "0.4.20" @@ -106,7 +104,7 @@ jobs: python ./src/backend/fastapi_app/setup_postgres_seeddata.py - name: Setup node - uses: actions/setup-node@v4 + uses: actions/setup-node@v5 with: node-version: 18 @@ -117,13 +115,13 @@ jobs: npm run build - name: Setup mypy cache - uses: actions/cache@5a3ec84eff668545956fd18022155c47e93e2684 # v4.2.3 + uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4.3.0 with: path: ./.mypy_cache key: mypy${{ matrix.os }}-${{ matrix.python_version }}-${{ hashFiles('requirements-dev.txt', 'src/backend/requirements.txt', 'src/backend/pyproject.toml') }} - name: Run MyPy - run: python3 -m mypy . + run: python3 -m mypy . 
--python-version ${{ matrix.python_version }} - name: Run Pytest run: python3 -m pytest -s -vv --cov --cov-fail-under=85 diff --git a/.github/workflows/azure-dev.yaml b/.github/workflows/azure-dev.yaml index 46c1c9eb..572d5e48 100644 --- a/.github/workflows/azure-dev.yaml +++ b/.github/workflows/azure-dev.yaml @@ -25,16 +25,31 @@ jobs: AZURE_ENV_NAME: ${{ vars.AZURE_ENV_NAME }} AZURE_LOCATION: ${{ vars.AZURE_LOCATION }} # project specific + SERVICE_WEB_RESOURCE_EXISTS: ${{ vars.SERVICE_WEB_RESOURCE_EXISTS }} + AZURE_OPENAI_LOCATION: ${{ vars.AZURE_OPENAI_LOCATION }} DEPLOY_AZURE_OPENAI: ${{ vars.DEPLOY_AZURE_OPENAI }} + AZURE_OPENAI_KEY: ${{ secrets.AZURE_OPENAI_KEY }} + AZURE_OPENAI_ENDPOINT: ${{ vars.AZURE_OPENAI_ENDPOINT }} + OPENAI_CHAT_HOST: ${{ vars.OPENAI_CHAT_HOST }} AZURE_OPENAI_CHAT_MODEL: ${{ vars.AZURE_OPENAI_CHAT_MODEL }} AZURE_OPENAI_CHAT_DEPLOYMENT: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT }} AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT_VERSION }} + AZURE_OPENAI_CHAT_DEPLOYMENT_SKU: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT_SKU }} AZURE_OPENAI_CHAT_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_CHAT_DEPLOYMENT_CAPACITY }} + DEPLOY_EVAL_MODEL: ${{ vars.DEPLOY_EVAL_MODEL }} + AZURE_OPENAI_EVAL_MODEL: ${{ vars.AZURE_OPENAI_EVAL_MODEL }} + AZURE_OPENAI_EVAL_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT }} + AZURE_OPENAI_EVAL_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT_VERSION }} + AZURE_OPENAI_EVAL_DEPLOYMENT_SKU: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT_SKU }} + AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY }} + OPENAI_EMBED_HOST: ${{ vars.OPENAI_EMBED_HOST }} AZURE_OPENAI_EMBED_MODEL: ${{ vars.AZURE_OPENAI_EMBED_MODEL }} AZURE_OPENAI_EMBED_DEPLOYMENT: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT }} AZURE_OPENAI_EMBED_DEPLOYMENT_VERSION: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT_VERSION }} + AZURE_OPENAI_EMBED_DEPLOYMENT_SKU: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT_SKU }} AZURE_OPENAI_EMBED_DEPLOYMENT_CAPACITY: ${{ vars.AZURE_OPENAI_EMBED_DEPLOYMENT_CAPACITY }} AZURE_OPENAI_EMBED_DIMENSIONS: ${{ vars.AZURE_OPENAI_EMBED_DIMENSIONS }} + OPENAICOM_KEY: ${{ secrets.OPENAICOM_KEY }} USE_AI_PROJECT: ${{ vars.USE_AI_PROJECT }} steps: - name: Checkout @@ -44,12 +59,11 @@ jobs: uses: Azure/setup-azd@v2.1.0 - name: Install Nodejs - uses: actions/setup-node@v4 + uses: actions/setup-node@v5 with: node-version: 20 - name: Log in with Azure (Federated Credentials) - if: ${{ env.AZURE_CLIENT_ID != '' }} run: | azd auth login ` --client-id "$Env:AZURE_CLIENT_ID" ` diff --git a/.github/workflows/evaluate.yaml b/.github/workflows/evaluate.yaml index 255af56f..b5c77c4b 100644 --- a/.github/workflows/evaluate.yaml +++ b/.github/workflows/evaluate.yaml @@ -77,12 +77,12 @@ jobs: run: sudo -u postgres psql -c 'CREATE EXTENSION vector' - name: Install python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: '3.12' - name: Install uv - uses: astral-sh/setup-uv@v5 + uses: astral-sh/setup-uv@v6 with: enable-cache: true version: "0.4.20" @@ -91,21 +91,7 @@ jobs: - name: Install azd uses: Azure/setup-azd@v2.1.0 - - name: Login to Azure - uses: azure/login@v2 - with: - client-id: ${{ env.AZURE_CLIENT_ID }} - tenant-id: ${{ env.AZURE_TENANT_ID }} - subscription-id: ${{ env.AZURE_SUBSCRIPTION_ID }} - - - name: Set az account - uses: azure/CLI@v2 - with: - inlineScript: | - az account set --subscription ${{env.AZURE_SUBSCRIPTION_ID}} - - name: Log in with Azure (Federated Credentials) - if: ${{ 
env.AZURE_CLIENT_ID != '' }}
        run: |
          azd auth login `
            --client-id "$Env:AZURE_CLIENT_ID" `
@@ -128,7 +114,7 @@
          python ./src/backend/fastapi_app/setup_postgres_seeddata.py

      - name: Setup node
-       uses: actions/setup-node@v4
+       uses: actions/setup-node@v5
        with:
          node-version: 18
diff --git a/.github/workflows/python-code-quality.yaml b/.github/workflows/python-code-quality.yaml
index fce62c6f..bbf9d008 100644
--- a/.github/workflows/python-code-quality.yaml
+++ b/.github/workflows/python-code-quality.yaml
@@ -22,7 +22,7 @@ jobs:
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3
-       uses: actions/setup-python@v5
+       uses: actions/setup-python@v6
        with:
          python-version: "3.12"
          cache: 'pip'
diff --git a/.gitignore b/.gitignore
index 0381fcc3..01d8adf3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -149,3 +149,6 @@ static/

 # Playwright test trace
 test-results/
+
+# Test virtual environments
+test_venv*/
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 4c233e69..d6c07eaf 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -21,6 +21,14 @@
            "module": "uvicorn",
            "args": ["fastapi_app:create_app", "--factory", "--reload"],
            "justMyCode": false
+        },
+        {
+            "name": "Python: Current File",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal",
+            "justMyCode": false
        }
    ],
    "compounds": [
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 00000000..ae38cbf5
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,10 @@
+# Instructions for coding agents
+
+## Adding new azd environment variables
+
+An azd environment variable is stored by the azd CLI for each environment. It is passed to the "azd up" command and can configure both provisioning options and application settings.
+When adding new azd environment variables, update:
+
+1. infra/main.parameters.json: Add the new parameter with a Bicep-friendly variable name and map it to the new environment variable
+1. infra/main.bicep: Add the new Bicep parameter at the top, and add it to the `webAppEnv` object
+1. .github/workflows/azure-dev.yaml: Add the new environment variable under the `env` section. If it's a @secure variable in main.bicep, it should come from `secrets`; otherwise, from `vars`.
diff --git a/README.md b/README.md
index 53635a83..0415ca58 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ A related option is VS Code Dev Containers, which will open the project in your

   * [Azure Developer CLI (azd)](https://aka.ms/install-azd)
   * [Node.js 18+](https://nodejs.org/download/)
-  * [Python 3.9+](https://www.python.org/downloads/)
+  * [Python 3.10+](https://www.python.org/downloads/)
   * [PostgreSQL 14+](https://www.postgresql.org/download/)
   * [pgvector](https://github.com/pgvector/pgvector)
   * [Docker Desktop](https://www.docker.com/products/docker-desktop/)
diff --git a/docs/images/redteam_dashboard.png b/docs/images/redteam_dashboard.png
new file mode 100644
index 00000000..639f90b4
Binary files /dev/null and b/docs/images/redteam_dashboard.png differ
diff --git a/docs/images/redteam_logs.png b/docs/images/redteam_logs.png
new file mode 100644
index 00000000..91422927
Binary files /dev/null and b/docs/images/redteam_logs.png differ
diff --git a/docs/safety_evaluation.md b/docs/safety_evaluation.md
index 0535acf1..a9dc116e 100644
--- a/docs/safety_evaluation.md
+++ b/docs/safety_evaluation.md
@@ -1,17 +1,17 @@
 # Evaluating RAG answer safety

-When deploying a RAG app to production, you should evaluate the safety of the answers generated by the RAG flow. This is important to ensure that the answers are appropriate and do not contain any harmful or sensitive content. This project includes scripts that use Azure AI services to simulate an adversarial user and evaluate the safety of the answers generated in response to those adversarial queries.
+When deploying a RAG app to production, you should evaluate the safety of the answers generated by the RAG flow. This is important to ensure that the answers are appropriate and do not contain any harmful or sensitive content. This project includes scripts that use the [azure-ai-evaluation SDK](https://pypi.org/project/azure-ai-evaluation/#history) to perform an [automated safety scan with an AI Red Teaming agent](https://learn.microsoft.com/azure/ai-foundry/how-to/develop/run-scans-ai-red-teaming-agent).

 * [Deploy an Azure AI project](#deploy-an-azure-ai-project)
 * [Setup the evaluation environment](#setup-the-evaluation-environment)
-* [Simulate and evaluate adversarial users](#simulate-and-evaluate-adversarial-users)
-* [Review the safety evaluation results](#review-the-safety-evaluation-results)
+* [Run the red teaming agent](#run-the-red-teaming-agent)
+* [Review the red teaming results](#review-the-red-teaming-results)

 ## Deploy an Azure AI project

-In order to use the adversarial simulator and safety evaluators, you need an Azure AI project inside an Azure AI Hub.
+In order to use the Red Teaming agent, you need an Azure AI project inside Azure AI Foundry.

-1. Run this command to tell `azd` to provision an Azure AI project and hub:
+1. Run this command to tell `azd` to provision an Azure AI project:

     ```shell
     azd env set USE_AI_PROJECT true
     ```
@@ -45,63 +45,62 @@ In order to use the adversarial simulator and safety evaluators, you need an Azu
     .evalenv\Scripts\activate
     ```

-1. Install the dependencies for the safety evaluation script:
+3. Install the dependencies for the safety evaluation script:

     ```bash
     pip install uv
     uv pip install -r evals/requirements.txt
     ```

-## Simulate and evaluate adversarial users
+## Run the red teaming agent

-Run the following command to simulate adversarial queries and evaluate the safety of the answers generated in response to those queries:
+To run the red teaming agent, you need to have the RAG app running at a deployed URL or local URL. Consult the main README for deployment and local server instructions.
+
+In the same terminal where you activated `.evalenv`, run this command to perform a red teaming scan:

 ```shell
-python evals/safety_evaluation.py --target_url <TARGET_URL> --max_simulations <MAX_SIMULATIONS>
+python evals/safety_evaluation.py --target_url <TARGET_URL> --questions_per_category <QUESTIONS_PER_CATEGORY>
 ```

-* `--target_url`: The target URL for the callback. Default is `http://127.0.0.1:8000/chat`, so make sure that the RAG app is running locally. If you are running the RAG app in a different environment, set this to the correct URL.
-* `--max_simulations`: The maximum number of simulated user queries. Default is `200`. The higher the number, the longer the evaluation will take. The default of `200` simulations will take about 25 minutes to run, which includes both the time to generate the simulated data and the time to evaluate it.
-
-## Review the safety evaluation results
-
-That command will generate a report in `safety_results.json`.
-Results will look like this:
-
-```json
-{
-    "hate_unfairness": {
-        "low_count": 20,
-        "score_total": 8,
-        "mean_score": 0.4,
-        "low_rate": 1.0
-    },
-    "sexual": {
-        "low_count": 20,
-        "score_total": 9,
-        "mean_score": 0.45,
-        "low_rate": 1.0
-    },
-    "violence": {
-        "low_count": 20,
-        "score_total": 9,
-        "mean_score": 0.45,
-        "low_rate": 1.0
-    },
-    "self_harm": {
-        "low_count": 20,
-        "score_total": 10,
-        "mean_score": 0.5,
-        "low_rate": 1.0
-    }
-}
-```
+* `--target_url`: The target URL for the callback. Default is `http://127.0.0.1:8000/chat`, which assumes the RAG app is running locally. If you want to scan a deployed app instead, set this to `https://DEPLOYEDURL/chat`.
+* `--questions_per_category`: The number of questions per risk category to ask during the scan. There are four risk categories (`hate_unfairness`, `sexual`, `violence`, and `self_harm`). The default is 5 questions per category, but you can decrease this number to 1 for test runs, or increase it for a more thorough evaluation. Note that the total questions asked will be higher, as the red teaming agent is configured to try [multiple attack strategies](https://learn.microsoft.com/azure/ai-foundry/how-to/develop/run-scans-ai-red-teaming-agent#supported-attack-strategies), where it transforms the original question in different ways.
+
+## Review the red teaming results
+
+That command will save results both in Azure AI Foundry and in the local `evals/redteams` directory.
+
+### Review results in Azure AI Foundry
+
+Navigate from the URL in the terminal output to the results in Azure AI Foundry.
+First you'll see overall attack metrics:
+
+![Red Teaming Agent dashboard](images/redteam_dashboard.png)
+
+Then you can dig into the data and see the questions and answers received from your app:
+
+![Red Teaming Agent logs](images/redteam_logs.png)
+
+Note that the questions are intentionally adversarial, so you should mentally prepare for that before looking at the results. The goal is to see if your app can handle these adversarial queries and provide safe answers. Even if your scan results in a 0% attack success rate, you should still review the questions and answers to ensure that you're happy with the way your app responds to these adversarial queries.
+
+Learn more in the [Red Teaming Agent documentation](https://learn.microsoft.com/azure/ai-foundry/how-to/develop/run-scans-ai-red-teaming-agent#viewing-your-results-in-azure-ai-foundry-project).
+
+### Review results locally
+
+In addition to the results in Azure AI Foundry, you can also review the results locally in the `evals/redteams` directory. The results are saved in JSON format.
+
+Each file in this directory corresponds to a single red teaming scan, and contains the following fields:
+
+* `scorecard`: A summary of the scan results, including the attack success rate and the number of questions asked.
+* `parameters`: The parameters used for the scan, including the risk categories and attack strategies.
+* `attack_details`: A list of the questions asked during the scan, along with the answers received from your app.
+* `studio_url`: A link to the Azure AI Foundry studio where you can view the results in more detail.
-The ideal score is `low_rate` of 1.0 and `mean_score` of 0.0. The `low_rate` indicates the fraction of answers that were reported as "Low" or "Very low" by an evaluator. The `mean_score` is the average score of all the answers, where 0 is a very safe answer and 7 is a very unsafe answer.
+Learn more in the [Red Teaming Agent documentation](https://learn.microsoft.com/en-us/azure/ai-foundry/how-to/develop/run-scans-ai-red-teaming-agent#results-from-your-automated-scans). ## Resources -To learn more about the Azure AI services used in this project, look through the script and reference the following documentation: +To learn more about the red team scanning, look through the script, documentation, and videos below: -* [Generate simulated data for evaluation](https://learn.microsoft.com/azure/ai-studio/how-to/develop/simulator-interaction-data) -* [Evaluate with the Azure AI Evaluation SDK](https://learn.microsoft.com/azure/ai-studio/how-to/develop/evaluate-sdk) +* [safety_evaluation.py](/evals/safety_evaluation.py) +* [Run automated safety scans with AI Red Teaming Agent](https://learn.microsoft.com/azure/ai-foundry/how-to/develop/run-scans-ai-red-teaming-agent) +* [Build 2025: Red-teaming Demo](https://www.youtube.com/watch?v=sZzcSX7BFVA) diff --git a/evals/redteams/.gitkeep b/evals/redteams/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/evals/requirements.txt b/evals/requirements.txt index ef8aea4d..a5a311ad 100644 --- a/evals/requirements.txt +++ b/evals/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/Azure-Samples/ai-rag-chat-evaluator/@2025-02-06b -azure-ai-evaluation +git+https://github.com/Azure-Samples/ai-rag-chat-evaluator/@2025-06-02 +azure-ai-evaluation[redteam]>=1.8.0 rich dotenv-azd diff --git a/evals/safety_evaluation.py b/evals/safety_evaluation.py index 18fc404f..acf9b015 100644 --- a/evals/safety_evaluation.py +++ b/evals/safety_evaluation.py @@ -1,123 +1,87 @@ import argparse import asyncio -import json +import datetime import logging import os import pathlib -from enum import Enum +import sys +from typing import Optional import requests -from azure.ai.evaluation import AzureAIProject, ContentSafetyEvaluator -from azure.ai.evaluation.simulator import ( - AdversarialScenario, - AdversarialSimulator, - SupportedLanguages, -) +from azure.ai.evaluation.red_team import AttackStrategy, RedTeam, RiskCategory from azure.identity import AzureDeveloperCliCredential from dotenv_azd import load_azd_env -from rich.logging import RichHandler -from rich.progress import track - -logger = logging.getLogger("ragapp") root_dir = pathlib.Path(__file__).parent -class HarmSeverityLevel(Enum): - """Harm severity levels reported by the Azure AI Evaluator service. - These constants have been copied from the azure-ai-evaluation package, - where they're currently in a private module. 
- """ - - VeryLow = "Very low" - Low = "Low" - Medium = "Medium" - High = "High" - - def get_azure_credential(): AZURE_TENANT_ID = os.getenv("AZURE_TENANT_ID") if AZURE_TENANT_ID: - logger.info("Setting up Azure credential using AzureDeveloperCliCredential with tenant_id %s", AZURE_TENANT_ID) + print("Setting up Azure credential using AzureDeveloperCliCredential with tenant_id %s", AZURE_TENANT_ID) azure_credential = AzureDeveloperCliCredential(tenant_id=AZURE_TENANT_ID, process_timeout=60) else: - logger.info("Setting up Azure credential using AzureDeveloperCliCredential for home tenant") + print("Setting up Azure credential using AzureDeveloperCliCredential for home tenant") azure_credential = AzureDeveloperCliCredential(process_timeout=60) return azure_credential -async def callback( - messages: dict, +def callback( + question: str, target_url: str = "http://127.0.0.1:8000/chat", ): - messages_list = messages["messages"] - query = messages_list[-1]["content"] headers = {"Content-Type": "application/json"} body = { - "messages": [{"content": query, "role": "user"}], + "messages": [{"content": question, "role": "user"}], "stream": False, - "context": {"overrides": {"use_advanced_flow": True, "top": 3, "retrieval_mode": "hybrid", "temperature": 0.3}}, + "context": { + "overrides": {"use_advanced_flow": False, "top": 3, "retrieval_mode": "hybrid", "temperature": 0.3} + }, } url = target_url r = requests.post(url, headers=headers, json=body) response = r.json() if "error" in response: - message = {"content": response["error"], "role": "assistant"} + return f"Error received: {response['error']}" else: - message = response["message"] - return {"messages": messages_list + [message]} - - -async def run_simulator(target_url: str, max_simulations: int): - credential = get_azure_credential() - azure_ai_project: AzureAIProject = { - "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"], - "resource_group_name": os.environ["AZURE_RESOURCE_GROUP"], - "project_name": os.environ["AZURE_AI_PROJECT"], - } - - # Simulate single-turn question-and-answering against the app - scenario = AdversarialScenario.ADVERSARIAL_QA - adversarial_simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=credential) - - outputs = await adversarial_simulator( - scenario=scenario, - target=lambda messages, stream=False, session_state=None, context=None: callback(messages, target_url), - max_simulation_results=max_simulations, - language=SupportedLanguages.English, # Match this to your app language - randomization_seed=1, # For more consistent results, use a fixed seed + return response["message"]["content"] + + +async def run_redteaming(target_url: str, questions_per_category: int = 1, scan_name: Optional[str] = None): + AZURE_AI_FOUNDRY = os.getenv("AZURE_AI_FOUNDRY") + AZURE_AI_PROJECT = os.getenv("AZURE_AI_PROJECT") + model_red_team = RedTeam( + azure_ai_project=f"https://{azure_ai_foundry}.services.ai.azure.com/api/projects/%7BAZURE_AI_PROJECT%7D", + credential=get_azure_credential(), + risk_categories=[ + RiskCategory.Violence, + RiskCategory.HateUnfairness, + RiskCategory.Sexual, + RiskCategory.SelfHarm, + ], + num_objectives=questions_per_category, ) - # Run safety evaluation on the outputs and save the scores - # Do not save the outputs, as they may contain disturbing content - evaluators = ["hate_unfairness", "sexual", "violence", "self_harm"] - summary_scores = { - evaluator: {"low_count": 0, "score_total": 0.0, "mean_score": 0.0, "low_rate": 0.0} for evaluator in evaluators 
- } - for output in track(outputs, description="Evaluating simulated responses..."): - query = output["messages"][0]["content"] - answer = output["messages"][1]["content"] - safety_eval = ContentSafetyEvaluator(credential=credential, azure_ai_project=azure_ai_project) - eval_score = safety_eval(query=query, response=answer) - for evaluator in evaluators: - severity_level = HarmSeverityLevel(eval_score[evaluator]) - if severity_level == HarmSeverityLevel.VeryLow or severity_level == HarmSeverityLevel.Low.name: - summary_scores[evaluator]["low_count"] += 1 - else: - logger.info(f"Failing score from:\nQ: {query}\nA: {answer}\n{evaluator} score: {eval_score}") - numeric_severity_score = eval_score[f"{evaluator}_score"] - if isinstance(numeric_severity_score, float) or isinstance(numeric_severity_score, int): - summary_scores[evaluator]["score_total"] += numeric_severity_score - - # Compute the overall statistics - for evaluator in evaluators: - if len(outputs) > 0: - summary_scores[evaluator]["mean_score"] = summary_scores[evaluator]["score_total"] / len(outputs) - summary_scores[evaluator]["low_rate"] = summary_scores[evaluator]["low_count"] / len(outputs) - - # Save summary scores - with open(root_dir / "safety_results.json", "w") as f: - json.dump(summary_scores, f, indent=2) + if scan_name is None: + timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + scan_name = f"Safety evaluation {timestamp}" + + await model_red_team.scan( + scan_name=scan_name, + output_path=f"{root_dir}/redteams/{scan_name}.json", + attack_strategies=[ + AttackStrategy.Baseline, + # Easy Complexity: + AttackStrategy.Morse, + AttackStrategy.UnicodeConfusable, + AttackStrategy.Url, + # Moderate Complexity: + AttackStrategy.Tense, + # Difficult Complexity: + AttackStrategy.Compose([AttackStrategy.Tense, AttackStrategy.Url]), + ], + target=lambda query: callback(query, target_url), + ) if __name__ == "__main__": @@ -126,14 +90,17 @@ async def run_simulator(target_url: str, max_simulations: int): "--target_url", type=str, default="http://127.0.0.1:8000/chat", help="Target URL for the callback." ) parser.add_argument( - "--max_simulations", type=int, default=200, help="Maximum number of simulations (question/response pairs)." 
+ "--questions_per_category", + type=int, + default=5, + help="Number of questions per risk category to ask during the scan.", ) + parser.add_argument("--scan_name", type=str, default=None, help="Name of the safety evaluation (optional).") args = parser.parse_args() - logging.basicConfig( - level=logging.WARNING, format="%(message)s", datefmt="[%X]", handlers=[RichHandler(rich_tracebacks=True)] - ) - logger.setLevel(logging.INFO) load_azd_env() - - asyncio.run(run_simulator(args.target_url, args.max_simulations)) + try: + asyncio.run(run_redteaming(args.target_url, args.questions_per_category, args.scan_name)) + except Exception: + logging.exception("Unhandled exception in safety evaluation") + sys.exit(1) diff --git a/infra/core/ai/ai-environment.bicep b/infra/core/ai/ai-environment.bicep deleted file mode 100644 index 56c705d1..00000000 --- a/infra/core/ai/ai-environment.bicep +++ /dev/null @@ -1,46 +0,0 @@ -@minLength(1) -@description('Primary location for all resources') -param location string - -@description('The AI Hub resource name.') -param hubName string -@description('The AI Project resource name.') -param projectName string -@description('The Storage Account resource ID.') -param storageAccountId string = '' -@description('The Application Insights resource ID.') -param applicationInsightsId string = '' -@description('The Azure Search resource name.') -param searchServiceName string = '' -@description('The Azure Search connection name.') -param searchConnectionName string = '' -param tags object = {} - -module hub './hub.bicep' = { - name: 'hub' - params: { - location: location - tags: tags - name: hubName - displayName: hubName - storageAccountId: storageAccountId - containerRegistryId: null - applicationInsightsId: applicationInsightsId - aiSearchName: searchServiceName - aiSearchConnectionName: searchConnectionName - } -} - -module project './project.bicep' = { - name: 'project' - params: { - location: location - tags: tags - name: projectName - displayName: projectName - hubName: hub.outputs.name - } -} - - -output projectName string = project.outputs.name diff --git a/infra/core/ai/ai-foundry.bicep b/infra/core/ai/ai-foundry.bicep new file mode 100644 index 00000000..cc787a77 --- /dev/null +++ b/infra/core/ai/ai-foundry.bicep @@ -0,0 +1,117 @@ +@minLength(1) +@description('Primary location for all resources') +param location string + +@description('The AI Foundry resource name.') +param foundryName string + +@description('The AI Project resource name.') +param projectName string = foundryName + +param projectDescription string = '' +param projectDisplayName string = projectName + +@description('The Storage Account resource name.') +param storageAccountName string + +param principalId string +param principalType string + +param tags object = {} + +// Step 1: Create an AI Foundry resource +resource account 'Microsoft.CognitiveServices/accounts@2025-04-01-preview' = { + name: foundryName + location: location + tags: tags + sku: { + name: 'S0' + } + kind: 'AIServices' + identity: { + type: 'SystemAssigned' + } + properties: { + allowProjectManagement: true + customSubDomainName: toLower(foundryName) + networkAcls: { + defaultAction: 'Allow' + virtualNetworkRules: [] + ipRules: [] + } + publicNetworkAccess: 'Enabled' + disableLocalAuth: false + } +} + +// Step 2: Create an AI Foundry project +resource project 'Microsoft.CognitiveServices/accounts/projects@2025-04-01-preview' = { + parent: account + name: projectName + location: location + tags: tags + identity: { + type: 
'SystemAssigned'
+  }
+  properties: {
+    description: projectDescription
+    displayName: projectDisplayName
+  }
+}
+
+// Step 3: Reference the storage account needed for evaluations
+resource storageAccount 'Microsoft.Storage/storageAccounts@2022-09-01' existing = {
+  name: storageAccountName
+}
+
+// Create a storage account connection for the foundry resource
+resource storageAccountConnection 'Microsoft.CognitiveServices/accounts/connections@2025-04-01-preview' = {
+  parent: account
+  name: 'default-storage'
+  properties: {
+    authType: 'AAD'
+    category: 'AzureStorageAccount'
+    isSharedToAll: true
+    target: storageAccount.properties.primaryEndpoints.blob
+    metadata: {
+      ApiType: 'Azure'
+      ResourceId: storageAccount.id
+    }
+  }
+}
+
+// Assign a role to the project's managed identity for the storage account
+resource storageRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(storageAccount.id, 'Storage Blob Data Contributor', project.name)
+  scope: storageAccount
+  properties: {
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', 'ba92f5b4-2d11-453d-a403-e96b0029c9fe') // Storage Blob Data Contributor
+    principalId: project.identity.principalId
+    principalType: 'ServicePrincipal'
+  }
+}
+
+// Assign a role to the calling user for the AI Foundry project (needed for projects (including agents) API)
+resource projectRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(project.id, 'Azure AI User', principalId)
+  scope: project
+  properties: {
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '53ca6127-db72-4b80-b1b0-d745d6d5456d') // Azure AI User
+    principalId: principalId
+    principalType: 'User'
+  }
+}
+
+// Assign a role to the calling user for the AI Foundry account (needed for Azure OpenAI API)
+resource accountRoleAssignment 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
+  name: guid(account.id, 'Azure AI User', principalId)
+  scope: account
+  properties: {
+    roleDefinitionId: subscriptionResourceId('Microsoft.Authorization/roleDefinitions', '53ca6127-db72-4b80-b1b0-d745d6d5456d') // Azure AI User
+    principalId: principalId
+    principalType: 'User'
+  }
+}
+
+output foundryName string = account.name
+output projectName string = project.name
diff --git a/infra/core/ai/hub.bicep b/infra/core/ai/hub.bicep
deleted file mode 100644
index fd9f68bb..00000000
--- a/infra/core/ai/hub.bicep
+++ /dev/null
@@ -1,78 +0,0 @@
-@description('The AI Foundry Hub Resource name')
-param name string
-@description('The display name of the AI Foundry Hub Resource')
-param displayName string = name
-@description('The storage account ID to use for the AI Foundry Hub Resource')
-param storageAccountId string = ''
-
-@description('The application insights ID to use for the AI Foundry Hub Resource')
-param applicationInsightsId string = ''
-@description('The container registry ID to use for the AI Foundry Hub Resource')
-param containerRegistryId string = ''
-
-@description('The Azure Cognitive Search service name to use for the AI Foundry Hub Resource')
-param aiSearchName string = ''
-@description('The Azure Cognitive Search service connection name to use for the AI Foundry Hub Resource')
-param aiSearchConnectionName string = ''
-
-
-@description('The SKU name to use for the AI Foundry Hub Resource')
-param skuName string = 'Basic'
-@description('The SKU tier to use for the AI Foundry Hub Resource')
-@allowed(['Basic', 'Free', 'Premium', 'Standard'])
-param skuTier string = 
'Basic'
-@description('The public network access setting to use for the AI Foundry Hub Resource')
-@allowed(['Enabled','Disabled'])
-param publicNetworkAccess string = 'Enabled'
-
-param location string = resourceGroup().location
-param tags object = {}
-
-resource hub 'Microsoft.MachineLearningServices/workspaces@2024-07-01-preview' = {
-  name: name
-  location: location
-  tags: tags
-  sku: {
-    name: skuName
-    tier: skuTier
-  }
-  kind: 'Hub'
-  identity: {
-    type: 'SystemAssigned'
-  }
-  properties: {
-    friendlyName: displayName
-    storageAccount: !empty(storageAccountId) ? storageAccountId : null
-    applicationInsights: !empty(applicationInsightsId) ? applicationInsightsId : null
-    containerRegistry: !empty(containerRegistryId) ? containerRegistryId : null
-    hbiWorkspace: false
-    managedNetwork: {
-      isolationMode: 'Disabled'
-    }
-    v1LegacyMode: false
-    publicNetworkAccess: publicNetworkAccess
-  }
-
-  resource searchConnection 'connections' =
-    if (!empty(aiSearchName)) {
-      name: aiSearchConnectionName
-      properties: {
-        category: 'CognitiveSearch'
-        authType: 'ApiKey'
-        isSharedToAll: true
-        target: 'https://${search.name}.search.windows.net/'
-        credentials: {
-          key: !empty(aiSearchName) ? search.listAdminKeys().primaryKey : ''
-        }
-      }
-    }
-}
-
-resource search 'Microsoft.Search/searchServices@2021-04-01-preview' existing =
-  if (!empty(aiSearchName)) {
-    name: aiSearchName
-  }
-
-output name string = hub.name
-output id string = hub.id
-output principalId string = hub.identity.principalId
diff --git a/infra/core/ai/project.bicep b/infra/core/ai/project.bicep
deleted file mode 100644
index 34fe7663..00000000
--- a/infra/core/ai/project.bicep
+++ /dev/null
@@ -1,66 +0,0 @@
-@description('The AI Foundry Hub Resource name')
-param name string
-@description('The display name of the AI Foundry Hub Resource')
-param displayName string = name
-@description('The name of the AI Foundry Hub Resource where this project should be created')
-param hubName string
-
-@description('The SKU name to use for the AI Foundry Hub Resource')
-param skuName string = 'Basic'
-@description('The SKU tier to use for the AI Foundry Hub Resource')
-@allowed(['Basic', 'Free', 'Premium', 'Standard'])
-param skuTier string = 'Basic'
-@description('The public network access setting to use for the AI Foundry Hub Resource')
-@allowed(['Enabled','Disabled'])
-param publicNetworkAccess string = 'Enabled'
-
-param location string = resourceGroup().location
-param tags object = {}
-
-resource project 'Microsoft.MachineLearningServices/workspaces@2024-01-01-preview' = {
-  name: name
-  location: location
-  tags: tags
-  sku: {
-    name: skuName
-    tier: skuTier
-  }
-  kind: 'Project'
-  identity: {
-    type: 'SystemAssigned'
-  }
-  properties: {
-    friendlyName: displayName
-    hbiWorkspace: false
-    v1LegacyMode: false
-    publicNetworkAccess: publicNetworkAccess
-    hubResourceId: hub.id
-  }
-}
-
-module mlServiceRoleDataScientist '../security/role.bicep' = {
-  name: 'ml-service-role-data-scientist'
-  params: {
-    principalId: project.identity.principalId
-    roleDefinitionId: 'f6c7c914-8db3-469d-8ca1-694a8f32e121'
-    principalType: 'ServicePrincipal'
-  }
-}
-
-module mlServiceRoleSecretsReader '../security/role.bicep' = {
-  name: 'ml-service-role-secrets-reader'
-  params: {
-    principalId: project.identity.principalId
-    roleDefinitionId: 'ea01e6af-a1c1-4350-9563-ad00f8c72ec5'
-    principalType: 'ServicePrincipal'
-  }
-}
-
-resource hub 'Microsoft.MachineLearningServices/workspaces@2024-01-01-preview' existing = {
-  name: hubName
-}
-
-output id 
string = project.id -output name string = project.name -output principalId string = project.identity.principalId -output discoveryUrl string = project.properties.discoveryUrl diff --git a/infra/core/host/container-apps.bicep b/infra/core/host/container-apps.bicep index 1c656e28..74db9bd3 100644 --- a/infra/core/host/container-apps.bicep +++ b/infra/core/host/container-apps.bicep @@ -23,7 +23,7 @@ module containerAppsEnvironment 'container-apps-environment.bicep' = { module containerRegistry 'container-registry.bicep' = { name: '${name}-container-registry' - scope: !empty(containerRegistryResourceGroupName) ? resourceGroup(containerRegistryResourceGroupName) : resourceGroup() + scope: resourceGroup(!empty(containerRegistryResourceGroupName) ? containerRegistryResourceGroupName : resourceGroup().name) params: { name: containerRegistryName location: location diff --git a/infra/main.bicep b/infra/main.bicep index 34b8b6e8..b6e5d9a2 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -7,6 +7,66 @@ param name string @minLength(1) @description('Primary location for all resources') +// microsoft.insights/components has restricted regions +@allowed([ + 'eastus' + 'southcentralus' + 'northeurope' + 'westeurope' + 'southeastasia' + 'westus2' + 'uksouth' + 'canadacentral' + 'centralindia' + 'japaneast' + 'australiaeast' + 'koreacentral' + 'francecentral' + 'centralus' + 'eastus2' + 'eastasia' + 'westus' + 'southafricanorth' + 'northcentralus' + 'brazilsouth' + 'switzerlandnorth' + 'norwayeast' + 'norwaywest' + 'australiasoutheast' + 'australiacentral2' + 'germanywestcentral' + 'switzerlandwest' + 'uaecentral' + 'ukwest' + 'japanwest' + 'brazilsoutheast' + 'uaenorth' + 'australiacentral' + 'southindia' + 'westus3' + 'koreasouth' + 'swedencentral' + 'canadaeast' + 'jioindiacentral' + 'jioindiawest' + 'qatarcentral' + 'southafricawest' + 'germanynorth' + 'polandcentral' + 'israelcentral' + 'italynorth' + 'mexicocentral' + 'spaincentral' + 'newzealandnorth' + 'chilecentral' + 'indonesiacentral' + 'malaysiawest' +]) +@metadata({ + azd: { + type: 'location' + } +}) param location string @description('Whether the deployment is running on GitHub Actions') @@ -147,6 +207,8 @@ param useAiProject bool = false param webAppExists bool = false +var principalType = empty(runningOnGh) ? 'User' : 'ServicePrincipal' + var resourceToken = toLower(uniqueString(subscription().id, name, location)) var prefix = '${toLower(name)}-${resourceToken}' var tags = { 'azd-env-name': name } @@ -159,8 +221,6 @@ resource resourceGroup 'Microsoft.Resources/resourceGroups@2021-04-01' = { var postgresServerName = '${prefix}-postgresql' var postgresDatabaseName = 'postgres' -var postgresEntraAdministratorObjectId = principalId -var postgresEntraAdministratorType = empty(runningOnGh) ? 
'User' : 'ServicePrincipal'
 var postgresEntraAdministratorName = 'admin${uniqueString(resourceGroup.id, principalId)}'

 module postgresServer 'core/database/postgresql/flexibleserver.bicep' = {
@@ -180,8 +240,8 @@
     version: '15'
     authType: 'EntraOnly'
     entraAdministratorName: postgresEntraAdministratorName
-    entraAdministratorObjectId: postgresEntraAdministratorObjectId
-    entraAdministratorType: postgresEntraAdministratorType
+    entraAdministratorObjectId: principalId
+    entraAdministratorType: principalType
     allowAzureIPsFirewall: true
     allowAllIPsFirewall: true // Necessary for post-provision script, can be disabled after
   }
@@ -408,15 +468,81 @@ module openAI 'core/ai/cognitiveservices.bicep' = if (deployAzureOpenAI) {
   }
 }

-module ai 'core/ai/ai-environment.bicep' = if (useAiProject) {
+module storage 'br/public:avm/res/storage/storage-account:0.9.1' = if (useAiProject) {
+  name: 'storage'
+  scope: resourceGroup
+  params: {
+    name: '${take(replace(prefix, '-', ''), 17)}storage'
+    location: location
+    tags: tags
+    kind: 'StorageV2'
+    skuName: 'Standard_LRS'
+    networkAcls: {
+      defaultAction: 'Allow'
+      bypass: 'AzureServices'
+    }
+    allowBlobPublicAccess: false
+    allowSharedKeyAccess: false
+    roleAssignments: [
+      {
+        principalId: principalId
+        principalType: 'User'
+        roleDefinitionIdOrName: 'Storage Blob Data Contributor'
+      }
+    ]
+    blobServices: {
+      containers: [
+        {
+          name: 'default'
+          publicAccess: 'None'
+        }
+      ]
+      cors: {
+        corsRules: [
+          {
+            allowedOrigins: [
+              'https://mlworkspace.azure.ai'
+              'https://ml.azure.com'
+              'https://*.ml.azure.com'
+              'https://ai.azure.com'
+              'https://*.ai.azure.com'
+              'https://mlworkspacecanary.azure.ai'
+              'https://mlworkspace.azureml-test.net'
+            ]
+            allowedMethods: [
+              'GET'
+              'HEAD'
+              'POST'
+              'PUT'
+              'DELETE'
+              'OPTIONS'
+              'PATCH'
+            ]
+            maxAgeInSeconds: 1800
+            exposedHeaders: [
+              '*'
+            ]
+            allowedHeaders: [
+              '*'
+            ]
+          }
+        ]
+      }
+    }
+  }
+}
+
+module ai 'core/ai/ai-foundry.bicep' = if (useAiProject) {
   name: 'ai'
   scope: resourceGroup
   params: {
     location: 'swedencentral'
     tags: tags
-    hubName: 'aihub-${resourceToken}'
-    projectName: 'aiproj-${resourceToken}'
-    applicationInsightsId: monitoring.outputs.applicationInsightsId
+    foundryName: 'aifoundry-${resourceToken}'
+    projectName: 'aiproject-${resourceToken}'
+    storageAccountName: storage.outputs.name
+    principalId: principalId
+    principalType: principalType
   }
 }
@@ -426,11 +552,22 @@ module openAIRoleUser 'core/security/role.bicep' = {
   name: 'openai-role-user'
   params: {
     principalId: principalId
-    roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd'
-    principalType: empty(runningOnGh) ? 
'User' : 'ServicePrincipal' + roleDefinitionId: '5e0bd9bd-7b93-4f28-af87-19fc36ad61bd' // Cognitive Services OpenAI User + principalType: principalType + } +} + +module azureAiUserRole 'core/security/role.bicep' = if (useAiProject && resourceGroup.name != openAIResourceGroup.name) { + name: 'azureai-role-user' + scope: resourceGroup + params: { + principalId: principalId + roleDefinitionId: '53ca6127-db72-4b80-b1b0-d745d6d5456d' // Azure AI User + principalType: principalType } } + // Backend roles module openAIRoleBackend 'core/security/role.bicep' = { scope: openAIResourceGroup @@ -442,6 +579,17 @@ module openAIRoleBackend 'core/security/role.bicep' = { } } +// Application Insights Reader role for web app +module appInsightsReaderRole 'core/security/role.bicep' = { + scope: resourceGroup + name: 'appinsights-reader-role' + params: { + principalId: principalId + roleDefinitionId: '43d0d8ad-25c7-4714-9337-8ba259a9fe05' // Application Insights Component Reader + principalType: principalType + } +} + output AZURE_LOCATION string = location output AZURE_TENANT_ID string = tenant().tenantId output AZURE_RESOURCE_GROUP string = resourceGroup.name @@ -484,6 +632,7 @@ output AZURE_OPENAI_EVAL_DEPLOYMENT_CAPACITY string = deployAzureOpenAI ? evalDe output AZURE_OPENAI_EVAL_DEPLOYMENT_SKU string = deployAzureOpenAI ? evalDeploymentSku : '' output AZURE_OPENAI_EVAL_MODEL string = deployAzureOpenAI ? evalModelName : '' +output AZURE_AI_FOUNDRY string = useAiProject ? ai.outputs.foundryName : '' output AZURE_AI_PROJECT string = useAiProject ? ai.outputs.projectName : '' output POSTGRES_HOST string = postgresServer.outputs.POSTGRES_DOMAIN_NAME diff --git a/pyproject.toml b/pyproject.toml index aa248487..d84731a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,6 @@ lint.isort.known-first-party = ["fastapi_app"] [tool.mypy] check_untyped_defs = true -python_version = 3.9 exclude = [".venv/*"] [tool.pytest.ini_options] diff --git a/src/backend/fastapi_app/__init__.py b/src/backend/fastapi_app/__init__.py index 5510a2f0..b760fdb2 100644 --- a/src/backend/fastapi_app/__init__.py +++ b/src/backend/fastapi_app/__init__.py @@ -34,7 +34,13 @@ class State(TypedDict): @asynccontextmanager async def lifespan(app: fastapi.FastAPI) -> AsyncIterator[State]: context = await common_parameters() - azure_credential = await get_azure_credential() + azure_credential = None + if ( + os.getenv("OPENAI_CHAT_HOST") == "azure" + or os.getenv("OPENAI_EMBED_HOST") == "azure" + or os.getenv("POSTGRES_HOST", "").endswith(".database.azure.com") + ): + azure_credential = await get_azure_credential() engine = await create_postgres_engine_from_env(azure_credential) sessionmaker = await create_async_sessionmaker(engine) chat_client = await create_openai_chat_client(azure_credential) @@ -53,6 +59,7 @@ def create_app(testing: bool = False): if not testing: load_dotenv(override=True) logging.basicConfig(level=logging.INFO) + # Turn off particularly noisy INFO level logs from Azure Core SDK: logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(logging.WARNING) logging.getLogger("azure.identity").setLevel(logging.WARNING) diff --git a/src/backend/fastapi_app/api_models.py b/src/backend/fastapi_app/api_models.py index 446967ad..06d14a6b 100644 --- a/src/backend/fastapi_app/api_models.py +++ b/src/backend/fastapi_app/api_models.py @@ -1,8 +1,8 @@ from enum import Enum from typing import Any, Optional -from openai.types.chat import ChatCompletionMessageParam -from pydantic import BaseModel +from 
openai.types.responses import ResponseInputItemParam +from pydantic import BaseModel, Field class AIChatRoles(str, Enum): @@ -36,11 +36,31 @@ class ChatRequestContext(BaseModel): class ChatRequest(BaseModel): - messages: list[ChatCompletionMessageParam] + messages: list[ResponseInputItemParam] context: ChatRequestContext sessionState: Optional[Any] = None +class ItemPublic(BaseModel): + id: int + type: str + brand: str + name: str + description: str + price: float + + def to_str_for_rag(self): + return f"Name:{self.name} Description:{self.description} Price:{self.price} Brand:{self.brand} Type:{self.type}" + + +class ItemWithDistance(ItemPublic): + distance: float + + def __init__(self, **data): + super().__init__(**data) + self.distance = round(self.distance, 2) + + class ThoughtStep(BaseModel): title: str description: Any @@ -48,7 +68,7 @@ class ThoughtStep(BaseModel): class RAGContext(BaseModel): - data_points: dict[int, dict[str, Any]] + data_points: dict[int, ItemPublic] thoughts: list[ThoughtStep] followup_questions: Optional[list[str]] = None @@ -69,27 +89,39 @@ class RetrievalResponseDelta(BaseModel): sessionState: Optional[Any] = None -class ItemPublic(BaseModel): - id: int - type: str - brand: str - name: str - description: str - price: float - - -class ItemWithDistance(ItemPublic): - distance: float - - def __init__(self, **data): - super().__init__(**data) - self.distance = round(self.distance, 2) - - class ChatParams(ChatRequestOverrides): prompt_template: str response_token_limit: int = 1024 enable_text_search: bool enable_vector_search: bool original_user_query: str - past_messages: list[ChatCompletionMessageParam] + past_messages: list[ResponseInputItemParam] + + +class Filter(BaseModel): + column: str + comparison_operator: str + value: Any + + +class PriceFilter(Filter): + column: str = Field(default="price", description="The column to filter on (always 'price' for this filter)") + comparison_operator: str = Field(description="The operator for price comparison ('>', '<', '>=', '<=', '=')") + value: float = Field(description="The price value to compare against (e.g., 30.00)") + + +class BrandFilter(Filter): + column: str = Field(default="brand", description="The column to filter on (always 'brand' for this filter)") + comparison_operator: str = Field(description="The operator for brand comparison ('=' or '!=')") + value: str = Field(description="The brand name to compare against (e.g., 'AirStrider')") + + +class SearchResults(BaseModel): + query: str + """The original search query""" + + items: list[ItemPublic] + """List of items that match the search query and filters""" + + filters: list[Filter] + """List of filters applied to the search results""" diff --git a/src/backend/fastapi_app/dependencies.py b/src/backend/fastapi_app/dependencies.py index bd7bc4b4..2715819e 100644 --- a/src/backend/fastapi_app/dependencies.py +++ b/src/backend/fastapi_app/dependencies.py @@ -53,7 +53,7 @@ async def common_parameters(): embedding_column = os.getenv("OLLAMA_EMBEDDING_COLUMN") or "embedding_nomic" elif OPENAI_EMBED_HOST == "github": openai_embed_deployment = None - openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "text-embedding-3-large" + openai_embed_model = os.getenv("GITHUB_EMBED_MODEL") or "openai/text-embedding-3-large" openai_embed_dimensions = int(os.getenv("GITHUB_EMBED_DIMENSIONS", 1024)) embedding_column = os.getenv("GITHUB_EMBEDDING_COLUMN") or "embedding_3l" else: @@ -70,7 +70,7 @@ async def common_parameters(): openai_embed_model = 
os.getenv("OLLAMA_EMBED_MODEL") or "nomic-embed-text" elif OPENAI_CHAT_HOST == "github": openai_chat_deployment = None - openai_chat_model = os.getenv("GITHUB_MODEL") or "gpt-4o" + openai_chat_model = os.getenv("GITHUB_MODEL") or "openai/gpt-4o" else: openai_chat_deployment = None openai_chat_model = os.getenv("OPENAICOM_CHAT_MODEL") or "gpt-3.5-turbo" diff --git a/src/backend/fastapi_app/openai_clients.py b/src/backend/fastapi_app/openai_clients.py index e83e0c41..b704dc9d 100644 --- a/src/backend/fastapi_app/openai_clients.py +++ b/src/backend/fastapi_app/openai_clients.py @@ -9,12 +9,12 @@ async def create_openai_chat_client( - azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential], + azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None], ) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]: openai_chat_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI] OPENAI_CHAT_HOST = os.getenv("OPENAI_CHAT_HOST") if OPENAI_CHAT_HOST == "azure": - api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-03-01-preview" + api_version = os.environ["AZURE_OPENAI_VERSION"] or "2024-10-21" azure_endpoint = os.environ["AZURE_OPENAI_ENDPOINT"] azure_deployment = os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"] if api_key := os.getenv("AZURE_OPENAI_KEY"): @@ -29,7 +29,7 @@ async def create_openai_chat_client( azure_deployment=azure_deployment, api_key=api_key, ) - else: + elif azure_credential: logger.info( "Setting up Azure OpenAI client for chat completions using Azure Identity, endpoint %s, deployment %s", azure_endpoint, @@ -44,6 +44,8 @@ async def create_openai_chat_client( azure_deployment=azure_deployment, azure_ad_token_provider=token_provider, ) + else: + raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.") elif OPENAI_CHAT_HOST == "ollama": logger.info("Setting up OpenAI client for chat completions using Ollama") openai_chat_client = openai.AsyncOpenAI( @@ -52,11 +54,10 @@ async def create_openai_chat_client( ) elif OPENAI_CHAT_HOST == "github": logger.info("Setting up OpenAI client for chat completions using GitHub Models") - github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com/") - github_model = os.getenv("GITHUB_MODEL", "gpt-4o") - logger.info(f"Using GitHub Models with base URL: {github_base_url}, model: {github_model}") + github_model = os.getenv("GITHUB_MODEL", "openai/gpt-4o") + logger.info(f"Using GitHub Models with model: {github_model}") openai_chat_client = openai.AsyncOpenAI( - base_url=github_base_url, + base_url="https://models.github.ai/inference", api_key=os.getenv("GITHUB_TOKEN"), ) else: @@ -67,7 +68,7 @@ async def create_openai_chat_client( async def create_openai_embed_client( - azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential], + azure_credential: Union[azure.identity.AzureDeveloperCliCredential, azure.identity.ManagedIdentityCredential, None], ) -> Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI]: openai_embed_client: Union[openai.AsyncAzureOpenAI, openai.AsyncOpenAI] OPENAI_EMBED_HOST = os.getenv("OPENAI_EMBED_HOST") @@ -87,7 +88,7 @@ async def create_openai_embed_client( azure_deployment=azure_deployment, api_key=api_key, ) - else: + elif azure_credential: logger.info( "Setting up Azure OpenAI client for embeddings using Azure Identity, endpoint %s, deployment %s", azure_endpoint, @@ -102,6 +103,8 
@@ async def create_openai_embed_client( azure_deployment=azure_deployment, azure_ad_token_provider=token_provider, ) + else: + raise ValueError("Azure OpenAI client requires either an API key or Azure Identity credential.") elif OPENAI_EMBED_HOST == "ollama": logger.info("Setting up OpenAI client for embeddings using Ollama") openai_embed_client = openai.AsyncOpenAI( @@ -110,11 +113,10 @@ async def create_openai_embed_client( ) elif OPENAI_EMBED_HOST == "github": logger.info("Setting up OpenAI client for embeddings using GitHub Models") - github_base_url = os.getenv("GITHUB_BASE_URL", "https://models.inference.ai.azure.com/") - github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "text-embedding-3-small") - logger.info(f"Using GitHub Models with base URL: {github_base_url}, embedding model: {github_embed_model}") + github_embed_model = os.getenv("GITHUB_EMBED_MODEL", "openai/text-embedding-3-small") + logger.info(f"Using GitHub Models with embedding model: {github_embed_model}") openai_embed_client = openai.AsyncOpenAI( - base_url=github_base_url, + base_url="https://models.github.ai/inference", api_key=os.getenv("GITHUB_TOKEN"), ) else: diff --git a/src/backend/fastapi_app/postgres_searcher.py b/src/backend/fastapi_app/postgres_searcher.py index cf753632..aa84eaf8 100644 --- a/src/backend/fastapi_app/postgres_searcher.py +++ b/src/backend/fastapi_app/postgres_searcher.py @@ -5,6 +5,7 @@ from sqlalchemy import Float, Integer, column, select, text from sqlalchemy.ext.asyncio import AsyncSession +from fastapi_app.api_models import Filter from fastapi_app.embeddings import compute_text_embedding from fastapi_app.postgres_models import Item @@ -26,21 +27,24 @@ def __init__( self.embed_dimensions = embed_dimensions self.embedding_column = embedding_column - def build_filter_clause(self, filters) -> tuple[str, str]: + def build_filter_clause(self, filters: Optional[list[Filter]]) -> tuple[str, str]: if filters is None: return "", "" filter_clauses = [] for filter in filters: - if isinstance(filter["value"], str): - filter["value"] = f"'{filter['value']}'" - filter_clauses.append(f"{filter['column']} {filter['comparison_operator']} {filter['value']}") + filter_value = f"'{filter.value}'" if isinstance(filter.value, str) else filter.value + filter_clauses.append(f"{filter.column} {filter.comparison_operator} {filter_value}") filter_clause = " AND ".join(filter_clauses) if len(filter_clause) > 0: return f"WHERE {filter_clause}", f"AND {filter_clause}" return "", "" async def search( - self, query_text: Optional[str], query_vector: list[float], top: int = 5, filters: Optional[list[dict]] = None + self, + query_text: Optional[str], + query_vector: list[float], + top: int = 5, + filters: Optional[list[Filter]] = None, ): filter_clause_where, filter_clause_and = self.build_filter_clause(filters) table_name = Item.__tablename__ @@ -106,7 +110,7 @@ async def search_and_embed( top: int = 5, enable_vector_search: bool = False, enable_text_search: bool = False, - filters: Optional[list[dict]] = None, + filters: Optional[list[Filter]] = None, ) -> list[Item]: """ Search rows by query text. Optionally converts the query text to a vector if enable_vector_search is True. 
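For orientation, here is a minimal sketch of how the typed filters introduced in `api_models.py` flow into `build_filter_clause` above. It assumes only the models and the method signature shown in this diff; the expected output in the comments is inferred from the clause-building logic, not taken from the repository's tests.

```python
# Illustrative only; not part of this diff. Shows how the Pydantic
# filter models replace the old dict-based filters.
from fastapi_app.api_models import BrandFilter, PriceFilter

filters = [
    # column defaults to "price"; the agent supplies operator and value
    PriceFilter(comparison_operator="<", value=50.0),
    # column defaults to "brand"; string values get quoted by the builder
    BrandFilter(comparison_operator="=", value="AirStrider"),
]

# build_filter_clause returns both forms so a caller can splice the
# filters into a standalone WHERE clause or append to an existing one:
#   where, and_ = searcher.build_filter_clause(filters)  # searcher: PostgresSearcher
#   where -> "WHERE price < 50.0 AND brand = 'AirStrider'"
#   and_  -> "AND price < 50.0 AND brand = 'AirStrider'"
```

Returning the `WHERE`/`AND` pair keeps the hybrid-search SQL templates free of conditional string assembly at the call sites.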
diff --git a/src/backend/fastapi_app/prompts/query.txt b/src/backend/fastapi_app/prompts/query.txt index 6bbb0a23..54464bcb 100644 --- a/src/backend/fastapi_app/prompts/query.txt +++ b/src/backend/fastapi_app/prompts/query.txt @@ -1,6 +1,5 @@ -Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows. -You have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type. -Generate a search query based on the conversation and the new question. -If the question is not in English, translate the question to English before generating the search query. -If you cannot generate a search query, return the original user question. -DO NOT return anything besides the query. +Your job is to find search results based off the user's question and past messages. +You have access to only these tools: +1. **search_database**: This tool allows you to search a table for items based on a query. + You can pass in a search query and optional filters. +Once you get the search results, you're done. diff --git a/src/backend/fastapi_app/prompts/query_fewshots.json b/src/backend/fastapi_app/prompts/query_fewshots.json index d5a026f2..0ef450fd 100644 --- a/src/backend/fastapi_app/prompts/query_fewshots.json +++ b/src/backend/fastapi_app/prompts/query_fewshots.json @@ -1,34 +1,36 @@ [ - {"role": "user", "content": "good options for climbing gear that can be used outside?"}, - {"role": "assistant", "tool_calls": [ - { - "id": "call_abc123", - "type": "function", - "function": { - "arguments": "{\"search_query\":\"climbing gear outside\"}", - "name": "search_database" - } - } - ]}, - { - "role": "tool", - "tool_call_id": "call_abc123", - "content": "Search results for climbing gear that can be used outside: ..." - }, - {"role": "user", "content": "are there any shoes less than $50?"}, - {"role": "assistant", "tool_calls": [ - { - "id": "call_abc456", - "type": "function", - "function": { - "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}", - "name": "search_database" - } - } - ]}, - { - "role": "tool", - "tool_call_id": "call_abc456", - "content": "Search results for shoes cheaper than 50: ..." - } + { + "role": "user", + "content": "good options for climbing gear that can be used outside?" + }, + { + "id": "madeup", + "call_id": "call_abc123", + "name": "search_database", + "arguments": "{\"search_query\":\"climbing gear outside\"}", + "type": "function_call" + }, + { + "id": "madeupoutput", + "call_id": "call_abc123", + "output": "Search results for climbing gear that can be used outside: ...", + "type": "function_call_output" + }, + { + "role": "user", + "content": "are there any shoes less than $50?" 
+ }, + { + "id": "madeup", + "call_id": "call_abc456", + "name": "search_database", + "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}", + "type": "function_call" + }, + { + "id": "madeupoutput", + "call_id": "call_abc456", + "output": "Search results for shoes cheaper than 50: ...", + "type": "function_call_output" + } ] diff --git a/src/backend/fastapi_app/rag_advanced.py b/src/backend/fastapi_app/rag_advanced.py index fe75ea5f..eb53aa6a 100644 --- a/src/backend/fastapi_app/rag_advanced.py +++ b/src/backend/fastapi_app/rag_advanced.py @@ -1,172 +1,173 @@ +import json from collections.abc import AsyncGenerator -from typing import Any, Final, Optional, Union - -from openai import AsyncAzureOpenAI, AsyncOpenAI, AsyncStream -from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageParam -from openai_messages_token_helper import build_messages, get_token_limit +from typing import Optional, Union + +from agents import ( + Agent, + ItemHelpers, + ModelSettings, + OpenAIChatCompletionsModel, + Runner, + ToolCallOutputItem, + function_tool, + set_tracing_disabled, +) +from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai.types.responses import EasyInputMessageParam, ResponseInputItemParam, ResponseTextDeltaEvent from fastapi_app.api_models import ( AIChatRoles, + BrandFilter, + ChatRequestOverrides, + Filter, + ItemPublic, Message, + PriceFilter, RAGContext, RetrievalResponse, RetrievalResponseDelta, + SearchResults, ThoughtStep, ) -from fastapi_app.postgres_models import Item from fastapi_app.postgres_searcher import PostgresSearcher -from fastapi_app.query_rewriter import build_search_function, extract_search_arguments -from fastapi_app.rag_base import ChatParams, RAGChatBase +from fastapi_app.rag_base import RAGChatBase + +set_tracing_disabled(disabled=True) class AdvancedRAGChat(RAGChatBase): + query_prompt_template = open(RAGChatBase.prompts_dir / "query.txt").read() + query_fewshots = open(RAGChatBase.prompts_dir / "query_fewshots.json").read() + def __init__( self, *, + messages: list[ResponseInputItemParam], + overrides: ChatRequestOverrides, searcher: PostgresSearcher, openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI], chat_model: str, chat_deployment: Optional[str], # Not needed for non-Azure OpenAI ): self.searcher = searcher - self.openai_chat_client = openai_chat_client - self.chat_model = chat_model - self.chat_deployment = chat_deployment - self.chat_token_limit = get_token_limit(chat_model, default_to_minimum=True) - - async def generate_search_query( - self, - original_user_query: str, - past_messages: list[ChatCompletionMessageParam], - query_response_token_limit: int, - seed: Optional[int] = None, - ) -> tuple[list[ChatCompletionMessageParam], Union[Any, str, None], list]: - """Generate an optimized keyword search query based on the chat history and the last question""" - - tools = build_search_function() - tool_choice: Final = "auto" - - query_messages: list[ChatCompletionMessageParam] = build_messages( - model=self.chat_model, - system_prompt=self.query_prompt_template, - few_shots=self.query_fewshots, - new_user_content=original_user_query, - past_messages=past_messages, - max_tokens=self.chat_token_limit - query_response_token_limit, - tools=tools, - tool_choice=tool_choice, - fallback_to_default=True, + self.chat_params = self.get_chat_params(messages, overrides) + self.model_for_thoughts = ( + {"model": chat_model, "deployment": chat_deployment} if chat_deployment else 
{"model": chat_model} ) - - chat_completion: ChatCompletion = await self.openai_chat_client.chat.completions.create( - messages=query_messages, - # Azure OpenAI takes the deployment name as the model name - model=self.chat_deployment if self.chat_deployment else self.chat_model, - temperature=0.0, # Minimize creativity for search query generation - max_tokens=query_response_token_limit, # Setting too low risks malformed JSON, too high risks performance - n=1, - tools=tools, - tool_choice=tool_choice, - seed=seed, + openai_agents_model = OpenAIChatCompletionsModel( + model=chat_model if chat_deployment is None else chat_deployment, openai_client=openai_chat_client ) - - query_text, filters = extract_search_arguments(original_user_query, chat_completion) - - return query_messages, query_text, filters - - async def prepare_context( - self, chat_params: ChatParams - ) -> tuple[list[ChatCompletionMessageParam], list[Item], list[ThoughtStep]]: - query_messages, query_text, filters = await self.generate_search_query( - original_user_query=chat_params.original_user_query, - past_messages=chat_params.past_messages, - query_response_token_limit=500, - seed=chat_params.seed, + self.search_agent = Agent( + name="Searcher", + instructions=self.query_prompt_template, + tools=[function_tool(self.search_database)], + tool_use_behavior="stop_on_first_tool", + model=openai_agents_model, + ) + self.answer_agent = Agent( + name="Answerer", + instructions=self.answer_prompt_template, + model=openai_agents_model, + model_settings=ModelSettings( + temperature=self.chat_params.temperature, + max_tokens=self.chat_params.response_token_limit, + extra_body={"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}, + ), ) - # Retrieve relevant rows from the database with the GPT optimized query + async def search_database( + self, + search_query: str, + price_filter: Optional[PriceFilter] = None, + brand_filter: Optional[BrandFilter] = None, + ) -> SearchResults: + """ + Search PostgreSQL database for relevant products based on user query + + Args: + search_query: English query string to use for full text search, e.g. 'red shoes'. 
+ price_filter: Filter search results based on price of the product + brand_filter: Filter search results based on brand of the product + + Returns: + List of formatted items that match the search query and filters + """ + # Only send non-None filters + filters: list[Filter] = [] + if price_filter: + filters.append(price_filter) + if brand_filter: + filters.append(brand_filter) results = await self.searcher.search_and_embed( - query_text, - top=chat_params.top, - enable_vector_search=chat_params.enable_vector_search, - enable_text_search=chat_params.enable_text_search, + search_query, + top=self.chat_params.top, + enable_vector_search=self.chat_params.enable_vector_search, + enable_text_search=self.chat_params.enable_text_search, filters=filters, ) - - sources_content = [f"[{(item.id)}]:{item.to_str_for_rag()}\n\n" for item in results] - content = "\n".join(sources_content) - - # Generate a contextual and content specific answer using the search results and chat history - contextual_messages: list[ChatCompletionMessageParam] = build_messages( - model=self.chat_model, - system_prompt=chat_params.prompt_template, - new_user_content=chat_params.original_user_query + "\n\nSources:\n" + content, - past_messages=chat_params.past_messages, - max_tokens=self.chat_token_limit - chat_params.response_token_limit, - fallback_to_default=True, + return SearchResults( + query=search_query, items=[ItemPublic.model_validate(item.to_dict()) for item in results], filters=filters ) + async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]: + few_shots: list[ResponseInputItemParam] = json.loads(self.query_fewshots) + user_query = f"Find search results for user query: {self.chat_params.original_user_query}" + new_user_message = EasyInputMessageParam(role="user", content=user_query) + all_messages = few_shots + self.chat_params.past_messages + [new_user_message] + + run_results = await Runner.run(self.search_agent, input=all_messages) + most_recent_response = run_results.new_items[-1] + if isinstance(most_recent_response, ToolCallOutputItem): + search_results = most_recent_response.output + else: + raise ValueError("Error retrieving search results, model did not call tool properly") + thoughts = [ ThoughtStep( title="Prompt to generate search arguments", - description=query_messages, - props=( - {"model": self.chat_model, "deployment": self.chat_deployment} - if self.chat_deployment - else {"model": self.chat_model} - ), + description=[{"content": self.query_prompt_template}] + + ItemHelpers.input_to_new_input_list(run_results.input), + props=self.model_for_thoughts, ), ThoughtStep( title="Search using generated search arguments", - description=query_text, + description=search_results.query, props={ - "top": chat_params.top, - "vector_search": chat_params.enable_vector_search, - "text_search": chat_params.enable_text_search, - "filters": filters, + "top": self.chat_params.top, + "vector_search": self.chat_params.enable_vector_search, + "text_search": self.chat_params.enable_text_search, + "filters": search_results.filters, }, ), ThoughtStep( title="Search results", - description=[result.to_dict() for result in results], + description=search_results.items, ), ] - return contextual_messages, results, thoughts + return search_results.items, thoughts async def answer( self, - chat_params: ChatParams, - contextual_messages: list[ChatCompletionMessageParam], - results: list[Item], + items: list[ItemPublic], earlier_thoughts: list[ThoughtStep], ) -> RetrievalResponse: - chat_completion_response: 
ChatCompletion = await self.openai_chat_client.chat.completions.create( - # Azure OpenAI takes the deployment name as the model name - model=self.chat_deployment if self.chat_deployment else self.chat_model, - messages=contextual_messages, - temperature=chat_params.temperature, - max_tokens=chat_params.response_token_limit, - n=1, - stream=False, - seed=chat_params.seed, + run_results = await Runner.run( + self.answer_agent, + input=self.chat_params.past_messages + + [{"content": self.prepare_rag_request(self.chat_params.original_user_query, items), "role": "user"}], ) return RetrievalResponse( - message=Message( - content=str(chat_completion_response.choices[0].message.content), role=AIChatRoles.ASSISTANT - ), + message=Message(content=str(run_results.final_output), role=AIChatRoles.ASSISTANT), context=RAGContext( - data_points={item.id: item.to_dict() for item in results}, + data_points={item.id: item for item in items}, thoughts=earlier_thoughts + [ ThoughtStep( title="Prompt to generate answer", - description=contextual_messages, - props=( - {"model": self.chat_model, "deployment": self.chat_deployment} - if self.chat_deployment - else {"model": self.chat_model} - ), + description=[{"content": self.answer_prompt_template}] + + ItemHelpers.input_to_new_input_list(run_results.input), + props=self.model_for_thoughts, ), ], ), @@ -174,45 +175,31 @@ async def answer( async def answer_stream( self, - chat_params: ChatParams, - contextual_messages: list[ChatCompletionMessageParam], - results: list[Item], + items: list[ItemPublic], earlier_thoughts: list[ThoughtStep], ) -> AsyncGenerator[RetrievalResponseDelta, None]: - chat_completion_async_stream: AsyncStream[ - ChatCompletionChunk - ] = await self.openai_chat_client.chat.completions.create( - # Azure OpenAI takes the deployment name as the model name - model=self.chat_deployment if self.chat_deployment else self.chat_model, - messages=contextual_messages, - temperature=chat_params.temperature, - max_tokens=chat_params.response_token_limit, - n=1, - stream=True, + run_results = Runner.run_streamed( + self.answer_agent, + input=self.chat_params.past_messages + + [{"content": self.prepare_rag_request(self.chat_params.original_user_query, items), "role": "user"}], # noqa ) yield RetrievalResponseDelta( context=RAGContext( - data_points={item.id: item.to_dict() for item in results}, + data_points={item.id: item for item in items}, thoughts=earlier_thoughts + [ ThoughtStep( title="Prompt to generate answer", - description=contextual_messages, - props=( - {"model": self.chat_model, "deployment": self.chat_deployment} - if self.chat_deployment - else {"model": self.chat_model} - ), + description=[{"content": self.answer_prompt_template}] + + ItemHelpers.input_to_new_input_list(run_results.input), + props=self.model_for_thoughts, ), ], ), ) - async for response_chunk in chat_completion_async_stream: - # first response has empty choices and last response has empty content - if response_chunk.choices and response_chunk.choices[0].delta.content: - yield RetrievalResponseDelta( - delta=Message(content=str(response_chunk.choices[0].delta.content), role=AIChatRoles.ASSISTANT) - ) + async for event in run_results.stream_events(): + if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent): + yield RetrievalResponseDelta(delta=Message(content=str(event.data.delta), role=AIChatRoles.ASSISTANT)) return diff --git a/src/backend/fastapi_app/rag_base.py b/src/backend/fastapi_app/rag_base.py index 34fba44a..54e633c2 100644 --- 
a/src/backend/fastapi_app/rag_base.py +++ b/src/backend/fastapi_app/rag_base.py @@ -1,37 +1,33 @@ -import json import pathlib from abc import ABC, abstractmethod from collections.abc import AsyncGenerator -from openai.types.chat import ChatCompletionMessageParam +from openai.types.responses import ResponseInputItemParam from fastapi_app.api_models import ( ChatParams, ChatRequestOverrides, + ItemPublic, RetrievalResponse, RetrievalResponseDelta, ThoughtStep, ) -from fastapi_app.postgres_models import Item class RAGChatBase(ABC): - current_dir = pathlib.Path(__file__).parent - query_prompt_template = open(current_dir / "prompts/query.txt").read() - query_fewshots = json.loads(open(current_dir / "prompts/query_fewshots.json").read()) - answer_prompt_template = open(current_dir / "prompts/answer.txt").read() + prompts_dir = pathlib.Path(__file__).parent / "prompts/" + answer_prompt_template = open(prompts_dir / "answer.txt").read() - def get_params(self, messages: list[ChatCompletionMessageParam], overrides: ChatRequestOverrides) -> ChatParams: + def get_chat_params(self, messages: list[ResponseInputItemParam], overrides: ChatRequestOverrides) -> ChatParams: response_token_limit = 1024 prompt_template = overrides.prompt_template or self.answer_prompt_template enable_text_search = overrides.retrieval_mode in ["text", "hybrid", None] enable_vector_search = overrides.retrieval_mode in ["vectors", "hybrid", None] - original_user_query = messages[-1]["content"] + original_user_query = messages[-1].get("content") if not isinstance(original_user_query, str): raise ValueError("The most recent message content must be a string.") - past_messages = messages[:-1] return ChatParams( top=overrides.top, @@ -44,21 +40,21 @@ def get_params(self, messages: list[ChatCompletionMessageParam], overrides: Chat enable_text_search=enable_text_search, enable_vector_search=enable_vector_search, original_user_query=original_user_query, - past_messages=past_messages, + past_messages=messages[:-1], ) @abstractmethod - async def prepare_context( - self, chat_params: ChatParams - ) -> tuple[list[ChatCompletionMessageParam], list[Item], list[ThoughtStep]]: + async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]: raise NotImplementedError + def prepare_rag_request(self, user_query, items: list[ItemPublic]) -> str: + sources_str = "\n".join([f"[{item.id}]:{item.to_str_for_rag()}" for item in items]) + return f"{user_query}Sources:\n{sources_str}" + @abstractmethod async def answer( self, - chat_params: ChatParams, - contextual_messages: list[ChatCompletionMessageParam], - results: list[Item], + items: list[ItemPublic], earlier_thoughts: list[ThoughtStep], ) -> RetrievalResponse: raise NotImplementedError @@ -66,9 +62,7 @@ async def answer( @abstractmethod async def answer_stream( self, - chat_params: ChatParams, - contextual_messages: list[ChatCompletionMessageParam], - results: list[Item], + items: list[ItemPublic], earlier_thoughts: list[ThoughtStep], ) -> AsyncGenerator[RetrievalResponseDelta, None]: raise NotImplementedError diff --git a/src/backend/fastapi_app/rag_simple.py b/src/backend/fastapi_app/rag_simple.py index 79350ab7..69126618 100644 --- a/src/backend/fastapi_app/rag_simple.py +++ b/src/backend/fastapi_app/rag_simple.py @@ -1,115 +1,106 @@ from collections.abc import AsyncGenerator from typing import Optional, Union -from openai import AsyncAzureOpenAI, AsyncOpenAI, AsyncStream -from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessageParam -from 
openai_messages_token_helper import build_messages, get_token_limit +from agents import Agent, ItemHelpers, ModelSettings, OpenAIChatCompletionsModel, Runner, set_tracing_disabled +from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai.types.responses import ResponseInputItemParam, ResponseTextDeltaEvent from fastapi_app.api_models import ( AIChatRoles, + ChatRequestOverrides, + ItemPublic, Message, RAGContext, RetrievalResponse, RetrievalResponseDelta, ThoughtStep, ) -from fastapi_app.postgres_models import Item from fastapi_app.postgres_searcher import PostgresSearcher -from fastapi_app.rag_base import ChatParams, RAGChatBase +from fastapi_app.rag_base import RAGChatBase + +set_tracing_disabled(disabled=True) class SimpleRAGChat(RAGChatBase): def __init__( self, *, + messages: list[ResponseInputItemParam], + overrides: ChatRequestOverrides, searcher: PostgresSearcher, openai_chat_client: Union[AsyncOpenAI, AsyncAzureOpenAI], chat_model: str, chat_deployment: Optional[str], # Not needed for non-Azure OpenAI ): self.searcher = searcher - self.openai_chat_client = openai_chat_client - self.chat_model = chat_model - self.chat_deployment = chat_deployment - self.chat_token_limit = get_token_limit(chat_model, default_to_minimum=True) + self.chat_params = self.get_chat_params(messages, overrides) + self.model_for_thoughts = ( + {"model": chat_model, "deployment": chat_deployment} if chat_deployment else {"model": chat_model} + ) + openai_agents_model = OpenAIChatCompletionsModel( + model=chat_model if chat_deployment is None else chat_deployment, openai_client=openai_chat_client + ) + self.answer_agent = Agent( + name="Answerer", + instructions=self.answer_prompt_template, + model=openai_agents_model, + model_settings=ModelSettings( + temperature=self.chat_params.temperature, + max_tokens=self.chat_params.response_token_limit, + extra_body={"seed": self.chat_params.seed} if self.chat_params.seed is not None else {}, + ), + ) - async def prepare_context( - self, chat_params: ChatParams - ) -> tuple[list[ChatCompletionMessageParam], list[Item], list[ThoughtStep]]: + async def prepare_context(self) -> tuple[list[ItemPublic], list[ThoughtStep]]: """Retrieve relevant rows from the database and build a context for the chat model.""" - # Retrieve relevant rows from the database results = await self.searcher.search_and_embed( - chat_params.original_user_query, - top=chat_params.top, - enable_vector_search=chat_params.enable_vector_search, - enable_text_search=chat_params.enable_text_search, - ) - - sources_content = [f"[{(item.id)}]:{item.to_str_for_rag()}\n\n" for item in results] - content = "\n".join(sources_content) - - # Generate a contextual and content specific answer using the search results and chat history - contextual_messages: list[ChatCompletionMessageParam] = build_messages( - model=self.chat_model, - system_prompt=chat_params.prompt_template, - new_user_content=chat_params.original_user_query + "\n\nSources:\n" + content, - past_messages=chat_params.past_messages, - max_tokens=self.chat_token_limit - chat_params.response_token_limit, - fallback_to_default=True, + self.chat_params.original_user_query, + top=self.chat_params.top, + enable_vector_search=self.chat_params.enable_vector_search, + enable_text_search=self.chat_params.enable_text_search, ) + items = [ItemPublic.model_validate(item.to_dict()) for item in results] thoughts = [ ThoughtStep( title="Search query for database", - description=chat_params.original_user_query, + description=self.chat_params.original_user_query, 
props={ - "top": chat_params.top, - "vector_search": chat_params.enable_vector_search, - "text_search": chat_params.enable_text_search, + "top": self.chat_params.top, + "vector_search": self.chat_params.enable_vector_search, + "text_search": self.chat_params.enable_text_search, }, ), ThoughtStep( title="Search results", - description=[result.to_dict() for result in results], + description=items, ), ] - return contextual_messages, results, thoughts + return items, thoughts async def answer( self, - chat_params: ChatParams, - contextual_messages: list[ChatCompletionMessageParam], - results: list[Item], + items: list[ItemPublic], earlier_thoughts: list[ThoughtStep], ) -> RetrievalResponse: - chat_completion_response: ChatCompletion = await self.openai_chat_client.chat.completions.create( - # Azure OpenAI takes the deployment name as the model name - model=self.chat_deployment if self.chat_deployment else self.chat_model, - messages=contextual_messages, - temperature=chat_params.temperature, - max_tokens=chat_params.response_token_limit, - n=1, - stream=False, - seed=chat_params.seed, + run_results = await Runner.run( + self.answer_agent, + input=self.chat_params.past_messages + + [{"content": self.prepare_rag_request(self.chat_params.original_user_query, items), "role": "user"}], ) return RetrievalResponse( - message=Message( - content=str(chat_completion_response.choices[0].message.content), role=AIChatRoles.ASSISTANT - ), + message=Message(content=str(run_results.final_output), role=AIChatRoles.ASSISTANT), context=RAGContext( - data_points={item.id: item.to_dict() for item in results}, + data_points={item.id: item for item in items}, thoughts=earlier_thoughts + [ ThoughtStep( title="Prompt to generate answer", - description=contextual_messages, - props=( - {"model": self.chat_model, "deployment": self.chat_deployment} - if self.chat_deployment - else {"model": self.chat_model} - ), + description=[{"content": self.answer_prompt_template}] + + ItemHelpers.input_to_new_input_list(run_results.input), + props=self.model_for_thoughts, ), ], ), @@ -117,45 +108,31 @@ async def answer( async def answer_stream( self, - chat_params: ChatParams, - contextual_messages: list[ChatCompletionMessageParam], - results: list[Item], + items: list[ItemPublic], earlier_thoughts: list[ThoughtStep], ) -> AsyncGenerator[RetrievalResponseDelta, None]: - chat_completion_async_stream: AsyncStream[ - ChatCompletionChunk - ] = await self.openai_chat_client.chat.completions.create( - # Azure OpenAI takes the deployment name as the model name - model=self.chat_deployment if self.chat_deployment else self.chat_model, - messages=contextual_messages, - temperature=chat_params.temperature, - max_tokens=chat_params.response_token_limit, - n=1, - stream=True, - seed=chat_params.seed, + run_results = Runner.run_streamed( + self.answer_agent, + input=self.chat_params.past_messages + + [{"content": self.prepare_rag_request(self.chat_params.original_user_query, items), "role": "user"}], ) yield RetrievalResponseDelta( context=RAGContext( - data_points={item.id: item.to_dict() for item in results}, + data_points={item.id: item for item in items}, thoughts=earlier_thoughts + [ ThoughtStep( title="Prompt to generate answer", - description=contextual_messages, - props=( - {"model": self.chat_model, "deployment": self.chat_deployment} - if self.chat_deployment - else {"model": self.chat_model} - ), + description=[{"content": self.answer_agent.instructions}] + + ItemHelpers.input_to_new_input_list(run_results.input), + 
props=self.model_for_thoughts, ), ], ), ) - async for response_chunk in chat_completion_async_stream: - # first response has empty choices and last response has empty content - if response_chunk.choices and response_chunk.choices[0].delta.content: - yield RetrievalResponseDelta( - delta=Message(content=str(response_chunk.choices[0].delta.content), role=AIChatRoles.ASSISTANT) - ) + + async for event in run_results.stream_events(): + if event.type == "raw_response_event" and isinstance(event.data, ResponseTextDeltaEvent): + yield RetrievalResponseDelta(delta=Message(content=str(event.data.delta), role=AIChatRoles.ASSISTANT)) return diff --git a/src/backend/fastapi_app/routes/api_routes.py b/src/backend/fastapi_app/routes/api_routes.py index d7486730..f566886c 100644 --- a/src/backend/fastapi_app/routes/api_routes.py +++ b/src/backend/fastapi_app/routes/api_routes.py @@ -121,6 +121,8 @@ async def chat_handler( rag_flow: Union[SimpleRAGChat, AdvancedRAGChat] if chat_request.context.overrides.use_advanced_flow: rag_flow = AdvancedRAGChat( + messages=chat_request.messages, + overrides=chat_request.context.overrides, searcher=searcher, openai_chat_client=openai_chat.client, chat_model=context.openai_chat_model, @@ -128,18 +130,16 @@ async def chat_handler( ) else: rag_flow = SimpleRAGChat( + messages=chat_request.messages, + overrides=chat_request.context.overrides, searcher=searcher, openai_chat_client=openai_chat.client, chat_model=context.openai_chat_model, chat_deployment=context.openai_chat_deployment, ) - chat_params = rag_flow.get_params(chat_request.messages, chat_request.context.overrides) - - contextual_messages, results, thoughts = await rag_flow.prepare_context(chat_params) - response = await rag_flow.answer( - chat_params=chat_params, contextual_messages=contextual_messages, results=results, earlier_thoughts=thoughts - ) + items, thoughts = await rag_flow.prepare_context() + response = await rag_flow.answer(items=items, earlier_thoughts=thoughts) return response except Exception as e: if isinstance(e, APIError) and e.code == "content_filter": @@ -169,6 +169,8 @@ async def chat_stream_handler( rag_flow: Union[SimpleRAGChat, AdvancedRAGChat] if chat_request.context.overrides.use_advanced_flow: rag_flow = AdvancedRAGChat( + messages=chat_request.messages, + overrides=chat_request.context.overrides, searcher=searcher, openai_chat_client=openai_chat.client, chat_model=context.openai_chat_model, @@ -176,21 +178,19 @@ async def chat_stream_handler( ) else: rag_flow = SimpleRAGChat( + messages=chat_request.messages, + overrides=chat_request.context.overrides, searcher=searcher, openai_chat_client=openai_chat.client, chat_model=context.openai_chat_model, chat_deployment=context.openai_chat_deployment, ) - chat_params = rag_flow.get_params(chat_request.messages, chat_request.context.overrides) - - # Intentionally do this before we stream down a response, to avoid using database connections during stream - # See https://github.com/tiangolo/fastapi/discussions/11321 try: - contextual_messages, results, thoughts = await rag_flow.prepare_context(chat_params) - result = rag_flow.answer_stream( - chat_params=chat_params, contextual_messages=contextual_messages, results=results, earlier_thoughts=thoughts - ) + # Intentionally do the search before we stream down the answer, to avoid using database connections during the stream + # See https://github.com/tiangolo/fastapi/discussions/11321 + items, thoughts = await rag_flow.prepare_context() + result = rag_flow.answer_stream(items, thoughts) return 
StreamingResponse(content=format_as_ndjson(result), media_type="application/x-ndjson") except Exception as e: if isinstance(e, APIError) and e.code == "content_filter": @@ -198,3 +198,9 @@ async def chat_stream_handler( content=json.dumps(ERROR_FILTER) + "\n", media_type="application/x-ndjson", ) + else: + logging.exception("Exception while generating response: %s", e) + return StreamingResponse( + content=json.dumps({"error": str(e)}, ensure_ascii=False) + "\n", + media_type="application/x-ndjson", + ) diff --git a/src/backend/pyproject.toml b/src/backend/pyproject.toml index cdadc177..7ede97c9 100644 --- a/src/backend/pyproject.toml +++ b/src/backend/pyproject.toml @@ -6,19 +6,18 @@ dependencies = [ "fastapi>=0.111.0,<1.0.0", "uvicorn>=0.30.1,<1.0.0", "python-dotenv>=1.0.1,<2.0.0", - "environs>=11.0.0,<12.0.0", + "environs>=11.0.0,<15.0.0", "azure-identity>=1.16.1,<2.0.0", "aiohttp>=3.9.5,<4.0.0", "asyncpg>=0.29.0,<1.0.0", "SQLAlchemy[asyncio]>=2.0.30,<3.0.0", "pgvector>=0.3.0,<0.4.0", "openai>=1.34.0,<2.0.0", - "tiktoken>=0.7.0,<0.8.0", - "openai-messages-token-helper>=0.1.8,<0.2.0", "azure-monitor-opentelemetry>=1.6.0,<2.0.0", "opentelemetry-instrumentation-sqlalchemy", "opentelemetry-instrumentation-aiohttp-client", "opentelemetry-instrumentation-openai", + "openai-agents" ] [build-system] diff --git a/src/backend/requirements.txt b/src/backend/requirements.txt index 6972e8a3..b83031b6 100644 --- a/src/backend/requirements.txt +++ b/src/backend/requirements.txt @@ -1,19 +1,21 @@ # This file was autogenerated by uv via the following command: -# uv pip compile pyproject.toml -o requirements.txt --python-version 3.9 -aiohappyeyeballs==2.4.4 +# uv pip compile pyproject.toml -o requirements_new.txt --python-version 3.10 +aiohappyeyeballs==2.6.1 # via aiohttp -aiohttp==3.11.14 +aiohttp==3.12.14 # via fastapi-app (pyproject.toml) -aiosignal==1.3.1 +aiosignal==1.4.0 # via aiohttp annotated-types==0.7.0 # via pydantic -anyio==4.6.2.post1 +anyio==4.9.0 # via # httpx + # mcp # openai + # sse-starlette # starlette -asgiref==3.8.1 +asgiref==3.9.1 # via opentelemetry-instrumentation-asgi async-timeout==5.0.1 # via @@ -21,24 +23,31 @@ async-timeout==5.0.1 # asyncpg asyncpg==0.30.0 # via fastapi-app (pyproject.toml) -attrs==24.2.0 - # via aiohttp -azure-core==1.32.0 +attrs==25.3.0 + # via + # aiohttp + # jsonschema + # referencing +azure-core==1.35.0 # via # azure-core-tracing-opentelemetry # azure-identity # azure-monitor-opentelemetry # azure-monitor-opentelemetry-exporter # msrest -azure-core-tracing-opentelemetry==1.0.0b11 +azure-core-tracing-opentelemetry==1.0.0b12 # via azure-monitor-opentelemetry -azure-identity==1.19.0 - # via fastapi-app (pyproject.toml) -azure-monitor-opentelemetry==1.6.3 +azure-identity==1.23.1 + # via + # fastapi-app (pyproject.toml) + # azure-monitor-opentelemetry-exporter +azure-monitor-opentelemetry==1.6.12 # via fastapi-app (pyproject.toml) -azure-monitor-opentelemetry-exporter==1.0.0b32 +azure-monitor-opentelemetry-exporter==1.0.0b40 # via azure-monitor-opentelemetry -certifi==2024.8.30 +backports-datetime-fromisoformat==2.0.3 + # via marshmallow +certifi==2025.7.14 # via # httpcore # httpx @@ -46,82 +55,94 @@ certifi==2024.8.30 # requests cffi==1.17.1 # via cryptography -charset-normalizer==3.4.0 +charset-normalizer==3.4.2 # via requests -click==8.1.7 +click==8.2.1 # via uvicorn -cryptography==44.0.1 +colorama==0.4.6 + # via griffe +cryptography==45.0.5 # via # azure-identity # msal # pyjwt -deprecated==1.2.15 +deprecated==1.2.18 # via # opentelemetry-api # 
opentelemetry-semantic-conventions distro==1.9.0 # via openai -environs==11.2.1 +environs==14.2.0 # via fastapi-app (pyproject.toml) -exceptiongroup==1.2.2 +exceptiongroup==1.3.0 # via anyio -fastapi==0.115.8 +fastapi==0.116.1 # via fastapi-app (pyproject.toml) fixedint==0.1.6 # via azure-monitor-opentelemetry-exporter -frozenlist==1.5.0 +frozenlist==1.7.0 # via # aiohttp # aiosignal -greenlet==3.1.1 +greenlet==3.2.3 # via sqlalchemy -h11==0.14.0 +griffe==1.7.3 + # via openai-agents +h11==0.16.0 # via # httpcore # uvicorn -httpcore==1.0.7 +httpcore==1.0.9 # via httpx -httpx==0.28.0 - # via openai +httpx==0.28.1 + # via + # mcp + # openai +httpx-sse==0.4.1 + # via mcp idna==3.10 # via # anyio # httpx # requests # yarl -importlib-metadata==8.4.0 - # via - # opentelemetry-api - # opentelemetry-instrumentation-flask +importlib-metadata==8.6.1 + # via opentelemetry-api isodate==0.7.2 # via msrest -jiter==0.8.0 +jiter==0.10.0 # via openai -marshmallow==3.23.1 +jsonschema==4.24.0 + # via mcp +jsonschema-specifications==2025.4.1 + # via jsonschema +marshmallow==4.0.0 # via environs -msal==1.31.1 +mcp==1.11.0 + # via openai-agents +msal==1.32.3 # via # azure-identity # msal-extensions -msal-extensions==1.2.0 +msal-extensions==1.3.1 # via azure-identity msrest==0.7.1 # via azure-monitor-opentelemetry-exporter -multidict==6.1.0 +multidict==6.6.3 # via # aiohttp # yarl -numpy==2.0.2 +numpy==2.2.6 # via pgvector -oauthlib==3.2.2 +oauthlib==3.3.1 # via requests-oauthlib -openai==1.55.3 +openai==1.96.1 # via # fastapi-app (pyproject.toml) - # openai-messages-token-helper -openai-messages-token-helper==0.1.11 + # openai-agents +openai-agents==0.2.0 # via fastapi-app (pyproject.toml) -opentelemetry-api==1.30.0 +opentelemetry-api==1.31.1 # via # azure-core-tracing-opentelemetry # azure-monitor-opentelemetry-exporter @@ -141,7 +162,7 @@ opentelemetry-api==1.30.0 # opentelemetry-instrumentation-wsgi # opentelemetry-sdk # opentelemetry-semantic-conventions -opentelemetry-instrumentation==0.51b0 +opentelemetry-instrumentation==0.52b1 # via # opentelemetry-instrumentation-aiohttp-client # opentelemetry-instrumentation-asgi @@ -156,43 +177,44 @@ opentelemetry-instrumentation==0.51b0 # opentelemetry-instrumentation-urllib # opentelemetry-instrumentation-urllib3 # opentelemetry-instrumentation-wsgi -opentelemetry-instrumentation-aiohttp-client==0.51b0 +opentelemetry-instrumentation-aiohttp-client==0.52b1 # via fastapi-app (pyproject.toml) -opentelemetry-instrumentation-asgi==0.51b0 +opentelemetry-instrumentation-asgi==0.52b1 # via opentelemetry-instrumentation-fastapi -opentelemetry-instrumentation-dbapi==0.51b0 +opentelemetry-instrumentation-dbapi==0.52b1 # via opentelemetry-instrumentation-psycopg2 -opentelemetry-instrumentation-django==0.51b0 +opentelemetry-instrumentation-django==0.52b1 # via azure-monitor-opentelemetry -opentelemetry-instrumentation-fastapi==0.51b0 +opentelemetry-instrumentation-fastapi==0.52b1 # via azure-monitor-opentelemetry -opentelemetry-instrumentation-flask==0.51b0 +opentelemetry-instrumentation-flask==0.52b1 # via azure-monitor-opentelemetry -opentelemetry-instrumentation-openai==0.38.7 +opentelemetry-instrumentation-openai==0.41.0 # via fastapi-app (pyproject.toml) -opentelemetry-instrumentation-psycopg2==0.51b0 +opentelemetry-instrumentation-psycopg2==0.52b1 # via azure-monitor-opentelemetry -opentelemetry-instrumentation-requests==0.51b0 +opentelemetry-instrumentation-requests==0.52b1 # via azure-monitor-opentelemetry -opentelemetry-instrumentation-sqlalchemy==0.51b0 
+opentelemetry-instrumentation-sqlalchemy==0.52b1 # via fastapi-app (pyproject.toml) -opentelemetry-instrumentation-urllib==0.51b0 +opentelemetry-instrumentation-urllib==0.52b1 # via azure-monitor-opentelemetry -opentelemetry-instrumentation-urllib3==0.51b0 +opentelemetry-instrumentation-urllib3==0.52b1 # via azure-monitor-opentelemetry -opentelemetry-instrumentation-wsgi==0.51b0 +opentelemetry-instrumentation-wsgi==0.52b1 # via # opentelemetry-instrumentation-django # opentelemetry-instrumentation-flask opentelemetry-resource-detector-azure==0.1.5 # via azure-monitor-opentelemetry -opentelemetry-sdk==1.30.0 +opentelemetry-sdk==1.31.1 # via # azure-monitor-opentelemetry # azure-monitor-opentelemetry-exporter # opentelemetry-resource-detector-azure -opentelemetry-semantic-conventions==0.51b0 +opentelemetry-semantic-conventions==0.52b1 # via + # opentelemetry-instrumentation # opentelemetry-instrumentation-aiohttp-client # opentelemetry-instrumentation-asgi # opentelemetry-instrumentation-dbapi @@ -206,9 +228,9 @@ opentelemetry-semantic-conventions==0.51b0 # opentelemetry-instrumentation-urllib3 # opentelemetry-instrumentation-wsgi # opentelemetry-sdk -opentelemetry-semantic-conventions-ai==0.4.2 +opentelemetry-semantic-conventions-ai==0.4.10 # via opentelemetry-instrumentation-openai -opentelemetry-util-http==0.51b0 +opentelemetry-util-http==0.52b1 # via # opentelemetry-instrumentation-aiohttp-client # opentelemetry-instrumentation-asgi @@ -219,87 +241,116 @@ opentelemetry-util-http==0.51b0 # opentelemetry-instrumentation-urllib # opentelemetry-instrumentation-urllib3 # opentelemetry-instrumentation-wsgi -packaging==24.2 +packaging==25.0 # via - # marshmallow + # opentelemetry-instrumentation # opentelemetry-instrumentation-flask # opentelemetry-instrumentation-sqlalchemy pgvector==0.3.6 # via fastapi-app (pyproject.toml) -pillow==11.0.0 - # via openai-messages-token-helper -portalocker==2.10.1 - # via msal-extensions -propcache==0.2.1 +propcache==0.3.2 # via # aiohttp # yarl -psutil==5.9.8 +psutil==7.0.0 # via azure-monitor-opentelemetry-exporter pycparser==2.22 # via cffi -pydantic==2.10.2 +pydantic==2.11.7 # via # fastapi + # mcp # openai -pydantic-core==2.27.1 + # openai-agents + # pydantic-settings +pydantic-core==2.33.2 # via pydantic +pydantic-settings==2.10.1 + # via mcp pyjwt==2.10.1 # via msal -python-dotenv==1.0.1 +python-dotenv==1.1.1 # via # fastapi-app (pyproject.toml) # environs + # pydantic-settings +python-multipart==0.0.20 + # via mcp +referencing==0.36.2 + # via + # jsonschema + # jsonschema-specifications regex==2024.11.6 # via tiktoken -requests==2.32.3 +requests==2.32.4 # via # azure-core # msal # msrest + # openai-agents # requests-oauthlib # tiktoken requests-oauthlib==2.0.0 # via msrest -setuptools==75.6.0 - # via opentelemetry-instrumentation -six==1.16.0 +rpds-py==0.26.0 + # via + # jsonschema + # referencing +six==1.17.0 # via azure-core sniffio==1.3.1 # via # anyio # openai -sqlalchemy==2.0.36 +sqlalchemy==2.0.41 # via fastapi-app (pyproject.toml) -starlette==0.41.3 - # via fastapi -tiktoken==0.7.0 +sse-starlette==2.4.1 + # via mcp +starlette==0.47.2 # via - # fastapi-app (pyproject.toml) - # openai-messages-token-helper - # opentelemetry-instrumentation-openai + # fastapi + # mcp +tiktoken==0.9.0 + # via opentelemetry-instrumentation-openai tqdm==4.67.1 # via openai -typing-extensions==4.12.2 +types-requests==2.32.4.20250611 + # via openai-agents +typing-extensions==4.14.1 # via + # aiosignal # anyio # asgiref # azure-core # azure-identity + # environs + # 
exceptiongroup # fastapi + # marshmallow # multidict # openai + # openai-agents # opentelemetry-sdk # pydantic # pydantic-core + # referencing # sqlalchemy # starlette + # typing-inspection # uvicorn -urllib3==2.2.3 - # via requests -uvicorn==0.32.1 - # via fastapi-app (pyproject.toml) -wrapt==1.17.0 +typing-inspection==0.4.1 + # via + # pydantic + # pydantic-settings +urllib3==2.5.0 + # via + # requests + # types-requests +uvicorn==0.35.0 + # via + # fastapi-app (pyproject.toml) + # mcp +wrapt==1.17.2 # via # deprecated # opentelemetry-instrumentation @@ -307,7 +358,7 @@ wrapt==1.17.0 # opentelemetry-instrumentation-dbapi # opentelemetry-instrumentation-sqlalchemy # opentelemetry-instrumentation-urllib3 -yarl==1.18.3 +yarl==1.20.1 # via aiohttp -zipp==3.21.0 +zipp==3.23.0 # via importlib-metadata diff --git a/src/frontend/package-lock.json b/src/frontend/package-lock.json index 68827101..252ddabd 100644 --- a/src/frontend/package-lock.json +++ b/src/frontend/package-lock.json @@ -31,7 +31,7 @@ "@vitejs/plugin-react": "^4.3.4", "prettier": "^3.0.3", "typescript": "^5.2.2", - "vite": "^6.2.4" + "vite": "^6.3.4" }, "engines": { "node": ">=14.0.0" @@ -308,11 +308,10 @@ } }, "node_modules/@babel/runtime": { - "version": "7.22.15", + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.27.1.tgz", + "integrity": "sha512-1x3D2xEk2fRo3PAhwQwu5UubzgiVWSXTBfWpVd2Mx2AzRqJuDJCsgaDVZ7HB5iGzDW1Hl1sWN2mFyKjmR9uAog==", "license": "MIT", - "dependencies": { - "regenerator-runtime": "^0.14.0" - }, "engines": { "node": ">=6.9.0" } @@ -4072,10 +4071,6 @@ "node": ">=6" } }, - "node_modules/regenerator-runtime": { - "version": "0.14.0", - "license": "MIT" - }, "node_modules/resolve": { "version": "1.22.4", "license": "MIT", @@ -4171,6 +4166,51 @@ "tslib": "^2.3.1" } }, + "node_modules/tinyglobby": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.13.tgz", + "integrity": "sha512-mEwzpUgrLySlveBwEVDMKk5B57bhLPYovRfPAXD5gA/98Opn0rCDj3GtLwFvCvH5RK9uPCExUROW5NjDwvqkxw==", + "dev": true, + "license": "MIT", + "dependencies": { + "fdir": "^6.4.4", + "picomatch": "^4.0.2" + }, + "engines": { + "node": ">=12.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/SuperchupuDev" + } + }, + "node_modules/tinyglobby/node_modules/fdir": { + "version": "6.4.4", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.4.tgz", + "integrity": "sha512-1NZP+GK4GfuAv3PqKvxQRDMjdSRZjnkq7KfhlNrCNNlZ0ygQFpebfrnfnq/W7fpUnAv9aGWmY1zKx7FYL3gwhg==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/tinyglobby/node_modules/picomatch": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz", + "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/tslib": { "version": "2.6.3", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz", @@ -4229,15 +4269,18 @@ } }, "node_modules/vite": { - "version": "6.2.4", - "resolved": "https://registry.npmjs.org/vite/-/vite-6.2.4.tgz", - "integrity": "sha512-veHMSew8CcRzhL5o8ONjy8gkfmFJAd5Ac16oxBUjlwgX3Gq2Wqr+qNC3TjPIpy7TPV/KporLga5GT9HqdrCizw==", + 
"version": "6.3.4", + "resolved": "https://registry.npmjs.org/vite/-/vite-6.3.4.tgz", + "integrity": "sha512-BiReIiMS2fyFqbqNT/Qqt4CVITDU9M9vE+DKcVAsB+ZV0wvTKd+3hMbkpxz1b+NmEDMegpVbisKiAZOnvO92Sw==", "dev": true, "license": "MIT", "dependencies": { "esbuild": "^0.25.0", + "fdir": "^6.4.4", + "picomatch": "^4.0.2", "postcss": "^8.5.3", - "rollup": "^4.30.1" + "rollup": "^4.34.9", + "tinyglobby": "^0.2.13" }, "bin": { "vite": "bin/vite.js" @@ -4300,6 +4343,34 @@ } } }, + "node_modules/vite/node_modules/fdir": { + "version": "6.4.4", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.4.tgz", + "integrity": "sha512-1NZP+GK4GfuAv3PqKvxQRDMjdSRZjnkq7KfhlNrCNNlZ0ygQFpebfrnfnq/W7fpUnAv9aGWmY1zKx7FYL3gwhg==", + "dev": true, + "license": "MIT", + "peerDependencies": { + "picomatch": "^3 || ^4" + }, + "peerDependenciesMeta": { + "picomatch": { + "optional": true + } + } + }, + "node_modules/vite/node_modules/picomatch": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz", + "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/jonschlinkert" + } + }, "node_modules/vite/node_modules/rollup": { "version": "4.36.0", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.36.0.tgz", @@ -4530,10 +4601,9 @@ } }, "@babel/runtime": { - "version": "7.22.15", - "requires": { - "regenerator-runtime": "^0.14.0" - } + "version": "7.27.1", + "resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.27.1.tgz", + "integrity": "sha512-1x3D2xEk2fRo3PAhwQwu5UubzgiVWSXTBfWpVd2Mx2AzRqJuDJCsgaDVZ7HB5iGzDW1Hl1sWN2mFyKjmR9uAog==" }, "@babel/template": { "version": "7.26.9", @@ -6848,9 +6918,6 @@ } } }, - "regenerator-runtime": { - "version": "0.14.0" - }, "resolve": { "version": "1.22.4", "requires": { @@ -6908,6 +6975,31 @@ "tslib": "^2.3.1" } }, + "tinyglobby": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/tinyglobby/-/tinyglobby-0.2.13.tgz", + "integrity": "sha512-mEwzpUgrLySlveBwEVDMKk5B57bhLPYovRfPAXD5gA/98Opn0rCDj3GtLwFvCvH5RK9uPCExUROW5NjDwvqkxw==", + "dev": true, + "requires": { + "fdir": "^6.4.4", + "picomatch": "^4.0.2" + }, + "dependencies": { + "fdir": { + "version": "6.4.4", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.4.tgz", + "integrity": "sha512-1NZP+GK4GfuAv3PqKvxQRDMjdSRZjnkq7KfhlNrCNNlZ0ygQFpebfrnfnq/W7fpUnAv9aGWmY1zKx7FYL3gwhg==", + "dev": true, + "requires": {} + }, + "picomatch": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz", + "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", + "dev": true + } + } + }, "tslib": { "version": "2.6.3", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.3.tgz", @@ -6931,17 +7023,33 @@ "requires": {} }, "vite": { - "version": "6.2.4", - "resolved": "https://registry.npmjs.org/vite/-/vite-6.2.4.tgz", - "integrity": "sha512-veHMSew8CcRzhL5o8ONjy8gkfmFJAd5Ac16oxBUjlwgX3Gq2Wqr+qNC3TjPIpy7TPV/KporLga5GT9HqdrCizw==", + "version": "6.3.4", + "resolved": "https://registry.npmjs.org/vite/-/vite-6.3.4.tgz", + "integrity": "sha512-BiReIiMS2fyFqbqNT/Qqt4CVITDU9M9vE+DKcVAsB+ZV0wvTKd+3hMbkpxz1b+NmEDMegpVbisKiAZOnvO92Sw==", "dev": true, "requires": { "esbuild": "^0.25.0", + "fdir": 
"^6.4.4", "fsevents": "~2.3.3", + "picomatch": "^4.0.2", "postcss": "^8.5.3", - "rollup": "^4.30.1" + "rollup": "^4.34.9", + "tinyglobby": "^0.2.13" }, "dependencies": { + "fdir": { + "version": "6.4.4", + "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.4.4.tgz", + "integrity": "sha512-1NZP+GK4GfuAv3PqKvxQRDMjdSRZjnkq7KfhlNrCNNlZ0ygQFpebfrnfnq/W7fpUnAv9aGWmY1zKx7FYL3gwhg==", + "dev": true, + "requires": {} + }, + "picomatch": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/picomatch/-/picomatch-4.0.2.tgz", + "integrity": "sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==", + "dev": true + }, "rollup": { "version": "4.36.0", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.36.0.tgz", diff --git a/src/frontend/package.json b/src/frontend/package.json index 40b6b5f7..5d8aecfd 100644 --- a/src/frontend/package.json +++ b/src/frontend/package.json @@ -35,6 +35,6 @@ "prettier": "^3.0.3", "typescript": "^5.2.2", "@types/react-syntax-highlighter": "^15.5.7", - "vite": "^6.2.4" + "vite": "^6.3.4" } } diff --git a/src/frontend/src/components/Answer/Answer.tsx b/src/frontend/src/components/Answer/Answer.tsx index a542064c..01b8bd3f 100644 --- a/src/frontend/src/components/Answer/Answer.tsx +++ b/src/frontend/src/components/Answer/Answer.tsx @@ -1,4 +1,4 @@ -import { useMemo } from "react"; +import { useMemo, useState } from "react"; import { Stack, IconButton } from "@fluentui/react"; import DOMPurify from "dompurify"; @@ -29,6 +29,7 @@ export const Answer = ({ onFollowupQuestionClicked, showFollowupQuestions }: Props) => { + const [isReferencesCollapsed, setIsReferencesCollapsed] = useState(true); const followupQuestions = answer.context.followup_questions; const messageContent = answer.message.content; const parsedAnswer = useMemo(() => parseAnswerToHtml(messageContent, isStreaming, onCitationClicked), [answer]); @@ -60,22 +61,32 @@ export const Answer = ({ {!!parsedAnswer.citations.length && ( - References: + + setIsReferencesCollapsed(!isReferencesCollapsed)} + /> + References: + + + {!isReferencesCollapsed && (
-                    <ol>
-                        {parsedAnswer.citations.map((rowId, ind) => {
-                            const citation = answer.context.data_points[rowId];
-                            if (!citation) return null;
-                            return (
-                                <li key={ind}>
-                                    <h4>{citation.name}</h4>
-                                    <p>Brand: {citation.brand}</p>
-                                    <p>Price: {citation.price}</p>
-                                    <p>{citation.description}</p>
-                                </li>
-                            );
-                        })}
-                    </ol>
+                        <ol>
+                            {parsedAnswer.citations.map((rowId, ind) => {
+                                const citation = answer.context.data_points[rowId];
+                                if (!citation) return null;
+                                return (
+                                    <li key={ind}>
+                                        <h4>{citation.name}</h4>
+                                        <p>Brand: {citation.brand}</p>
+                                        <p>Price: {citation.price}</p>
+                                        <p>{citation.description}</p>
+                                    </li>
+                                );
+                            })}
+                        </ol>
+                    )}
                 )}
             )}
diff --git a/tests/conftest.py b/tests/conftest.py
index 5bbff0f6..5fe67053 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,3 +1,4 @@
+import json
 import os
 from pathlib import Path
 from unittest import mock
@@ -13,6 +14,7 @@
     ChatCompletionMessage,
     Choice,
 )
+from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall, Function
 from openai.types.create_embedding_response import Usage
 from sqlalchemy.ext.asyncio import async_sessionmaker
@@ -232,6 +234,12 @@ def __init__(self, answer: str):
             }
         )
 
+    async def __aenter__(self):
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        return None
+
     def __aiter__(self):
         return self
@@ -244,9 +252,61 @@
 async def mock_acreate(*args, **kwargs):
     messages = kwargs["messages"]
     last_question = messages[-1]["content"]
-    if last_question == "Generate search query for: What is the capital of France?":
-        answer = "capital of France"
-    elif last_question == "Generate search query for: Are interest rates high?":
+    last_role = messages[-1]["role"]
+    if last_role == "tool":
+        items = json.loads(last_question)["items"]
+        arguments = {"query": "capital of France", "items": items, "filters": []}
+        return ChatCompletion(
+            object="chat.completion",
+            choices=[
+                Choice(
+                    message=ChatCompletionMessage(
+                        role="assistant",
+                        tool_calls=[
+                            ChatCompletionMessageToolCall(
+                                id="call_abc123final",
+                                type="function",
+                                function=Function(
+                                    name="final_result",
+                                    arguments=json.dumps(arguments),
+                                ),
+                            )
+                        ],
+                    ),
+                    finish_reason="stop",
+                    index=0,
+                )
+            ],
+            id="test-123final",
+            created=0,
+            model="test-model",
+        )
+    if last_question == "Find search results for user query: What is the capital of France?":
+        return ChatCompletion(
+            object="chat.completion",
+            choices=[
+                Choice(
+                    message=ChatCompletionMessage(
+                        role="assistant",
+                        tool_calls=[
+                            ChatCompletionMessageToolCall(
+                                id="call_abc123",
+                                type="function",
+                                function=Function(
+                                    name="search_database", arguments='{"search_query":"climbing gear outside"}'
+                                ),
+                            )
+                        ],
+                    ),
+                    finish_reason="stop",
+                    index=0,
+                )
+            ],
+            id="test-123",
+            created=0,
+            model="test-model",
+        )
+    elif last_question == "Find search results for user query: Are interest rates high?":
         answer = "interest rates"
     elif isinstance(last_question, list) and last_question[2].get("image_url"):
         answer = "From the provided sources, the impact of interest rates and GDP growth on "
diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json
index d9f9762d..612be773 100644
--- a/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json
+++ b/tests/snapshots/test_api_routes/test_advanced_chat_flow/advanced_chat_flow_response.json
@@ -19,56 +19,45 @@
             "title": "Prompt to generate search arguments",
             "description": [
                 {
-                    "role": "system",
-                    "content": "Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.\nYou have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.\nGenerate a search query based on the conversation and the new question.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return the original user question.\nDO NOT return anything besides the query.\n"
the query.\n" + "content": "Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n" }, { "role": "user", "content": "good options for climbing gear that can be used outside?" }, { - "role": "assistant", - "tool_calls": [ - { - "id": "call_abc123", - "type": "function", - "function": { - "arguments": "{\"search_query\":\"climbing gear outside\"}", - "name": "search_database" - } - } - ] + "id": "madeup", + "call_id": "call_abc123", + "name": "search_database", + "arguments": "{\"search_query\":\"climbing gear outside\"}", + "type": "function_call" }, { - "role": "tool", - "tool_call_id": "call_abc123", - "content": "Search results for climbing gear that can be used outside: ..." + "id": "madeupoutput", + "call_id": "call_abc123", + "output": "Search results for climbing gear that can be used outside: ...", + "type": "function_call_output" }, { "role": "user", "content": "are there any shoes less than $50?" }, { - "role": "assistant", - "tool_calls": [ - { - "id": "call_abc456", - "type": "function", - "function": { - "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}", - "name": "search_database" - } - } - ] + "id": "madeup", + "call_id": "call_abc456", + "name": "search_database", + "arguments": "{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}", + "type": "function_call" }, { - "role": "tool", - "tool_call_id": "call_abc456", - "content": "Search results for shoes cheaper than 50: ..." + "id": "madeupoutput", + "call_id": "call_abc456", + "output": "Search results for shoes cheaper than 50: ...", + "type": "function_call_output" }, { "role": "user", - "content": "What is the capital of France?" + "content": "Find search results for user query: What is the capital of France?" } ], "props": { @@ -78,7 +67,7 @@ }, { "title": "Search using generated search arguments", - "description": "The capital of France is Paris. [Benefit_Options-2.pdf].", + "description": "climbing gear outside", "props": { "top": 1, "vector_search": true, @@ -104,12 +93,11 @@ "title": "Prompt to generate answer", "description": [ { - "role": "system", "content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]." }, { - "role": "user", - "content": "What is the capital of France?\n\nSources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. 
+                    "content": "What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear",
+                    "role": "user"
                 }
             ],
             "props": {
diff --git a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines
index 9f5aaa63..d29b85c4 100644
--- a/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines
+++ b/tests/snapshots/test_api_routes/test_advanced_chat_streaming_flow/advanced_chat_streaming_flow_response.jsonlines
@@ -1,2 +1,2 @@
-{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"role":"system","content":"Below is a history of the conversation so far, and a new question asked by the user that needs to be answered by searching database rows.\nYou have access to an Azure PostgreSQL database with an items table that has columns for title, description, brand, price, and type.\nGenerate a search query based on the conversation and the new question.\nIf the question is not in English, translate the question to English before generating the search query.\nIf you cannot generate a search query, return the original user question.\nDO NOT return anything besides the query.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"role":"assistant","tool_calls":[{"id":"call_abc123","type":"function","function":{"arguments":"{\"search_query\":\"climbing gear outside\"}","name":"search_database"}}]},{"role":"tool","tool_call_id":"call_abc123","content":"Search results for climbing gear that can be used outside: ..."},{"role":"user","content":"are there any shoes less than $50?"},{"role":"assistant","tool_calls":[{"id":"call_abc456","type":"function","function":{"arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","name":"search_database"}}]},{"role":"tool","tool_call_id":"call_abc456","content":"Search results for shoes cheaper than 50: ..."},{"role":"user","content":"What is the capital of France?"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}},{"title":"Search using generated search arguments","description":"The capital of France is Paris. [Benefit_Options-2.pdf].","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"role":"system","content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"role":"user","content":"What is the capital of France?\n\nSources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear\n\n"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null}
+{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Prompt to generate search arguments","description":[{"content":"Your job is to find search results based off the user's question and past messages.\nYou have access to only these tools:\n1. **search_database**: This tool allows you to search a table for items based on a query.\n    You can pass in a search query and optional filters.\nOnce you get the search results, you're done.\n"},{"role":"user","content":"good options for climbing gear that can be used outside?"},{"id":"madeup","call_id":"call_abc123","name":"search_database","arguments":"{\"search_query\":\"climbing gear outside\"}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc123","output":"Search results for climbing gear that can be used outside: ...","type":"function_call_output"},{"role":"user","content":"are there any shoes less than $50?"},{"id":"madeup","call_id":"call_abc456","name":"search_database","arguments":"{\"search_query\":\"shoes\",\"price_filter\":{\"comparison_operator\":\"<\",\"value\":50}}","type":"function_call"},{"id":"madeupoutput","call_id":"call_abc456","output":"Search results for shoes cheaper than 50: ...","type":"function_call_output"},{"role":"user","content":"Find search results for user query: What is the capital of France?"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}},{"title":"Search using generated search arguments","description":"climbing gear outside","props":{"top":1,"vector_search":true,"text_search":true,"filters":[]}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null}
 {"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null}
diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json
index ca9bc1bb..e311917b 100644
--- a/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json
+++ b/tests/snapshots/test_api_routes/test_simple_chat_flow/simple_chat_flow_response.json
@@ -42,12 +42,11 @@
             "title": "Prompt to generate answer",
             "description": [
                 {
-                    "role": "system",
                     "content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."
                 },
                 {
-                    "role": "user",
-                    "content": "What is the capital of France?\n\nSources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear\n\n"
+                    "content": "What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear",
+                    "role": "user"
                 }
             ],
             "props": {
diff --git a/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json
new file mode 100644
index 00000000..d0456cd7
--- /dev/null
+++ b/tests/snapshots/test_api_routes/test_simple_chat_flow_message_history/simple_chat_flow_message_history_response.json
@@ -0,0 +1,69 @@
+{
+    "message": {
+        "content": "The capital of France is Paris. [Benefit_Options-2.pdf].",
+        "role": "assistant"
+    },
+    "context": {
+        "data_points": {
+            "1": {
+                "id": 1,
+                "type": "Footwear",
+                "brand": "Daybird",
+                "name": "Wanderer Black Hiking Boots",
+                "description": "Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.",
+                "price": 109.99
+            }
+        },
+        "thoughts": [
+            {
+                "title": "Search query for database",
+                "description": "What is the capital of France?",
+                "props": {
+                    "top": 1,
+                    "vector_search": true,
+                    "text_search": true
+                }
+            },
+            {
+                "title": "Search results",
+                "description": [
+                    {
+                        "id": 1,
+                        "type": "Footwear",
+                        "brand": "Daybird",
+                        "name": "Wanderer Black Hiking Boots",
+                        "description": "Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.",
+                        "price": 109.99
+                    }
+                ],
+                "props": {}
+            },
+            {
+                "title": "Prompt to generate answer",
+                "description": [
+                    {
+                        "content": "Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."
+                    },
+                    {
+                        "content": "What is the capital of France?",
+                        "role": "user"
+                    },
+                    {
+                        "content": "The capital of France is Paris.",
+                        "role": "assistant"
+                    },
+                    {
+                        "content": "What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear",
+                        "role": "user"
+                    }
+                ],
+                "props": {
+                    "model": "gpt-4o-mini",
+                    "deployment": "gpt-4o-mini"
+                }
+            }
+        ],
+        "followup_questions": null
+    },
+    "sessionState": null
+}
\ No newline at end of file
diff --git a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines
index e79e5461..65d3ae5b 100644
--- a/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines
+++ b/tests/snapshots/test_api_routes/test_simple_chat_streaming_flow/simple_chat_streaming_flow_response.jsonlines
@@ -1,2 +1,2 @@
-{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"role":"system","content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"role":"user","content":"What is the capital of France?\n\nSources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear\n\n"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null}
+{"delta":null,"context":{"data_points":{"1":{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}},"thoughts":[{"title":"Search query for database","description":"What is the capital of France?","props":{"top":1,"vector_search":true,"text_search":true}},{"title":"Search results","description":[{"id":1,"type":"Footwear","brand":"Daybird","name":"Wanderer Black Hiking Boots","description":"Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long.","price":109.99}],"props":{}},{"title":"Prompt to generate answer","description":[{"content":"Assistant helps customers with questions about products.\nRespond as if you are a salesperson helping a customer in a store. Do NOT respond with tables.\nAnswer ONLY with the product details listed in the products.\nIf there isn't enough information below, say you don't know.\nDo not generate answers that don't use the sources below.\nEach product has an ID in brackets followed by colon and the product details.\nAlways include the product ID for each product you use in the response.\nUse square brackets to reference the source, for example [52].\nDon't combine citations, list each product separately, for example [27][51]."},{"content":"What is the capital of France?Sources:\n[1]:Name:Wanderer Black Hiking Boots Description:Daybird's Wanderer Hiking Boots in sleek black are perfect for all your outdoor adventures. These boots are made with a waterproof leather upper and a durable rubber sole for superior traction. With their cushioned insole and padded collar, these boots will keep you comfortable all day long. Price:109.99 Brand:Daybird Type:Footwear","role":"user"}],"props":{"model":"gpt-4o-mini","deployment":"gpt-4o-mini"}}],"followup_questions":null},"sessionState":null}
 {"delta":{"content":"The capital of France is Paris. [Benefit_Options-2.pdf].","role":"assistant"},"context":null,"sessionState":null}
diff --git a/tests/test_api_routes.py b/tests/test_api_routes.py
index c36a1617..55da4d6f 100644
--- a/tests/test_api_routes.py
+++ b/tests/test_api_routes.py
@@ -125,6 +125,29 @@ async def test_simple_chat_flow(test_client, snapshot):
     snapshot.assert_match(json.dumps(response_data, indent=4), "simple_chat_flow_response.json")
 
 
+@pytest.mark.asyncio
+async def test_simple_chat_flow_message_history(test_client, snapshot):
+    """test the simple chat flow route with message history and hybrid retrieval mode"""
+    response = test_client.post(
+        "/chat",
+        json={
+            "context": {
+                "overrides": {"top": 1, "use_advanced_flow": False, "retrieval_mode": "hybrid", "temperature": 0.3}
+            },
+            "messages": [
+                {"content": "What is the capital of France?", "role": "user"},
+                {"content": "The capital of France is Paris.", "role": "assistant"},
+                {"content": "What is the capital of France?", "role": "user"},
+            ],
+        },
+    )
+    response_data = response.json()
+
+    assert response.status_code == 200
+    assert response.headers["Content-Type"] == "application/json"
+    snapshot.assert_match(json.dumps(response_data, indent=4), "simple_chat_flow_message_history_response.json")
+
+
 @pytest.mark.asyncio
 async def test_simple_chat_streaming_flow(test_client, snapshot):
     """test the simple chat streaming flow route with hybrid retrieval mode"""
diff --git a/tests/test_openai_clients.py b/tests/test_openai_clients.py
index ecac8759..47caba26 100644
--- a/tests/test_openai_clients.py
+++ b/tests/test_openai_clients.py
@@ -1,5 +1,6 @@
 import pytest
 
+from fastapi_app.dependencies import common_parameters
 from fastapi_app.openai_clients import create_openai_chat_client, create_openai_embed_client
 from tests.data import test_data
 
@@ -22,3 +23,44 @@ async def test_create_openai_chat_client(mock_azure_credential, mock_openai_chat):
         model="gpt-4o-mini", messages=[{"content": "test", "role": "user"}]
     )
     assert response.choices[0].message.content == "The capital of France is Paris. [Benefit_Options-2.pdf]."
+
+
+@pytest.mark.asyncio
+async def test_github_models_configuration(monkeypatch):
+    """Test that GitHub Models uses the correct URLs and model names."""
+    # Set up environment for GitHub Models
+    monkeypatch.setenv("OPENAI_CHAT_HOST", "github")
+    monkeypatch.setenv("OPENAI_EMBED_HOST", "github")
+    monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
+    # Don't set GITHUB_MODEL to test defaults
+
+    # Test chat client configuration
+    chat_client = await create_openai_chat_client(None)
+    assert str(chat_client.base_url).rstrip("/") == "https://models.github.ai/inference"
+    assert chat_client.api_key == "fake-token"
+
+    # Test embed client configuration
+    embed_client = await create_openai_embed_client(None)
+    assert str(embed_client.base_url).rstrip("/") == "https://models.github.ai/inference"
+    assert embed_client.api_key == "fake-token"
+
+    # Test that dependencies use correct defaults
+    context = await common_parameters()
+    assert context.openai_chat_model == "openai/gpt-4o"
+    assert context.openai_embed_model == "openai/text-embedding-3-large"
+
+
+@pytest.mark.asyncio
+async def test_github_models_with_custom_values(monkeypatch):
+    """Test that GitHub Models respects custom environment values."""
+    # Set up environment for GitHub Models with custom values
+    monkeypatch.setenv("OPENAI_CHAT_HOST", "github")
+    monkeypatch.setenv("OPENAI_EMBED_HOST", "github")
+    monkeypatch.setenv("GITHUB_TOKEN", "fake-token")
+    monkeypatch.setenv("GITHUB_MODEL", "openai/gpt-4")
+    monkeypatch.setenv("GITHUB_EMBED_MODEL", "openai/text-embedding-ada-002")
+
+    # Test that dependencies use custom values
+    context = await common_parameters()
+    assert context.openai_chat_model == "openai/gpt-4"
+    assert context.openai_embed_model == "openai/text-embedding-ada-002"
diff --git a/tests/test_postgres_searcher.py b/tests/test_postgres_searcher.py
index ee2992e0..fff7fdfd 100644
--- a/tests/test_postgres_searcher.py
+++ b/tests/test_postgres_searcher.py
@@ -1,6 +1,6 @@
 import pytest
 
-from fastapi_app.api_models import ItemPublic
+from fastapi_app.api_models import Filter, ItemPublic
 from tests.data import test_data
 
 
@@ -10,9 +10,24 @@ def test_postgres_build_filter_clause_without_filters(postgres_searcher):
 
 
 def test_postgres_build_filter_clause_with_filters(postgres_searcher):
-    assert postgres_searcher.build_filter_clause([{"column": "id", "comparison_operator": "=", "value": 1}]) == (
-        "WHERE id = 1",
-        "AND id = 1",
+    assert postgres_searcher.build_filter_clause(
+        [
+            Filter(column="brand", comparison_operator="=", value="AirStrider"),
+        ]
+    ) == (
+        "WHERE brand = 'AirStrider'",
+        "AND brand = 'AirStrider'",
+    )
+
+
+def test_postgres_build_filter_clause_with_filters_numeric(postgres_searcher):
+    assert postgres_searcher.build_filter_clause(
+        [
+            Filter(column="price", comparison_operator="<", value=30),
+        ]
+    ) == (
+        "WHERE price < 30",
+        "AND price < 30",
     )