Add use_or parameter to build_filter_clause method + Fix run method in AdvancedRAGChat (fall back to hybrid search when the simple SQL search returns no results)

chatrtham · chatrtham · commit 7a91d2ab7d38 · 2024-06-20T10:35:09.000Z
diff --git a/src/fastapi_app/postgres_searcher.py b/src/fastapi_app/postgres_searcher.py
@@ -22,15 +22,15 @@ def __init__(
         self.embed_deployment = embed_deployment
         self.embed_dimensions = embed_dimensions
 
-    def build_filter_clause(self, filters) -> tuple[str, str]:
+    def build_filter_clause(self, filters, use_or=False) -> tuple[str, str]:
         if filters is None:
             return "", ""
         filter_clauses = []
         for filter in filters:
             if isinstance(filter["value"], str):
                 filter["value"] = f"'{filter['value']}'"
             filter_clauses.append(f"{filter['column']} {filter['comparison_operator']} {filter['value']}")
-        filter_clause = " AND ".join(filter_clauses)
+        filter_clause = f" {'OR' if use_or else 'AND'} ".join(filter_clauses)
         if len(filter_clause) > 0:
             return f"WHERE {filter_clause}", f"AND {filter_clause}"
         return "", ""
@@ -290,11 +290,11 @@ async def simple_sql_search(
         """
         Search items by simple SQL query with filters.
         """
-        filter_clause_where, _ = self.build_filter_clause(filters)
+        filter_clause_where, _ = self.build_filter_clause(filters, use_or=True)
         sql = f"""
         SELECT id FROM packages
         {filter_clause_where}
-        LIMIT 1
+        LIMIT 2
         """
         
         async with self.async_session_maker() as session:
diff --git a/src/fastapi_app/rag_advanced.py b/src/fastapi_app/rag_advanced.py
@@ -15,9 +15,9 @@
 from .postgres_searcher import PostgresSearcher
 from .query_rewriter import (
     build_hybrid_search_function,
-    extract_search_arguments,
     build_specify_package_function,
-    handle_specify_package_function_call
+    extract_search_arguments,
+    handle_specify_package_function_call,
 )
 
 
@@ -40,6 +40,58 @@ def __init__(
         self.query_prompt_template = open(current_dir / "prompts/query.txt").read()
         self.answer_prompt_template = open(current_dir / "prompts/answer.txt").read()
 
+    async def hybrid_search(self, messages, top, vector_search, text_search):
+        # Generate an optimized keyword search query based on the chat history and the last question
+        query_messages = copy.deepcopy(messages)
+        query_messages.insert(0, {"role": "system", "content": self.query_prompt_template})
+        query_response_token_limit = 500
+
+        query_chat_completion: ChatCompletion = await self.openai_chat_client.chat.completions.create(
+            messages=query_messages,
+            model=self.chat_deployment if self.chat_deployment else self.chat_model,
+            temperature=0.0,
+            max_tokens=query_response_token_limit,
+            n=1,
+            tools=build_hybrid_search_function(),
+            tool_choice="auto",
+        )
+
+        query_text, filters = extract_search_arguments(query_chat_completion)
+
+        # Retrieve relevant items from the database with the GPT optimized query
+        results = await self.searcher.search_and_embed(
+            query_text,
+            top=top,
+            enable_vector_search=vector_search,
+            enable_text_search=text_search,
+            filters=filters,
+        )
+
+        sources_content = [f"[{(item.id)}]:{item.to_str_for_broad_rag()}\n\n" for item in results]
+
+        thought_steps = [
+            ThoughtStep(
+                title="Prompt to generate search arguments",
+                description=[str(message) for message in query_messages],
+                props={"model": self.chat_model, "deployment": self.chat_deployment} if self.chat_deployment else {"model": self.chat_model}
+            ),
+            ThoughtStep(
+                title="Generated search arguments",
+                description=query_text,
+                props={"filters": filters}
+            ),
+            ThoughtStep(
+                title="Hybrid Search results",
+                description=[result.to_dict() for result in results],
+                props={
+                    "top": top,
+                    "vector_search": vector_search,
+                    "text_search": text_search
+                }
+            )
+        ]
+        return sources_content, thought_steps
+
     async def run(
         self, messages: list[dict], overrides: dict[str, Any] = {}
     ) -> dict[str, Any] | AsyncGenerator[dict[str, Any], None]:
@@ -69,78 +121,34 @@ async def run(
 
         specify_package_filters = handle_specify_package_function_call(specify_package_chat_completion)
 
-        if specify_package_filters:
-            # Pass specify_package_filters to simple SQL search function
+        if specify_package_filters:  # Simple SQL search
             results = await self.searcher.simple_sql_search(filters=specify_package_filters)
-            sources_content = [f"[{(item.id)}]:{item.to_str_for_narrow_rag()}\n\n" for item in results]
-
-            thought_steps = [
-                ThoughtStep(
-                    title="Prompt to specify package",
-                    description=[str(message) for message in specify_package_messages],
-                    props={"model": self.chat_model, "deployment": self.chat_deployment} if self.chat_deployment else {"model": self.chat_model}
-                ),
-                ThoughtStep(
-                    title="Specified package filters",
-                    description=specify_package_filters,
-                    props={}
-                ),
-                ThoughtStep(
-                    title="SQL search results",
-                    description=[result.to_dict() for result in results],
-                    props={}
-                )
-            ]
-        else:
-            # Generate an optimized keyword search query based on the chat history and the last question
-            query_messages = copy.deepcopy(messages)
-            query_messages.insert(0, {"role": "system", "content": self.query_prompt_template})
-            query_response_token_limit = 500
-
-            query_chat_completion: ChatCompletion = await self.openai_chat_client.chat.completions.create(
-                messages=query_messages,
-                model=self.chat_deployment if self.chat_deployment else self.chat_model,
-                temperature=0.0,
-                max_tokens=query_response_token_limit,
-                n=1,
-                tools=build_hybrid_search_function(),
-                tool_choice="auto",
-            )
-
-            query_text, filters = extract_search_arguments(query_chat_completion)
-
-            # Retrieve relevant items from the database with the GPT optimized query
-            results = await self.searcher.search_and_embed(
-                query_text,
-                top=top,
-                enable_vector_search=vector_search,
-                enable_text_search=text_search,
-                filters=filters,
-            )
 
-            sources_content = [f"[{(item.id)}]:{item.to_str_for_broad_rag()}\n\n" for item in results]
-
-            thought_steps = [
-                ThoughtStep(
-                    title="Prompt to generate search arguments",
-                    description=[str(message) for message in query_messages],
-                    props={"model": self.chat_model, "deployment": self.chat_deployment} if self.chat_deployment else {"model": self.chat_model}
-                ),
-                ThoughtStep(
-                    title="Generated search arguments",
-                    description=query_text,
-                    props={"filters": filters}
-                ),
-                ThoughtStep(
-                    title="Hybrid Search results",
-                    description=[result.to_dict() for result in results],
-                    props={
-                        "top": top,
-                        "vector_search": vector_search,
-                        "text_search": text_search
-                    }
-                )
-            ]
+            if results:
+                sources_content = [f"[{(item.id)}]:{item.to_str_for_narrow_rag()}\n\n" for item in results]
+
+                thought_steps = [
+                    ThoughtStep(
+                        title="Prompt to specify package",
+                        description=[str(message) for message in specify_package_messages],
+                        props={"model": self.chat_model, "deployment": self.chat_deployment} if self.chat_deployment else {"model": self.chat_model}
+                    ),
+                    ThoughtStep(
+                        title="Specified package filters",
+                        description=specify_package_filters,
+                        props={}
+                    ),
+                    ThoughtStep(
+                        title="SQL search results",
+                        description=[result.to_dict() for result in results],
+                        props={}
+                    )
+                ]
+            else:
+                # No results found with SQL search, fall back to the hybrid search
+                sources_content, thought_steps = await self.hybrid_search(messages, top, vector_search, text_search)
+        else:  # Hybrid search
+            sources_content, thought_steps = await self.hybrid_search(messages, top, vector_search, text_search)
 
         content = "\n".join(sources_content)