From 0ab31c64175400076e1b5b2135329afbe74b893c Mon Sep 17 00:00:00 2001 From: Leah Bar-On Simmons Date: Wed, 5 Jun 2024 16:52:37 -0700 Subject: [PATCH] initial updates to aad auth - not tested --- src/copilot_flow/copilot.py | 90 +++++++++++++++++-------------------- src/deployment/deploy.py | 9 ++-- src/deployment/invoke.py | 8 ++-- src/evaluation/evaluate.py | 26 +++++------ src/helper_functions.py | 2 +- src/indexing/build_index.py | 51 +++++++++++---------- src/sample.env | 2 - 7 files changed, 86 insertions(+), 102 deletions(-) diff --git a/src/copilot_flow/copilot.py b/src/copilot_flow/copilot.py index f9a7a79..e2ee62a 100644 --- a/src/copilot_flow/copilot.py +++ b/src/copilot_flow/copilot.py @@ -1,36 +1,63 @@ -# --------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# --------------------------------------------------------- import os -# set environment variables before importing any other code from dotenv import load_dotenv load_dotenv() from pathlib import Path - from typing import TypedDict -from openai import AzureOpenAI - -from azure.core.credentials import AzureKeyCredential +from azure.identity import DefaultAzureCredential, get_bearer_token_provider from azure.search.documents import SearchClient from azure.search.documents.models import VectorizedQuery -from promptflow.tracing import trace +from openai import AzureOpenAI + from promptflow.core import Prompty, AzureOpenAIModelConfiguration +from promptflow.tracing import trace class ChatResponse(TypedDict): context: dict reply: str @trace -def get_chat_response(chat_input: str, chat_history: list = []) -> ChatResponse: +def get_documents(search_query: str, num_docs=3): + + index_name = os.getenv("AZUREAI_SEARCH_INDEX_NAME") + + # retrieve documents relevant to the user's question from Cognitive Search + search_client = SearchClient( + endpoint=os.getenv("AZURE_SEARCH_ENDPOINT"), + credential=DefaultAzureCredential(), + index_name=index_name) + + aoai_client = AzureOpenAI( + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), + azure_ad_token_provider=get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"), + api_version=os.getenv("AZURE_OPENAI_API_VERSION") + ) + + # generate a vector embedding of the user's question + embedding = aoai_client.embeddings.create(input=search_query, + model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")) + embedding_to_query = embedding.data[0].embedding + + context = "" + # use the vector embedding to do a vector search on the index + vector_query = VectorizedQuery(vector=embedding_to_query, k_nearest_neighbors=num_docs, fields="contentVector") + results = trace(search_client.search)( + search_text="", + vector_queries=[vector_query], + select=["id", "content"]) + + for result in results: + context += f"\n>>> From: {result['id']}\n{result['content']}" + return context + +def get_chat_response(chat_input: str, chat_history: list = []) -> ChatResponse: model_config = AzureOpenAIModelConfiguration( - azure_deployment=os.environ["AZURE_OPENAI_CHAT_DEPLOYMENT"], - api_version=os.environ["AZURE_OPENAI_API_VERSION"], - azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], - api_key=os.environ["AZURE_OPENAI_API_KEY"] + azure_deployment=os.getenv("AZURE_OPENAI_CHAT_DEPLOYMENT"), + api_version=os.getenv("AZURE_OPENAI_API_VERSION"), + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT") ) searchQuery = chat_input @@ -67,38 +94,3 @@ def get_chat_response(chat_input: str, chat_history: list = []) -> ChatResponse: ) return dict(reply=result, context=documents) - -@trace -def get_documents(search_query: str, num_docs=3): - - index_name = os.environ["AZUREAI_SEARCH_INDEX_NAME"] - - # retrieve documents relevant to the user's query from Azure AI Search index - search_client = SearchClient( - endpoint=os.environ["AZURE_SEARCH_ENDPOINT"], - credential=AzureKeyCredential(os.environ["AZURE_SEARCH_KEY"]), - index_name=index_name) - - aoai_client = AzureOpenAI( - azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], - api_key=os.environ["AZURE_OPENAI_API_KEY"], - api_version=os.environ["AZURE_OPENAI_API_VERSION"] - ) - - # generate a vector embedding of the user's question - embedding = aoai_client.embeddings.create(input=search_query, - model=os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]) - embedding_to_query = embedding.data[0].embedding - - context = "" - # use the vector embedding to do a vector search on the index - vector_query = VectorizedQuery(vector=embedding_to_query, k_nearest_neighbors=num_docs, fields="contentVector") - results = trace(search_client.search)( - search_text="", - vector_queries=[vector_query], - select=["id", "content"]) - - for result in results: - context += f"\n>>> From: {result['id']}\n{result['content']}" - - return context diff --git a/src/deployment/deploy.py b/src/deployment/deploy.py index ca177b4..6529141 100644 --- a/src/deployment/deploy.py +++ b/src/deployment/deploy.py @@ -19,11 +19,12 @@ def deploy_flow(endpoint_name, deployment_name): endpoint = ManagedOnlineEndpoint( name=endpoint_name, properties={ - "enforce_access_to_default_secret_stores": "enabled" # if you want secret injection support - } + "enforce_access_to_default_secret_stores": "enabled" # if you want secret injection support + }, + auth_mode="aad_token" # using aad auth instead of key-based auth ) - deployment = ManagedOnlineDeployment( # defaults to key auth_mode + deployment = ManagedOnlineDeployment( name=deployment_name, endpoint_name=endpoint_name, model=Model( @@ -65,9 +66,7 @@ def deploy_flow(endpoint_name, deployment_name): # the following is enabled by secret injection # make sure your environment variables here match the environment variables your code depends on 'AZURE_OPENAI_ENDPOINT': os.getenv('AZURE_OPENAI_ENDPOINT'), - 'AZURE_OPENAI_API_KEY': os.getenv('AZURE_OPENAI_API_KEY'), 'AZURE_SEARCH_ENDPOINT': os.getenv('AZURE_SEARCH_ENDPOINT'), - 'AZURE_SEARCH_KEY': os.getenv('AZURE_SEARCH_KEY'), 'AZURE_OPENAI_API_VERSION': os.getenv('AZURE_OPENAI_API_VERSION'), 'AZURE_OPENAI_CHAT_DEPLOYMENT': os.getenv('AZURE_OPENAI_CHAT_DEPLOYMENT'), 'AZURE_OPENAI_EVALUATION_DEPLOYMENT': os.getenv('AZURE_OPENAI_EVALUATION_DEPLOYMENT'), diff --git a/src/deployment/invoke.py b/src/deployment/invoke.py index 1edd08b..fe4c67f 100644 --- a/src/deployment/invoke.py +++ b/src/deployment/invoke.py @@ -1,21 +1,19 @@ +import requests from helper_functions import get_client def invoke_deployment(endpoint_name: str, query: str, stream: bool = False): client = get_client() - import requests - if stream: accept_header = "text/event-stream" else: accept_header = "application/json" scoring_url = client.online_endpoints.get(endpoint_name).scoring_uri - primary_key = client.online_endpoints.get_keys(endpoint_name).primary_key headers = { "Content-Type": "application/json", - "Authorization": f"Bearer {primary_key}", + "Authorization": f"Bearer {client._credential.get_token('https://ml.azure.com/').token}", "Accept": accept_header } @@ -27,7 +25,7 @@ def invoke_deployment(endpoint_name: str, query: str, stream: bool = False): "stream": stream, }, ) - + if stream: for item in response.iter_lines(chunk_size=None): print(item) diff --git a/src/evaluation/evaluate.py b/src/evaluation/evaluate.py index a2d03d1..de05483 100644 --- a/src/evaluation/evaluate.py +++ b/src/evaluation/evaluate.py @@ -1,12 +1,10 @@ import json import pathlib - -# set environment variables before importing any other code +import os from dotenv import load_dotenv load_dotenv() -import os import pandas as pd from pprint import pprint @@ -31,12 +29,12 @@ def copilot_wrapper(*, chat_input, **kwargs): } return parsedResult -def run_evaluation(name, dataset_path): +def run_evaluation(eval_name, dataset_path): model_config = AzureOpenAIModelConfiguration( - azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"), - api_key=os.environ.get("AZURE_OPENAI_API_KEY"), - azure_deployment=os.environ.get("AZURE_OPENAI_EVALUATION_DEPLOYMENT"), + azure_deployment=os.getenv("AZURE_OPENAI_EVALUATION_DEPLOYMENT"), + api_version=os.getenv("AZURE_OPENAI_API_VERSION"), + azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT") ) # Initializing Evaluators @@ -49,7 +47,7 @@ def run_evaluation(name, dataset_path): result = evaluate( target=copilot_wrapper, - evaluation_name=name, + evaluation_name=eval_name, data=data_path, evaluators={ "relevance": relevance_eval, @@ -62,15 +60,15 @@ def run_evaluation(name, dataset_path): }, # to log evaluation to the cloud AI Studio project azure_ai_project = { - "subscription_id": os.environ["AZURE_SUBSCRIPTION_ID"], - "resource_group_name": os.environ["AZURE_RESOURCE_GROUP"], - "project_name": os.environ["AZUREAI_PROJECT_NAME"] + "subscription_id": os.getenv("AZURE_SUBSCRIPTION_ID"), + "resource_group_name": os.getenv("AZURE_RESOURCE_GROUP"), + "project_name": os.getenv("AZUREAI_PROJECT_NAME") } ) - - tabular_result = pd.DataFrame(result.get("rows")) - tabular_result.to_json(output_path, orient="records", lines=True) + tabular_result = pd.DataFrame(result.get("rows")) + tabular_result.to_json(output_path, orient="records", lines=True) + return result, tabular_result if __name__ == "__main__": diff --git a/src/helper_functions.py b/src/helper_functions.py index fa2074e..00975e5 100644 --- a/src/helper_functions.py +++ b/src/helper_functions.py @@ -7,7 +7,7 @@ def get_client() -> MLClient: # check if env variables are set and initialize client from those - client = MLClient(DefaultAzureCredential(), os.environ["AZURE_SUBSCRIPTION_ID"], os.environ["AZURE_RESOURCE_GROUP"], os.environ["AZUREAI_PROJECT_NAME"]) + client = MLClient(DefaultAzureCredential(), os.getenv("AZURE_SUBSCRIPTION_ID"), os.getenv("AZURE_RESOURCE_GROUP"), os.getenv("AZUREAI_PROJECT_NAME")) if client: return client diff --git a/src/indexing/build_index.py b/src/indexing/build_index.py index d5c95ef..9204c71 100644 --- a/src/indexing/build_index.py +++ b/src/indexing/build_index.py @@ -1,12 +1,11 @@ import os +from dotenv import load_dotenv +load_dotenv() -from promptflow.rag.config import LocalSource, AzureAISearchConfig, EmbeddingsModelConfig, ConnectionConfig -from promptflow.rag import build_index from azure.ai.ml.entities import Index -# set environment variables before importing any other code -from dotenv import load_dotenv -load_dotenv() +from promptflow.rag.config import LocalSource, AzureAISearchConfig, EmbeddingsModelConfig, ConnectionConfig +from promptflow.rag import build_index from helper_functions import get_client @@ -16,32 +15,32 @@ def build_aisearch_index(index_name, path_to_data): client = get_client() # Use the same index name when registering the index in AI Studio + index_name = "tutorial-index-2" # your desired index name index_path = build_index( - name=index_name, # name of your index - vector_store="azure_ai_search", # the type of vector store - in this case it is Azure AI Search. Users can also use "azure_cognitive search" - embeddings_model_config=EmbeddingsModelConfig( - model_name=os.environ['AZURE_OPENAI_EMBEDDING_DEPLOYMENT'], - deployment_name=os.environ['AZURE_OPENAI_EMBEDDING_DEPLOYMENT'], - connection_config=ConnectionConfig( + name=index_name, # name of your index + vector_store="azure_ai_search", # the type of vector store - in this case it is Azure AI Search. Users can also use "azure_cognitive search" + embeddings_model_config=EmbeddingsModelConfig( + model_name=os.getenv('AZURE_OPENAI_EMBEDDING_DEPLOYMENT'), + deployment_name=os.getenv('AZURE_OPENAI_EMBEDDING_DEPLOYMENT'), + connection_config=ConnectionConfig( + subscription_id=client.subscription_id, + resource_group_name=client.resource_group_name, + workspace_name=client.workspace_name, + connection_name=os.getenv('AZURE_OPENAI_CONNECTION_NAME') + ) + ), + input_source=LocalSource(input_data="./data/product-info/"), # the location of your files + index_config=AzureAISearchConfig( + ai_search_index_name=index_name, # the name of the index store inside the azure ai search service + ai_search_connection_config=ConnectionConfig( subscription_id=client.subscription_id, resource_group_name=client.resource_group_name, workspace_name=client.workspace_name, - connection_name=os.environ['AZURE_OPENAI_CONNECTION_NAME'] + connection_name=os.getenv('AZURE_SEARCH_CONNECTION_NAME') ) - ), - input_source=LocalSource(input_data=path_to_data), # the location of your file/folders - index_config=AzureAISearchConfig( - ai_search_index_name=index_name, # the name of the index store inside the azure ai search service - ai_search_connection_config=ConnectionConfig( - subscription_id=client.subscription_id, - resource_group_name=client.resource_group_name, - workspace_name=client.workspace_name, - connection_name=os.environ['AZURE_SEARCH_CONNECTION_NAME'] - ) - ), - embeddings_cache_path="./indexing", - tokens_per_chunk = 800, # Optional field - Maximum number of tokens per chunk - token_overlap_across_chunks = 0, # Optional field - Number of tokens to overlap between chunks + ), + tokens_per_chunk = 800, # Optional field - Maximum number of tokens per chunk + token_overlap_across_chunks = 0, # Optional field - Number of tokens to overlap between chunks ) print(f"Local Path: {index_path}") diff --git a/src/sample.env b/src/sample.env index 83d63f9..aa4dd30 100644 --- a/src/sample.env +++ b/src/sample.env @@ -3,10 +3,8 @@ AZURE_RESOURCE_GROUP= AZUREAI_HUB_NAME= AZUREAI_PROJECT_NAME= AZURE_OPENAI_ENDPOINT= -AZURE_OPENAI_API_KEY= AZURE_OPENAI_CONNECTION_NAME= AZURE_SEARCH_ENDPOINT= -AZURE_SEARCH_KEY= AZURE_SEARCH_CONNECTION_NAME= AZURE_OPENAI_API_VERSION= AZURE_OPENAI_CHAT_DEPLOYMENT=