From 4be8f2aa60d3c0cec2741bfec88d42fad6af1542 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Mon, 2 Oct 2023 21:43:24 -0700 Subject: [PATCH 01/10] Initial prototype v1 openai support for azure --- src/openai/azure.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/openai/azure.py diff --git a/src/openai/azure.py b/src/openai/azure.py new file mode 100644 index 0000000000..966c9c06ae --- /dev/null +++ b/src/openai/azure.py @@ -0,0 +1,38 @@ +import openai + +class TokenCredential: + + def __init__(self): + import azure.identity + self._credential = azure.identity.DefaultAzureCredential() + + def get_token(self): + return self._credential.get_token('https://cognitiveservices.azure.com/.default').token + + +class AzureClient(openai.Client): + + def __init__(self, *args, deployment: str, credential: TokenCredential | None, api_version: str = '2023-03-15-preview', **kwargs): + default_query = kwargs.get('default_query', {}) + default_query.setdefault('api-version', api_version) + kwargs['default_query'] = default_query + self.credential = credential + if credential: + kwargs['api_key'] = 'Placeholder: AAD' + super().__init__(*args, **kwargs) + self.deployment = deployment + + @property + def auth_headers(self) -> dict[str, str]: + if self.credential: + return { 'Authorization': f'Bearer {self.credential.get_token()}'} + return {"api-key": self.api_key} + + + def request(self, *args, **kwargs): + if self.deployment: + args = list(args) + options = args[1] if len(args) >= 2 else kwargs.get('options') + options.url = f'openai/deployments/{self.deployment}' + options.url + return super().request(*args, **kwargs) + From ec76eed7aef29f458410de30e3123b08c4e0f176 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Wed, 4 Oct 2023 11:22:29 -0700 Subject: [PATCH 02/10] make AzureClient pass tests (except audio) (#8) * make AzureClient pass tests (except audio) * async client edits --- src/openai/__init__.py | 3 + src/openai/azure.py | 140 +++++++++++++++++++++++++++++++++++++++-- 2 files changed, 137 insertions(+), 6 deletions(-) diff --git a/src/openai/__init__.py b/src/openai/__init__.py index d011d416ac..b386c35fee 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -18,6 +18,7 @@ AsyncStream, RequestOptions, ) +from .azure import AzureClient, AzureAsyncClient from ._version import __title__, __version__ from ._exceptions import ( APIError, @@ -66,6 +67,8 @@ "OpenAI", "AsyncOpenAI", "file_from_path", + "AzureClient", + "AzureAsyncClient" ] from .version import VERSION as VERSION diff --git a/src/openai/azure.py b/src/openai/azure.py index 966c9c06ae..d9d82e2caf 100644 --- a/src/openai/azure.py +++ b/src/openai/azure.py @@ -1,4 +1,8 @@ import openai +import time +import httpx + +TIMEOUT_SECS = 600 class TokenCredential: @@ -12,7 +16,7 @@ def get_token(self): class AzureClient(openai.Client): - def __init__(self, *args, deployment: str, credential: TokenCredential | None, api_version: str = '2023-03-15-preview', **kwargs): + def __init__(self, *args, deployment: str = "", credential: TokenCredential | None = None, api_version: str = '2023-09-01-preview', **kwargs): default_query = kwargs.get('default_query', {}) default_query.setdefault('api-version', api_version) kwargs['default_query'] = default_query @@ -27,12 +31,136 @@ def auth_headers(self) -> dict[str, str]: if self.credential: return { 'Authorization': f'Bearer {self.credential.get_token()}'} return {"api-key": self.api_key} - + + def _check_polling_response(self, 
response, predicate): + if not predicate(response): + return + error_data = response.json()['error'] + message = error_data.get('message', 'Operation failed') + code = error_data.get('code') + raise openai.OpenAIError(message=message, code=code) + + def _poll( + self, + method, + url, + until, + failed, + cast_to, + interval = None, + delay = None, + ): + if delay: + time.sleep(delay) + + opts = openai._models.FinalRequestOptions.construct(method=method, url=url) + response = super().request(cast_to, opts) + self._check_polling_response(response, failed) + start_time = time.time() + while not until(response): + if time.time() - start_time > TIMEOUT_SECS: + raise openai.Timeout("Operation polling timed out.") + + time.sleep(interval or int(response.headers.get("retry-after")) or 10) + response = super().request(cast_to, opts) + self._check_polling_response(response, failed) + + response_json = response.json() + return openai.types.ImagesResponse.construct(**response_json["result"]) def request(self, *args, **kwargs): - if self.deployment: - args = list(args) - options = args[1] if len(args) >= 2 else kwargs.get('options') - options.url = f'openai/deployments/{self.deployment}' + options.url + args = list(args) + options = args[1] if len(args) >= 2 else kwargs.get('options') + if options.url == "/images/generations": + options.url = "openai/images/generations:submit" + response = super().request(*args, **kwargs) + operation_id = response.model_extra['id'] + return self._poll( + "get", f"openai/operations/images/{operation_id}", cast_to=httpx.Response, + until=lambda response: response.json()["status"] in ["succeeded"], + failed=lambda response: response.json()["status"] in ["failed"], + ) + elif options.extra_json and options.extra_json.get("dataSources"): + model = options.json_data["model"] + options.url = f'openai/deployments/{model}/extensions' + options.url + else: + model = options.json_data["model"] + options.url = f'openai/deployments/{model}' + options.url return super().request(*args, **kwargs) + + +class AzureAsyncClient(openai.AsyncClient): + + def __init__(self, *args, deployment: str = "", credential: TokenCredential | None = None, api_version: str = '2023-09-01-preview', **kwargs): + default_query = kwargs.get('default_query', {}) + default_query.setdefault('api-version', api_version) + kwargs['default_query'] = default_query + self.credential = credential + if credential: + kwargs['api_key'] = 'Placeholder: AAD' + super().__init__(*args, **kwargs) + self.deployment = deployment + + @property + def auth_headers(self) -> dict[str, str]: + if self.credential: + return { 'Authorization': f'Bearer {self.credential.get_token()}'} + return {"api-key": self.api_key} + + def _check_polling_response(self, response, predicate): + if not predicate(response): + return + error_data = response.json()['error'] + message = error_data.get('message', 'Operation failed') + code = error_data.get('code') + raise openai.OpenAIError(message=message, code=code) + + async def _poll( + self, + method, + url, + until, + failed, + cast_to, + interval = None, + delay = None, + ): + if delay: + time.sleep(delay) + + opts = openai._models.FinalRequestOptions.construct(method=method, url=url) + response = await super().request(cast_to, opts) + self._check_polling_response(response, failed) + start_time = time.time() + while not until(response): + if time.time() - start_time > TIMEOUT_SECS: + raise openai.Timeout("Operation polling timed out.") + + time.sleep(interval or 
int(response.headers.get("retry-after")) or 10) + response = await super().request(cast_to, opts) + self._check_polling_response(response, failed) + + response_json = response.json() + return openai.types.ImagesResponse.construct(**response_json["result"]) + + async def request(self, *args, **kwargs): + args = list(args) + options = args[1] if len(args) >= 2 else kwargs.get('options') + if options.url == "/images/generations": + options.url = "openai/images/generations:submit" + response = await super().request(*args, **kwargs) + operation_id = response.model_extra['id'] + return await self._poll( + "get", f"openai/operations/images/{operation_id}", cast_to=httpx.Response, + until=lambda response: response.json()["status"] in ["succeeded"], + failed=lambda response: response.json()["status"] in ["failed"], + ) + elif options.extra_json and options.extra_json.get("dataSources"): + model = options.json_data["model"] + options.url = f'openai/deployments/{model}/extensions' + options.url + else: + model = options.json_data["model"] + options.url = f'openai/deployments/{model}' + options.url + return await super().request(*args, **kwargs) + From ba7f7e15c263eb07f4ba2771a8cc5189f5577d68 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Thu, 5 Oct 2023 12:53:09 -0700 Subject: [PATCH 03/10] Split sync and async clients into separate modules --- src/openai/azure.py | 166 ----------- src/openai/azure/__init__.py | 9 + src/openai/azure/_async_client.py | 436 +++++++++++++++++++++++++++++ src/openai/azure/_azuremodels.py | 5 + src/openai/azure/_credential.py | 13 + src/openai/azure/_sync_client.py | 440 ++++++++++++++++++++++++++++++ 6 files changed, 903 insertions(+), 166 deletions(-) delete mode 100644 src/openai/azure.py create mode 100644 src/openai/azure/__init__.py create mode 100644 src/openai/azure/_async_client.py create mode 100644 src/openai/azure/_azuremodels.py create mode 100644 src/openai/azure/_credential.py create mode 100644 src/openai/azure/_sync_client.py diff --git a/src/openai/azure.py b/src/openai/azure.py deleted file mode 100644 index d9d82e2caf..0000000000 --- a/src/openai/azure.py +++ /dev/null @@ -1,166 +0,0 @@ -import openai -import time -import httpx - -TIMEOUT_SECS = 600 - -class TokenCredential: - - def __init__(self): - import azure.identity - self._credential = azure.identity.DefaultAzureCredential() - - def get_token(self): - return self._credential.get_token('https://cognitiveservices.azure.com/.default').token - - -class AzureClient(openai.Client): - - def __init__(self, *args, deployment: str = "", credential: TokenCredential | None = None, api_version: str = '2023-09-01-preview', **kwargs): - default_query = kwargs.get('default_query', {}) - default_query.setdefault('api-version', api_version) - kwargs['default_query'] = default_query - self.credential = credential - if credential: - kwargs['api_key'] = 'Placeholder: AAD' - super().__init__(*args, **kwargs) - self.deployment = deployment - - @property - def auth_headers(self) -> dict[str, str]: - if self.credential: - return { 'Authorization': f'Bearer {self.credential.get_token()}'} - return {"api-key": self.api_key} - - def _check_polling_response(self, response, predicate): - if not predicate(response): - return - error_data = response.json()['error'] - message = error_data.get('message', 'Operation failed') - code = error_data.get('code') - raise openai.OpenAIError(message=message, code=code) - - def _poll( - self, - method, - url, - until, - failed, - cast_to, - interval = None, - delay = None, - 
): - if delay: - time.sleep(delay) - - opts = openai._models.FinalRequestOptions.construct(method=method, url=url) - response = super().request(cast_to, opts) - self._check_polling_response(response, failed) - start_time = time.time() - while not until(response): - if time.time() - start_time > TIMEOUT_SECS: - raise openai.Timeout("Operation polling timed out.") - - time.sleep(interval or int(response.headers.get("retry-after")) or 10) - response = super().request(cast_to, opts) - self._check_polling_response(response, failed) - - response_json = response.json() - return openai.types.ImagesResponse.construct(**response_json["result"]) - - def request(self, *args, **kwargs): - args = list(args) - options = args[1] if len(args) >= 2 else kwargs.get('options') - if options.url == "/images/generations": - options.url = "openai/images/generations:submit" - response = super().request(*args, **kwargs) - operation_id = response.model_extra['id'] - return self._poll( - "get", f"openai/operations/images/{operation_id}", cast_to=httpx.Response, - until=lambda response: response.json()["status"] in ["succeeded"], - failed=lambda response: response.json()["status"] in ["failed"], - ) - elif options.extra_json and options.extra_json.get("dataSources"): - model = options.json_data["model"] - options.url = f'openai/deployments/{model}/extensions' + options.url - else: - model = options.json_data["model"] - options.url = f'openai/deployments/{model}' + options.url - return super().request(*args, **kwargs) - - - -class AzureAsyncClient(openai.AsyncClient): - - def __init__(self, *args, deployment: str = "", credential: TokenCredential | None = None, api_version: str = '2023-09-01-preview', **kwargs): - default_query = kwargs.get('default_query', {}) - default_query.setdefault('api-version', api_version) - kwargs['default_query'] = default_query - self.credential = credential - if credential: - kwargs['api_key'] = 'Placeholder: AAD' - super().__init__(*args, **kwargs) - self.deployment = deployment - - @property - def auth_headers(self) -> dict[str, str]: - if self.credential: - return { 'Authorization': f'Bearer {self.credential.get_token()}'} - return {"api-key": self.api_key} - - def _check_polling_response(self, response, predicate): - if not predicate(response): - return - error_data = response.json()['error'] - message = error_data.get('message', 'Operation failed') - code = error_data.get('code') - raise openai.OpenAIError(message=message, code=code) - - async def _poll( - self, - method, - url, - until, - failed, - cast_to, - interval = None, - delay = None, - ): - if delay: - time.sleep(delay) - - opts = openai._models.FinalRequestOptions.construct(method=method, url=url) - response = await super().request(cast_to, opts) - self._check_polling_response(response, failed) - start_time = time.time() - while not until(response): - if time.time() - start_time > TIMEOUT_SECS: - raise openai.Timeout("Operation polling timed out.") - - time.sleep(interval or int(response.headers.get("retry-after")) or 10) - response = await super().request(cast_to, opts) - self._check_polling_response(response, failed) - - response_json = response.json() - return openai.types.ImagesResponse.construct(**response_json["result"]) - - async def request(self, *args, **kwargs): - args = list(args) - options = args[1] if len(args) >= 2 else kwargs.get('options') - if options.url == "/images/generations": - options.url = "openai/images/generations:submit" - response = await super().request(*args, **kwargs) - operation_id = 
response.model_extra['id'] - return await self._poll( - "get", f"openai/operations/images/{operation_id}", cast_to=httpx.Response, - until=lambda response: response.json()["status"] in ["succeeded"], - failed=lambda response: response.json()["status"] in ["failed"], - ) - elif options.extra_json and options.extra_json.get("dataSources"): - model = options.json_data["model"] - options.url = f'openai/deployments/{model}/extensions' + options.url - else: - model = options.json_data["model"] - options.url = f'openai/deployments/{model}' + options.url - return await super().request(*args, **kwargs) - diff --git a/src/openai/azure/__init__.py b/src/openai/azure/__init__.py new file mode 100644 index 0000000000..805d97a52f --- /dev/null +++ b/src/openai/azure/__init__.py @@ -0,0 +1,9 @@ +from ._sync_client import AzureOpenAIClient +from ._async_client import AsyncAzureOpenAIClient +from ._credential import TokenCredential + +__all__ = [ + "AzureOpenAIClient", + "TokenCredential", + "AsyncAzureOpenAIClient", +] \ No newline at end of file diff --git a/src/openai/azure/_async_client.py b/src/openai/azure/_async_client.py new file mode 100644 index 0000000000..a34c36eb62 --- /dev/null +++ b/src/openai/azure/_async_client.py @@ -0,0 +1,436 @@ +from typing_extensions import Literal, override +from typing import Any, Callable, cast, List, Mapping, Dict, Optional, overload, Type, Union +import time + +import httpx + +from openai import AsyncClient, OpenAIError +from openai.resources.chat import AsyncChat, AsyncCompletions +from openai.types import ImagesResponse +from openai.types.chat import ChatCompletionMessageParam, ChatCompletion, ChatCompletionChunk +from openai.types.chat.completion_create_params import FunctionCall, Function + +# These types are needed for correct typing of overrides +from openai._types import NotGiven, NOT_GIVEN, Headers, Query, Body, ResponseT + +# These are types used in the public API surface area that are not exported as public +from openai._models import FinalRequestOptions +from openai._streaming import AsyncStream + +# Azure specific types +from ._credential import TokenCredential +from ._azuremodels import ChatExtensionConfiguration + +TIMEOUT_SECS = 600 + +class AsyncAzureChat(AsyncChat): + + @property + def completions(self) -> "AsyncAzureCompletions": + return self._completions + + def __init__(self, client: "AsyncAzureOpenAIClient"): + self._completions = AsyncAzureCompletions(client) + +class AsyncAzureCompletions(AsyncCompletions): + + @overload + async def create( + self, + *, + messages: List[ChatCompletionMessageParam], + model: Union[ + str, + Literal[ + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + ], + ], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: FunctionCall | NotGiven = NOT_GIVEN, + functions: List[Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + data_sources: List[ChatExtensionConfiguration] | 
NotGiven = NOT_GIVEN, # TODO + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + + model: ID of the model to use. See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + function_call: Controls how the model responds to function calls. `none` means the model does + not call a function, and responds to the end-user. `auto` means the model can + pick between an end-user or calling a function. Specifying a particular function + via `{"name": "my_function"}` forces the model to call that function. `none` is + the default when no functions are present. `auto` is the default if functions + are present. + + functions: A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a json object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + + n: How many chat completion choices to generate for each input message. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + stop: Up to 4 sequences where the API will stop generating further tokens. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. 
+ [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + messages: List[ChatCompletionMessageParam], + model: Union[ + str, + Literal[ + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + ], + ], + stream: Literal[True], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: FunctionCall | NotGiven = NOT_GIVEN, + functions: List[Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + data_sources: List[ChatExtensionConfiguration] | NotGiven = NOT_GIVEN, # TODO + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + + model: ID of the model to use. See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. 
+ [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + function_call: Controls how the model responds to function calls. `none` means the model does + not call a function, and responds to the end-user. `auto` means the model can + pick between an end-user or calling a function. Specifying a particular function + via `{"name": "my_function"}` forces the model to call that function. `none` is + the default when no functions are present. `auto` is the default if functions + are present. + + functions: A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a json object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + + n: How many chat completion choices to generate for each input message. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + stop: Up to 4 sequences where the API will stop generating further tokens. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ @override + async def create( + self, + *, + messages: List[ChatCompletionMessageParam], + model: Union[ + str, + Literal[ + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + ], + ], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: FunctionCall | NotGiven = NOT_GIVEN, + functions: List[Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + data_sources: List[ChatExtensionConfiguration] | NotGiven = NOT_GIVEN, # TODO + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + if data_sources: + if extra_body is None: + extra_body= {} + cast(Dict[str, Any], extra_body)['dataSources'] = data_sources + stream_dict: Dict[str, Literal[True]] = { # TODO: pylance is upset if I pass through the parameter value. Overload + override combination is problematic + "stream": True + } if stream else {} + response = await super().create( + messages=messages, + model=model, + frequency_penalty = frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stop=stop, + **stream_dict, + temperature=temperature, + top_p=top_p, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout + ) + return response + +class AsyncAzureOpenAIClient(AsyncClient): + + @property + @override + def chat(self) -> AsyncAzureChat: + return self._chat + + def __init__(self, *args: Any, credential: "TokenCredential" | None = None, api_version: str = '2023-09-01-preview', **kwargs: Any): + default_query = kwargs.get('default_query', {}) + default_query.setdefault('api-version', api_version) + kwargs['default_query'] = default_query + self.credential = credential + if credential: + kwargs['api_key'] = 'Placeholder: AAD' # TODO: There is an assumption/validation there is always an API key. 
+ super().__init__(*args, **kwargs) + self._chat = AsyncAzureChat(self) + + @property + def auth_headers(self) -> Dict[str, str]: + if self.credential: + return { 'Authorization': f'Bearer {self.credential.get_token()}'} + return {"api-key": self.api_key} + + + def _check_polling_response(self, response: httpx.Response, predicate: Callable[[httpx.Response], bool]) -> bool: + if not predicate(response): + return False + error_data = response.json()['error'] + message: str = cast(str, error_data.get('message', 'Operation failed')) + code = error_data.get('code') + raise OpenAIError(f'Error: {message} ({code})') + + async def _poll( + self, + method: str, + url: str, + until: Callable[[httpx.Response], bool], + failed: Callable[[httpx.Response], bool], + interval: Optional[float] = None, + delay: Optional[float] = None, + ) -> ImagesResponse: + if delay: + time.sleep(delay) + + opts = FinalRequestOptions.construct(method=method, url=url) + response = await super().request(httpx.Response, opts) + self._check_polling_response(response, failed) + start_time = time.time() + while not until(response): + if time.time() - start_time > TIMEOUT_SECS: + raise Exception("Operation polling timed out.") # TODO: Fix up exception type. + + time.sleep(interval or int(response.headers.get("retry-after")) or 10) + response = await super().request(httpx.Response, opts) + self._check_polling_response(response, failed) + + response_json = response.json() + return ImagesResponse.construct(**response_json["result"]) + + async def _request(self, cast_to: Type[ResponseT], options: FinalRequestOptions, **kwargs: Any) -> Any: + if options.url == "/images/generations": + options.url = "openai/images/generations:submit" + response = await super().request(httpx.Response, **kwargs) + operation_id = cast(Mapping[str, Any], getattr(response, 'model_extra')) or {} + return await self._poll( + "get", f"openai/operations/images/{operation_id}", + until=lambda response: response.json()["status"] in ["succeeded"], + failed=lambda response: response.json()["status"] in ["failed"], + ) + elif options.extra_json and options.extra_json.get("dataSources"): + assert isinstance(options.json_data, Mapping) + model = cast(str, options.json_data["model"]) + options.url = f'openai/deployments/{model}/extensions' + options.url + else: + assert isinstance(options.json_data, Mapping) + model = cast(str, options.json_data["model"]) + options.url = f'openai/deployments/{model}' + options.url + return await super().request(cast_to=cast_to, options=options, **kwargs) + diff --git a/src/openai/azure/_azuremodels.py b/src/openai/azure/_azuremodels.py new file mode 100644 index 0000000000..bfc2f31fd4 --- /dev/null +++ b/src/openai/azure/_azuremodels.py @@ -0,0 +1,5 @@ +from typing import TypedDict + +class ChatExtensionConfiguration(TypedDict): + type: str + parameters: object diff --git a/src/openai/azure/_credential.py b/src/openai/azure/_credential.py new file mode 100644 index 0000000000..075a7116cd --- /dev/null +++ b/src/openai/azure/_credential.py @@ -0,0 +1,13 @@ +class TokenCredential: + """Placeholder/example token credential class + + A real implementation would be compatible with e.g. azure-identity and also should be easily + adaptible to other token credential implementations. 
+ """ + def __init__(self): + import azure.identity + self._credential = azure.identity.DefaultAzureCredential() + + def get_token(self): + return self._credential.get_token('https://cognitiveservices.azure.com/.default').token + diff --git a/src/openai/azure/_sync_client.py b/src/openai/azure/_sync_client.py new file mode 100644 index 0000000000..499c74e7ce --- /dev/null +++ b/src/openai/azure/_sync_client.py @@ -0,0 +1,440 @@ +from typing_extensions import Literal, override +from typing import Any, Callable, cast, List, Mapping, Dict, Optional, overload, Union +import time + +import httpx + +from openai import Client, OpenAIError +from openai.types import ImagesResponse + +# These are types used in the public API surface area that are not exported as public +from openai._models import FinalRequestOptions + +# These types are needed for correct typing of overrides +from openai._types import NotGiven, NOT_GIVEN, Headers, Query, Body +from openai._streaming import Stream + +from openai.resources.chat import Chat, Completions +from openai.types.chat import ChatCompletionMessageParam, ChatCompletion, ChatCompletionChunk +from openai.types.chat.completion_create_params import FunctionCall, Function + +# Azure specific types +from ._credential import TokenCredential +from ._azuremodels import ChatExtensionConfiguration + +TIMEOUT_SECS = 600 + +class AzureChat(Chat): + + @property + def completions(self) -> "AzureCompletions": + return self._completions + + def __init__(self, client: "AzureOpenAIClient"): + self._completions = AzureCompletions(client) + +class AzureCompletions(Completions): + + @overload + def create( + self, + *, + messages: List[ChatCompletionMessageParam], + model: Union[ + str, + Literal[ + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + ], + ], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: FunctionCall | NotGiven = NOT_GIVEN, + functions: List[Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + data_sources: List[ChatExtensionConfiguration] | NotGiven = NOT_GIVEN, # TODO + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + + model: ID of the model to use. 
See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + function_call: Controls how the model responds to function calls. `none` means the model does + not call a function, and responds to the end-user. `auto` means the model can + pick between an end-user or calling a function. Specifying a particular function + via `{"name": "my_function"}` forces the model to call that function. `none` is + the default when no functions are present. `auto` is the default if functions + are present. + + functions: A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a json object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + + n: How many chat completion choices to generate for each input message. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + stop: Up to 4 sequences where the API will stop generating further tokens. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. 
+ [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + messages: List[ChatCompletionMessageParam], + model: Union[ + str, + Literal[ + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + ], + ], + stream: Literal[True], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: FunctionCall | NotGiven = NOT_GIVEN, + functions: List[Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + data_sources: List[ChatExtensionConfiguration] | NotGiven = NOT_GIVEN, # TODO + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + """ + Creates a model response for the given chat conversation. + + Args: + messages: A list of messages comprising the conversation so far. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_format_inputs_to_ChatGPT_models.ipynb). + + model: ID of the model to use. See the + [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) + table for details on which models work with the Chat API. + + stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be + sent as data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + function_call: Controls how the model responds to function calls. `none` means the model does + not call a function, and responds to the end-user. `auto` means the model can + pick between an end-user or calling a function. Specifying a particular function + via `{"name": "my_function"}` forces the model to call that function. `none` is + the default when no functions are present. `auto` is the default if functions + are present. 
+ + functions: A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a json object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + max_tokens: The maximum number of [tokens](/tokenizer) to generate in the chat completion. + + The total length of input tokens and generated tokens is limited by the model's + context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + + n: How many chat completion choices to generate for each input message. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + stop: Up to 4 sequences where the API will stop generating further tokens. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
+ @override + def create( + self, + *, + messages: List[ChatCompletionMessageParam], + model: Union[ + str, + Literal[ + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-16k-0613", + ], + ], + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: FunctionCall | NotGiven = NOT_GIVEN, + functions: List[Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + data_sources: List[ChatExtensionConfiguration] | NotGiven = NOT_GIVEN, # TODO + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + if data_sources: + if extra_body is None: + extra_body= {} + cast(Dict[str, Any], extra_body)['dataSources'] = data_sources + stream_dict: Dict[str, Literal[True]] = { # TODO: pylance is upset if I pass through the parameter value. Overload + override combination is problematic + "stream": True + } if stream else {} + response = super().create( + messages=messages, + model=model, + frequency_penalty = frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stop=stop, + **stream_dict, + temperature=temperature, + top_p=top_p, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout + ) + return response + + +class AzureOpenAIClient(Client): + + @property + @override + def chat(self) -> AzureChat: + return self._chat + + def __init__(self, *args: Any, credential: "TokenCredential" | None = None, api_version: str = '2023-09-01-preview', **kwargs: Any): + default_query = kwargs.get('default_query', {}) + default_query.setdefault('api-version', api_version) + kwargs['default_query'] = default_query + self.credential = credential + if credential: + kwargs['api_key'] = 'Placeholder: AAD' # TODO: There is an assumption/validation there is always an API key. + super().__init__(*args, **kwargs) + self._chat = AzureChat(self) + + @property + def auth_headers(self) -> Dict[str, str]: + if self.credential: + return { 'Authorization': f'Bearer {self.credential.get_token()}'} + return {"api-key": self.api_key} + + # NOTE: We override the internal method because overriding overloaded methods and keeping typing happy is a pain. Most typing tools are lacking... 
+ def _request(self, *, options: FinalRequestOptions, **kwargs: Any) -> Any: + if options.url == "/images/generations": + options.url = "openai/images/generations:submit" + response = super().request(httpx.Response, **kwargs) + model_extra = cast(Mapping[str, Any], getattr(response, 'model_extra')) or {} + operation_id = cast(str, model_extra['id']) + return self._poll( + "get", f"openai/operations/images/{operation_id}", + until=lambda response: response.json()["status"] in ["succeeded"], + failed=lambda response: response.json()["status"] in ["failed"], + ) + if options.extra_json and options.extra_json.get("dataSources"): + assert isinstance(options.json_data, Mapping) + model = cast(str, options.json_data["model"]) + options.url = f'openai/deployments/{model}/extensions' + options.url + else: + assert isinstance(options.json_data, Mapping) + model = cast(str, options.json_data["model"]) + options.url = f'openai/deployments/{model}' + options.url + return super().request(options=options, **kwargs) + + # Internal azure specific "helper" methods + def _check_polling_response(self, response: httpx.Response, predicate: Callable[[httpx.Response], bool]) -> bool: + if not predicate(response): + return False + error_data = cast(Dict[str, Any], response.json()['error']) + message = error_data.get('message', 'Operation failed') + code = error_data.get('code') + raise OpenAIError(message, code) + + def _poll( + self, + method: str, + url: str, + until: Callable[[httpx.Response], bool], + failed: Callable[[httpx.Response], bool], + interval: Optional[float] = None, + delay: Optional[float] = None, + ) -> ImagesResponse: + if delay: + time.sleep(delay) + + opts = FinalRequestOptions.construct(method=method, url=url) + response = super().request(httpx.Response, opts) + self._check_polling_response(response, failed) + start_time = time.time() + while not until(response): + if time.time() - start_time > TIMEOUT_SECS: + raise OpenAIError("Operation polling timed out.") # TODO: Find the right exception + + time.sleep(interval or int(response.headers.get("retry-after")) or 10) + response = super().request(httpx.Response, opts) + self._check_polling_response(response, failed) + + response_json = response.json() + return ImagesResponse.construct(**response_json["result"]) + From 357c2ee0e7c167b4dc8abad59c7c5a5495fdcf71 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Thu, 5 Oct 2023 13:43:10 -0700 Subject: [PATCH 04/10] Change resources on client to be read-only properties --- src/openai/_client.py | 14 ++++++++++---- src/openai/resources/chat/chat.py | 16 +++++++++++----- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/src/openai/_client.py b/src/openai/_client.py index e0e5e37f4c..a1ee81e2ea 100644 --- a/src/openai/_client.py +++ b/src/openai/_client.py @@ -46,7 +46,11 @@ class OpenAI(SyncAPIClient): completions: resources.Completions - chat: resources.Chat + + @property + def chat(self) -> resources.chat.Chat: + return self._chat + # chat: resources.chat.Chat edits: resources.Edits embeddings: resources.Embeddings files: resources.Files @@ -122,7 +126,7 @@ def __init__( self._default_stream_cls = Stream self.completions = resources.Completions(self) - self.chat = resources.Chat(self) + self._chat = resources.Chat(self) self.edits = resources.Edits(self) self.embeddings = resources.Embeddings(self) self.files = resources.Files(self) @@ -244,7 +248,9 @@ def _make_status_error( class AsyncOpenAI(AsyncAPIClient): completions: resources.AsyncCompletions - chat: resources.AsyncChat + 
@property + def chat(self) -> resources.AsyncChat: + return self._chat edits: resources.AsyncEdits embeddings: resources.AsyncEmbeddings files: resources.AsyncFiles @@ -320,7 +326,7 @@ def __init__( self._default_stream_cls = AsyncStream self.completions = resources.AsyncCompletions(self) - self.chat = resources.AsyncChat(self) + self._chat = resources.AsyncChat(self) self.edits = resources.AsyncEdits(self) self.embeddings = resources.AsyncEmbeddings(self) self.files = resources.AsyncFiles(self) diff --git a/src/openai/resources/chat/chat.py b/src/openai/resources/chat/chat.py index 62bb796571..226b3d7add 100644 --- a/src/openai/resources/chat/chat.py +++ b/src/openai/resources/chat/chat.py @@ -14,16 +14,22 @@ class Chat(SyncAPIResource): - completions: Completions + + @property + def completions(self) -> Completions: + return self._completions def __init__(self, client: OpenAI) -> None: super().__init__(client) - self.completions = Completions(client) + self._completions = Completions(client) class AsyncChat(AsyncAPIResource): - completions: AsyncCompletions - + + @property + def completions(self) -> AsyncCompletions: + return self._completions + def __init__(self, client: AsyncOpenAI) -> None: super().__init__(client) - self.completions = AsyncCompletions(client) + self._completions = AsyncCompletions(client) From 016143e57d1ad164edbffceca274ba01667552c3 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Thu, 5 Oct 2023 13:47:33 -0700 Subject: [PATCH 05/10] Missed renames of azure client --- src/openai/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/openai/__init__.py b/src/openai/__init__.py index b386c35fee..ab7324fb1c 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -18,7 +18,7 @@ AsyncStream, RequestOptions, ) -from .azure import AzureClient, AzureAsyncClient +from .azure import AzureOpenAIClient, AsyncAzureOpenAIClient from ._version import __title__, __version__ from ._exceptions import ( APIError, @@ -67,8 +67,8 @@ "OpenAI", "AsyncOpenAI", "file_from_path", - "AzureClient", - "AzureAsyncClient" + "AzureOpenAIClient", + "AsyncAzureOpenAIClient" ] from .version import VERSION as VERSION From 38929f499fd07f425fcac95590df5184e5d102a4 Mon Sep 17 00:00:00 2001 From: Johan Stenberg Date: Thu, 5 Oct 2023 15:16:32 -0700 Subject: [PATCH 06/10] Fix url rebuilding for retries(?) 
--- src/openai/__init__.py | 5 +---- src/openai/azure/_async_client.py | 18 +++++++++--------- src/openai/azure/_sync_client.py | 21 ++++++++++----------- 3 files changed, 20 insertions(+), 24 deletions(-) diff --git a/src/openai/__init__.py b/src/openai/__init__.py index ab7324fb1c..7d2630f123 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -18,7 +18,6 @@ AsyncStream, RequestOptions, ) -from .azure import AzureOpenAIClient, AsyncAzureOpenAIClient from ._version import __title__, __version__ from ._exceptions import ( APIError, @@ -66,9 +65,7 @@ "AsyncStream", "OpenAI", "AsyncOpenAI", - "file_from_path", - "AzureOpenAIClient", - "AsyncAzureOpenAIClient" + "file_from_path" ] from .version import VERSION as VERSION diff --git a/src/openai/azure/_async_client.py b/src/openai/azure/_async_client.py index a34c36eb62..62445403a2 100644 --- a/src/openai/azure/_async_client.py +++ b/src/openai/azure/_async_client.py @@ -362,7 +362,7 @@ class AsyncAzureOpenAIClient(AsyncClient): def chat(self) -> AsyncAzureChat: return self._chat - def __init__(self, *args: Any, credential: "TokenCredential" | None = None, api_version: str = '2023-09-01-preview', **kwargs: Any): + def __init__(self, *args: Any, credential: Optional["TokenCredential"] = None, api_version: str = '2023-09-01-preview', **kwargs: Any): default_query = kwargs.get('default_query', {}) default_query.setdefault('api-version', api_version) kwargs['default_query'] = default_query @@ -414,6 +414,7 @@ async def _poll( response_json = response.json() return ImagesResponse.construct(**response_json["result"]) + # NOTE: We override the internal method because `@overrid`ing `@overload`ed methods and keeping typing happy is a pain. Most typing tools are lacking... async def _request(self, cast_to: Type[ResponseT], options: FinalRequestOptions, **kwargs: Any) -> Any: if options.url == "/images/generations": options.url = "openai/images/generations:submit" @@ -424,13 +425,12 @@ async def _request(self, cast_to: Type[ResponseT], options: FinalRequestOptions, until=lambda response: response.json()["status"] in ["succeeded"], failed=lambda response: response.json()["status"] in ["failed"], ) - elif options.extra_json and options.extra_json.get("dataSources"): - assert isinstance(options.json_data, Mapping) - model = cast(str, options.json_data["model"]) - options.url = f'openai/deployments/{model}/extensions' + options.url - else: - assert isinstance(options.json_data, Mapping) - model = cast(str, options.json_data["model"]) - options.url = f'openai/deployments/{model}' + options.url + if isinstance(options.json_data, Mapping): + model = cast(str, options.json_data["model"]) + if not options.url.startswith(f'openai/deployments/{model}'): + if options.extra_json and options.extra_json.get("dataSources"): + options.url = f'openai/deployments/{model}/extensions' + options.url + else: + options.url = f'openai/deployments/{model}' + options.url return await super().request(cast_to=cast_to, options=options, **kwargs) diff --git a/src/openai/azure/_sync_client.py b/src/openai/azure/_sync_client.py index 499c74e7ce..645f1e630d 100644 --- a/src/openai/azure/_sync_client.py +++ b/src/openai/azure/_sync_client.py @@ -364,14 +364,14 @@ class AzureOpenAIClient(Client): def chat(self) -> AzureChat: return self._chat - def __init__(self, *args: Any, credential: "TokenCredential" | None = None, api_version: str = '2023-09-01-preview', **kwargs: Any): + def __init__(self, *args: Any, base_url: str, credential: Optional["TokenCredential"] = None, 
api_version: str = '2023-09-01-preview', **kwargs: Any): default_query = kwargs.get('default_query', {}) default_query.setdefault('api-version', api_version) kwargs['default_query'] = default_query self.credential = credential if credential: kwargs['api_key'] = 'Placeholder: AAD' # TODO: There is an assumption/validation there is always an API key. - super().__init__(*args, **kwargs) + super().__init__(*args, base_url=base_url, **kwargs) self._chat = AzureChat(self) @property @@ -380,7 +380,7 @@ def auth_headers(self) -> Dict[str, str]: return { 'Authorization': f'Bearer {self.credential.get_token()}'} return {"api-key": self.api_key} - # NOTE: We override the internal method because overriding overloaded methods and keeping typing happy is a pain. Most typing tools are lacking... + # NOTE: We override the internal method because `@overrid`ing `@overload`ed methods and keeping typing happy is a pain. Most typing tools are lacking... def _request(self, *, options: FinalRequestOptions, **kwargs: Any) -> Any: if options.url == "/images/generations": options.url = "openai/images/generations:submit" @@ -392,14 +392,13 @@ def _request(self, *, options: FinalRequestOptions, **kwargs: Any) -> Any: until=lambda response: response.json()["status"] in ["succeeded"], failed=lambda response: response.json()["status"] in ["failed"], ) - if options.extra_json and options.extra_json.get("dataSources"): - assert isinstance(options.json_data, Mapping) - model = cast(str, options.json_data["model"]) - options.url = f'openai/deployments/{model}/extensions' + options.url - else: - assert isinstance(options.json_data, Mapping) - model = cast(str, options.json_data["model"]) - options.url = f'openai/deployments/{model}' + options.url + if isinstance(options.json_data, Mapping): + model = cast(str, options.json_data["model"]) + if not options.url.startswith(f'openai/deployments/{model}'): + if options.extra_json and options.extra_json.get("dataSources"): + options.url = f'openai/deployments/{model}/extensions' + options.url + else: + options.url = f'openai/deployments/{model}' + options.url return super().request(options=options, **kwargs) # Internal azure specific "helper" methods From 7ad129f082f703714356c253c909a96d2c233c89 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Thu, 5 Oct 2023 16:26:19 -0700 Subject: [PATCH 07/10] few fixes after the refactor (#9) --- src/openai/azure/_async_client.py | 12 ++++++------ src/openai/azure/_sync_client.py | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/openai/azure/_async_client.py b/src/openai/azure/_async_client.py index 62445403a2..77708fa74f 100644 --- a/src/openai/azure/_async_client.py +++ b/src/openai/azure/_async_client.py @@ -418,19 +418,19 @@ async def _poll( async def _request(self, cast_to: Type[ResponseT], options: FinalRequestOptions, **kwargs: Any) -> Any: if options.url == "/images/generations": options.url = "openai/images/generations:submit" - response = await super().request(httpx.Response, **kwargs) - operation_id = cast(Mapping[str, Any], getattr(response, 'model_extra')) or {} + response = await super()._request(cast_to=cast_to, options=options, **kwargs) + model_extra = cast(Mapping[str, Any], getattr(response, 'model_extra')) or {} + operation_id = cast(str, model_extra['id']) return await self._poll( "get", f"openai/operations/images/{operation_id}", until=lambda response: response.json()["status"] in ["succeeded"], failed=lambda response: response.json()["status"] in ["failed"], ) if 
isinstance(options.json_data, Mapping): - model = cast(str, options.json_data["model"]) + model = cast(str, options.json_data["model"]) if not options.url.startswith(f'openai/deployments/{model}'): if options.extra_json and options.extra_json.get("dataSources"): options.url = f'openai/deployments/{model}/extensions' + options.url - else: + else: options.url = f'openai/deployments/{model}' + options.url - return await super().request(cast_to=cast_to, options=options, **kwargs) - + return await super()._request(cast_to=cast_to, options=options, **kwargs) \ No newline at end of file diff --git a/src/openai/azure/_sync_client.py b/src/openai/azure/_sync_client.py index 645f1e630d..ba7faccf20 100644 --- a/src/openai/azure/_sync_client.py +++ b/src/openai/azure/_sync_client.py @@ -384,7 +384,7 @@ def auth_headers(self) -> Dict[str, str]: def _request(self, *, options: FinalRequestOptions, **kwargs: Any) -> Any: if options.url == "/images/generations": options.url = "openai/images/generations:submit" - response = super().request(httpx.Response, **kwargs) + response = super()._request(options=options, **kwargs) model_extra = cast(Mapping[str, Any], getattr(response, 'model_extra')) or {} operation_id = cast(str, model_extra['id']) return self._poll( @@ -393,13 +393,13 @@ def _request(self, *, options: FinalRequestOptions, **kwargs: Any) -> Any: failed=lambda response: response.json()["status"] in ["failed"], ) if isinstance(options.json_data, Mapping): - model = cast(str, options.json_data["model"]) + model = cast(str, options.json_data["model"]) if not options.url.startswith(f'openai/deployments/{model}'): if options.extra_json and options.extra_json.get("dataSources"): options.url = f'openai/deployments/{model}/extensions' + options.url - else: + else: options.url = f'openai/deployments/{model}' + options.url - return super().request(options=options, **kwargs) + return super()._request(options=options, **kwargs) # Internal azure specific "helper" methods def _check_polling_response(self, response: httpx.Response, predicate: Callable[[httpx.Response], bool]) -> bool: From 664178c39c716efd771d2663b77cc0360f6de20d Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Fri, 6 Oct 2023 16:46:08 -0700 Subject: [PATCH 08/10] custom auth (#12) Change to use customAuth rather than "hiding" token credentials behind auth_headers. Added async support for token auth. 
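The idea is to hand httpx an Auth implementation that injects the bearer token on every request (including retries and streamed requests) instead of baking it into auth_headers. A simplified, self-contained sketch of that pattern follows; the stand-in credential and class name are illustrative, and this version also stores the fetched token so the five-minute expiry check has something to compare against.

    import time
    import httpx

    class StaticTokenCredential:
        # Illustrative stand-in for azure.identity.DefaultAzureCredential:
        # real credentials return an object with .token and .expires_on.
        class _Token:
            token = "example-access-token"
            expires_on = time.time() + 3600

        def get_token(self, scope: str):
            return self._Token()

    class BearerTokenAuth(httpx.Auth):
        def __init__(self, credential) -> None:
            self._credential = credential
            self._cached = None

        def _token(self) -> str:
            # Refresh when there is no cached token or it expires within 5 minutes.
            if self._cached is None or self._cached.expires_on - time.time() < 300:
                self._cached = self._credential.get_token(
                    "https://cognitiveservices.azure.com/.default"
                )
            return self._cached.token

        def sync_auth_flow(self, request: httpx.Request):
            request.headers["Authorization"] = f"Bearer {self._token()}"
            yield request

With this in place the client only needs to return such an object from custom_auth and httpx applies it on every outgoing request.
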
--- src/openai/azure/_async_client.py | 8 +++++--- src/openai/azure/_credential.py | 33 +++++++++++++++++++++++++++++++ src/openai/azure/_sync_client.py | 9 ++++++--- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/openai/azure/_async_client.py b/src/openai/azure/_async_client.py index 77708fa74f..8ec0cc6cae 100644 --- a/src/openai/azure/_async_client.py +++ b/src/openai/azure/_async_client.py @@ -18,7 +18,7 @@ from openai._streaming import AsyncStream # Azure specific types -from ._credential import TokenCredential +from ._credential import TokenCredential, TokenAuth from ._azuremodels import ChatExtensionConfiguration TIMEOUT_SECS = 600 @@ -374,10 +374,12 @@ def __init__(self, *args: Any, credential: Optional["TokenCredential"] = None, a @property def auth_headers(self) -> Dict[str, str]: - if self.credential: - return { 'Authorization': f'Bearer {self.credential.get_token()}'} return {"api-key": self.api_key} + @property + def custom_auth(self) -> httpx.Auth | None: + if self.credential: + return TokenAuth(self.credential) def _check_polling_response(self, response: httpx.Response, predicate: Callable[[httpx.Response], bool]) -> bool: if not predicate(response): diff --git a/src/openai/azure/_credential.py b/src/openai/azure/_credential.py index 075a7116cd..9d10e14909 100644 --- a/src/openai/azure/_credential.py +++ b/src/openai/azure/_credential.py @@ -1,3 +1,9 @@ +from typing import AsyncGenerator, Generator, Any +import time +import asyncio +import httpx + + class TokenCredential: """Placeholder/example token credential class @@ -11,3 +17,30 @@ def __init__(self): def get_token(self): return self._credential.get_token('https://cognitiveservices.azure.com/.default').token + +class TokenAuth(httpx.Auth): + def __init__(self, credential: "TokenCredential") -> None: + self._credential = credential + self._async_lock = asyncio.Lock() + self.cached_token = None + + def sync_get_token(self) -> str: + if not self.cached_token or self.cached_token.expires_on - time.time() < 300: + return self._credential.get_token("https://cognitiveservices.azure.com/.default").token + return self.cached_token.token + + def sync_auth_flow(self, request: httpx.Request) -> Generator[httpx.Request, Any, Any]: + token = self.sync_get_token() + request.headers["Authorization"] = f"Bearer {token}" + yield request + + async def async_get_token(self) -> str: + async with self._async_lock: + if not self.cached_token or self.cached_token.expires_on - time.time() < 300: + return (await self._credential.get_token("https://cognitiveservices.azure.com/.default")).token + return self.cached_token.token + + async def async_auth_flow(self, request: httpx.Request) -> AsyncGenerator[httpx.Request, Any]: + token = await self.async_get_token() + request.headers["Authorization"] = f"Bearer {token}" + yield request diff --git a/src/openai/azure/_sync_client.py b/src/openai/azure/_sync_client.py index ba7faccf20..3f904b482f 100644 --- a/src/openai/azure/_sync_client.py +++ b/src/openai/azure/_sync_client.py @@ -19,7 +19,7 @@ from openai.types.chat.completion_create_params import FunctionCall, Function # Azure specific types -from ._credential import TokenCredential +from ._credential import TokenCredential, TokenAuth from ._azuremodels import ChatExtensionConfiguration TIMEOUT_SECS = 600 @@ -376,10 +376,13 @@ def __init__(self, *args: Any, base_url: str, credential: Optional["TokenCredent @property def auth_headers(self) -> Dict[str, str]: - if self.credential: - return { 
'Authorization': f'Bearer {self.credential.get_token()}'} return {"api-key": self.api_key} + @property + def custom_auth(self) -> httpx.Auth | None: + if self.credential: + return TokenAuth(self.credential) + # NOTE: We override the internal method because `@overrid`ing `@overload`ed methods and keeping typing happy is a pain. Most typing tools are lacking... def _request(self, *, options: FinalRequestOptions, **kwargs: Any) -> Any: if options.url == "/images/generations": From b92e77ecb3db21906fe98ff04cda338685358415 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Fri, 6 Oct 2023 16:46:40 -0700 Subject: [PATCH 09/10] prepend openai to other models, files, fine-tuning, fine-tunes (#13) --- src/openai/azure/_async_client.py | 2 ++ src/openai/azure/_sync_client.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/src/openai/azure/_async_client.py b/src/openai/azure/_async_client.py index 8ec0cc6cae..64e9f232dc 100644 --- a/src/openai/azure/_async_client.py +++ b/src/openai/azure/_async_client.py @@ -435,4 +435,6 @@ async def _request(self, cast_to: Type[ResponseT], options: FinalRequestOptions, options.url = f'openai/deployments/{model}/extensions' + options.url else: options.url = f'openai/deployments/{model}' + options.url + if options.url.startswith(("/models", "/fine_tuning", "/files", "/fine-tunes")): + options.url = f"openai{options.url}" return await super()._request(cast_to=cast_to, options=options, **kwargs) \ No newline at end of file diff --git a/src/openai/azure/_sync_client.py b/src/openai/azure/_sync_client.py index 3f904b482f..44a48c2ef4 100644 --- a/src/openai/azure/_sync_client.py +++ b/src/openai/azure/_sync_client.py @@ -402,6 +402,8 @@ def _request(self, *, options: FinalRequestOptions, **kwargs: Any) -> Any: options.url = f'openai/deployments/{model}/extensions' + options.url else: options.url = f'openai/deployments/{model}' + options.url + if options.url.startswith(("/models", "/fine_tuning", "/files", "/fine-tunes")): + options.url = f"openai{options.url}" return super()._request(options=options, **kwargs) # Internal azure specific "helper" methods From be65dd4d9349556186e8da36a555b963076e57db Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Fri, 6 Oct 2023 17:48:09 -0700 Subject: [PATCH 10/10] capture azure-only properties (#10) Correctly type/"subclass" operations --- src/openai/azure/_async_client.py | 473 ++++++++++++++++++++++++++++-- src/openai/azure/_azuremodels.py | 81 ++++- src/openai/azure/_sync_client.py | 471 +++++++++++++++++++++++++++-- 3 files changed, 965 insertions(+), 60 deletions(-) diff --git a/src/openai/azure/_async_client.py b/src/openai/azure/_async_client.py index 64e9f232dc..2f9ca5bc80 100644 --- a/src/openai/azure/_async_client.py +++ b/src/openai/azure/_async_client.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing_extensions import Literal, override from typing import Any, Callable, cast, List, Mapping, Dict, Optional, overload, Type, Union import time @@ -6,9 +8,11 @@ from openai import AsyncClient, OpenAIError from openai.resources.chat import AsyncChat, AsyncCompletions +from openai.resources.completions import AsyncCompletions as AsyncCompletionsOperations from openai.types import ImagesResponse from openai.types.chat import ChatCompletionMessageParam, ChatCompletion, ChatCompletionChunk from openai.types.chat.completion_create_params import FunctionCall, Function +from openai.types.completion import Completion # These types are needed for correct typing of overrides from openai._types import NotGiven, 
NOT_GIVEN, Headers, Query, Body, ResponseT @@ -19,20 +23,26 @@ # Azure specific types from ._credential import TokenCredential, TokenAuth -from ._azuremodels import ChatExtensionConfiguration +from ._azuremodels import ( + ChatExtensionConfiguration, + AzureChatCompletion, + AzureChatCompletionChunk, + AzureCompletion, +) TIMEOUT_SECS = 600 class AsyncAzureChat(AsyncChat): @property - def completions(self) -> "AsyncAzureCompletions": + def completions(self) -> "AsyncAzureChatCompletions": return self._completions def __init__(self, client: "AsyncAzureOpenAIClient"): - self._completions = AsyncAzureCompletions(client) + self._completions = AsyncAzureChatCompletions(client) + -class AsyncAzureCompletions(AsyncCompletions): +class AsyncAzureChatCompletions(AsyncCompletions): @overload async def create( @@ -74,7 +84,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion: + ) -> AzureChatCompletion: """ Creates a model response for the given chat conversation. @@ -200,7 +210,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: + ) -> AsyncStream[AzureChatCompletionChunk]: """ Creates a model response for the given chat conversation. @@ -325,7 +335,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + ) -> AzureChatCompletion | AsyncStream[AzureChatCompletionChunk]: if data_sources: if extra_body is None: extra_body= {} @@ -333,27 +343,423 @@ async def create( stream_dict: Dict[str, Literal[True]] = { # TODO: pylance is upset if I pass through the parameter value. Overload + override combination is problematic "stream": True } if stream else {} - response = await super().create( - messages=messages, - model=model, - frequency_penalty = frequency_penalty, - function_call=function_call, - functions=functions, - logit_bias=logit_bias, - max_tokens=max_tokens, - n=n, - presence_penalty=presence_penalty, - stop=stop, - **stream_dict, - temperature=temperature, - top_p=top_p, - user=user, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout + response = cast( + Union[ChatCompletion, ChatCompletionChunk], + await super().create( + messages=messages, + model=model, + frequency_penalty = frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stop=stop, + **stream_dict, + temperature=temperature, + top_p=top_p, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout + ) ) - return response + if isinstance(response, AsyncStream): + response._cast_to = AzureChatCompletionChunk # or rebuild the stream? 
+ else: + response_json = response.model_dump(mode="json") + response = AzureChatCompletion.construct(**response_json) + return response # type: ignore + + +class AsyncAzureCompletions(AsyncCompletionsOperations): + @overload + async def create( + self, + *, + model: Union[ + str, + Literal[ + "babbage-002", + "davinci-002", + "gpt-3.5-turbo-instruct", + "text-davinci-003", + "text-davinci-002", + "text-davinci-001", + "code-davinci-002", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + ], + ], + prompt: Union[str, List[str], List[int], List[List[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> AzureCompletion: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. 
+ + Accepts a json object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to + convert text to token IDs. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, + but values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the + chosen tokens. For example, if `logprobs` is 5, the API will return a list of + the 5 most likely tokens. The API will always return the `logprob` of the + sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + + suffix: The suffix that comes after a completion of inserted text. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + model: Union[ + str, + Literal[ + "babbage-002", + "davinci-002", + "gpt-3.5-turbo-instruct", + "text-davinci-003", + "text-davinci-002", + "text-davinci-001", + "code-davinci-002", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + ], + ], + prompt: Union[str, List[str], List[int], List[List[int]], None], + stream: Literal[True], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[AzureCompletion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. 
Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a json object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to + convert text to token IDs. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, + but values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the + chosen tokens. For example, if `logprobs` is 5, the API will return a list of + the 5 most likely tokens. The API will always return the `logprob` of the + sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + suffix: The suffix that comes after a completion of inserted text. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @override + async def create( + self, + *, + model: Union[ + str, + Literal[ + "babbage-002", + "davinci-002", + "gpt-3.5-turbo-instruct", + "text-davinci-003", + "text-davinci-002", + "text-davinci-001", + "code-davinci-002", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + ], + ], + prompt: Union[str, List[str], List[int], List[List[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> AzureCompletion | AsyncStream[AzureCompletion]: + stream_dict: Dict[str, Literal[True]] = { # TODO: pylance is upset if I pass through the parameter value. 
Overload + override combination is problematic + "stream": True + } if stream else {} + response = cast( + Union[Completion, AsyncStream[Completion]], + await super().create( + model=model, + prompt=prompt, + best_of=best_of, + echo=echo, + frequency_penalty = frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stop=stop, + **stream_dict, + suffix=suffix, + temperature=temperature, + top_p=top_p, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout + ) + ) + + if isinstance(response, AsyncStream): + response._cast_to = AzureCompletion + else: + response_json = response.model_dump(mode="json") + response = AzureCompletion.construct(**response_json) + return response # type: ignore + class AsyncAzureOpenAIClient(AsyncClient): @@ -361,7 +767,16 @@ class AsyncAzureOpenAIClient(AsyncClient): @override def chat(self) -> AsyncAzureChat: return self._chat - + + @property + @override + def completions(self) -> AsyncAzureCompletions: + return self._completions + + @completions.setter + def completions(self, value: AsyncAzureCompletions) -> None: + self._completions = value + def __init__(self, *args: Any, credential: Optional["TokenCredential"] = None, api_version: str = '2023-09-01-preview', **kwargs: Any): default_query = kwargs.get('default_query', {}) default_query.setdefault('api-version', api_version) @@ -437,4 +852,4 @@ async def _request(self, cast_to: Type[ResponseT], options: FinalRequestOptions, options.url = f'openai/deployments/{model}' + options.url if options.url.startswith(("/models", "/fine_tuning", "/files", "/fine-tunes")): options.url = f"openai{options.url}" - return await super()._request(cast_to=cast_to, options=options, **kwargs) \ No newline at end of file + return await super()._request(cast_to=cast_to, options=options, **kwargs) diff --git a/src/openai/azure/_azuremodels.py b/src/openai/azure/_azuremodels.py index bfc2f31fd4..841bd11d78 100644 --- a/src/openai/azure/_azuremodels.py +++ b/src/openai/azure/_azuremodels.py @@ -1,5 +1,82 @@ -from typing import TypedDict +from typing import List, Optional +from typing_extensions import TypedDict, Literal +from openai._models import BaseModel as BaseModel + +from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage +from openai.types.chat.chat_completion import Choice as ChatChoice +from openai.types.chat.chat_completion_chunk import ChoiceDelta, Choice as ChatChoiceDelta +from openai.types.completion import Completion +from openai.types.completion_choice import CompletionChoice + + +AzureChatCompletionRole = Literal["system", "user", "assistant", "function", "tool"] + class ChatExtensionConfiguration(TypedDict): - type: str + type: Literal["AzureCognitiveSearch"] parameters: object + + +class ContentFilterResult(BaseModel): + severity: Literal["safe", "low", "medium", "high"] + filtered: bool + + +class Error(BaseModel): + code: str + message: str + + +class ContentFilterResults(BaseModel): + hate: Optional[ContentFilterResult] + self_harm: Optional[ContentFilterResult] + violence: Optional[ContentFilterResult] + sexual: Optional[ContentFilterResult] + error: Optional[Error] + + +class PromptFilterResult(BaseModel): + prompt_index: int + content_filter_results: Optional[ContentFilterResults] + + +class AzureChatExtensionsMessageContext(BaseModel): + messages: Optional[List[ChatCompletionMessage]] + + +class 
AzureChatCompletionMessage(ChatCompletionMessage): + context: Optional[AzureChatExtensionsMessageContext] + role: AzureChatCompletionRole # type: ignore + + +class AzureChatCompletionChoice(ChatChoice): + content_filter_results: Optional[ContentFilterResults] + message: AzureChatCompletionMessage # type: ignore + + +class AzureChatCompletion(ChatCompletion): + choices: List[AzureChatCompletionChoice] # type: ignore + prompt_filter_results: Optional[List[PromptFilterResult]] + + +class AzureChoiceDelta(ChoiceDelta): + context: Optional[AzureChatExtensionsMessageContext] + + +class AzureChatCompletionChoiceDelta(ChatChoiceDelta): + delta: AzureChoiceDelta # type: ignore + content_filter_results: Optional[ContentFilterResults] + + +class AzureChatCompletionChunk(ChatCompletionChunk): + choices: List[AzureChatCompletionChoiceDelta] # type: ignore + prompt_filter_results: Optional[List[PromptFilterResult]] + + +class AzureCompletionChoice(CompletionChoice): + content_filter_results: Optional[ContentFilterResults] + + +class AzureCompletion(Completion): + choices: List[AzureCompletionChoice] # type: ignore + prompt_filter_results: Optional[List[PromptFilterResult]] diff --git a/src/openai/azure/_sync_client.py b/src/openai/azure/_sync_client.py index 44a48c2ef4..677f9e8ac3 100644 --- a/src/openai/azure/_sync_client.py +++ b/src/openai/azure/_sync_client.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing_extensions import Literal, override from typing import Any, Callable, cast, List, Mapping, Dict, Optional, overload, Union import time @@ -15,25 +17,33 @@ from openai._streaming import Stream from openai.resources.chat import Chat, Completions +from openai.resources.completions import Completions as CompletionsOperations from openai.types.chat import ChatCompletionMessageParam, ChatCompletion, ChatCompletionChunk from openai.types.chat.completion_create_params import FunctionCall, Function +from openai.types.completion import Completion # Azure specific types from ._credential import TokenCredential, TokenAuth -from ._azuremodels import ChatExtensionConfiguration +from ._azuremodels import ( + ChatExtensionConfiguration, + AzureChatCompletion, + AzureChatCompletionChunk, + AzureCompletion, +) TIMEOUT_SECS = 600 class AzureChat(Chat): @property - def completions(self) -> "AzureCompletions": + def completions(self) -> "AzureChatCompletions": return self._completions def __init__(self, client: "AzureOpenAIClient"): - self._completions = AzureCompletions(client) + self._completions = AzureChatCompletions(client) + -class AzureCompletions(Completions): +class AzureChatCompletions(Completions): @overload def create( @@ -75,7 +85,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion: + ) -> AzureChatCompletion: """ Creates a model response for the given chat conversation. @@ -201,7 +211,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: + ) -> Stream[AzureChatCompletionChunk]: """ Creates a model response for the given chat conversation. 
@@ -326,7 +336,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: + ) -> AzureChatCompletion | Stream[AzureChatCompletionChunk]: if data_sources: if extra_body is None: extra_body= {} @@ -334,27 +344,422 @@ def create( stream_dict: Dict[str, Literal[True]] = { # TODO: pylance is upset if I pass through the parameter value. Overload + override combination is problematic "stream": True } if stream else {} - response = super().create( - messages=messages, - model=model, - frequency_penalty = frequency_penalty, - function_call=function_call, - functions=functions, - logit_bias=logit_bias, - max_tokens=max_tokens, - n=n, - presence_penalty=presence_penalty, - stop=stop, - **stream_dict, - temperature=temperature, - top_p=top_p, - user=user, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout + response = cast( + Union[ChatCompletion, Stream[ChatCompletionChunk]], + super().create( + messages=messages, + model=model, + frequency_penalty = frequency_penalty, + function_call=function_call, + functions=functions, + logit_bias=logit_bias, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stop=stop, + **stream_dict, + temperature=temperature, + top_p=top_p, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout + ) ) - return response + if isinstance(response, Stream): + response._cast_to = AzureChatCompletionChunk # or rebuild the stream? + else: + response_json = response.model_dump(mode="json") + response = AzureChatCompletion.construct(**response_json) + return response # type: ignore + + +class AzureCompletions(CompletionsOperations): + @overload + def create( + self, + *, + model: Union[ + str, + Literal[ + "babbage-002", + "davinci-002", + "gpt-3.5-turbo-instruct", + "text-davinci-003", + "text-davinci-002", + "text-davinci-001", + "code-davinci-002", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + ], + ], + prompt: Union[str, List[str], List[int], List[List[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> AzureCompletion: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. 
You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a json object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to + convert text to token IDs. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, + but values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the + chosen tokens. For example, if `logprobs` is 5, the API will return a list of + the 5 most likely tokens. The API will always return the `logprob` of the + sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + + n: How many completions to generate for each prompt. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. 
+ + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + + suffix: The suffix that comes after a completion of inserted text. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + model: Union[ + str, + Literal[ + "babbage-002", + "davinci-002", + "gpt-3.5-turbo-instruct", + "text-davinci-003", + "text-davinci-002", + "text-davinci-001", + "code-davinci-002", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + ], + ], + prompt: Union[str, List[str], List[int], List[List[int]], None], + stream: Literal[True], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> Stream[AzureCompletion]: + """ + Creates a completion for the provided prompt and parameters. + + Args: + model: ID of the model to use. 
You can use the + [List models](https://platform.openai.com/docs/api-reference/models/list) API to + see all of your available models, or see our + [Model overview](https://platform.openai.com/docs/models/overview) for + descriptions of them. + + prompt: The prompt(s) to generate completions for, encoded as a string, array of + strings, array of tokens, or array of token arrays. + + Note that <|endoftext|> is the document separator that the model sees during + training, so if a prompt is not specified the model will generate as if from the + beginning of a new document. + + stream: Whether to stream back partial progress. If set, tokens will be sent as + data-only + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) + as they become available, with the stream terminated by a `data: [DONE]` + message. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb). + + best_of: Generates `best_of` completions server-side and returns the "best" (the one with + the highest log probability per token). Results cannot be streamed. + + When used with `n`, `best_of` controls the number of candidate completions and + `n` specifies how many to return – `best_of` must be greater than `n`. + + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + echo: Echo back the prompt in addition to the completion + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a json object that maps tokens (specified by their token ID in the GPT + tokenizer) to an associated bias value from -100 to 100. You can use this + [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to + convert text to token IDs. Mathematically, the bias is added to the logits + generated by the model prior to sampling. The exact effect will vary per model, + but values between -1 and 1 should decrease or increase likelihood of selection; + values like -100 or 100 should result in a ban or exclusive selection of the + relevant token. + + As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token + from being generated. + + logprobs: Include the log probabilities on the `logprobs` most likely tokens, as well the + chosen tokens. For example, if `logprobs` is 5, the API will return a list of + the 5 most likely tokens. The API will always return the `logprob` of the + sampled token, so there may be up to `logprobs+1` elements in the response. + + The maximum value for `logprobs` is 5. + + max_tokens: The maximum number of [tokens](/tokenizer) to generate in the completion. + + The token count of your prompt plus `max_tokens` cannot exceed the model's + context length. + [Example Python code](https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb) + for counting tokens. + + n: How many completions to generate for each prompt. 
+ + **Note:** Because this parameter generates many completions, it can quickly + consume your token quota. Use carefully and ensure that you have reasonable + settings for `max_tokens` and `stop`. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/gpt/parameter-details) + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + suffix: The suffix that comes after a completion of inserted text. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + We generally recommend altering this or `top_p` but not both. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @override + def create( + self, + *, + model: Union[ + str, + Literal[ + "babbage-002", + "davinci-002", + "gpt-3.5-turbo-instruct", + "text-davinci-003", + "text-davinci-002", + "text-davinci-001", + "code-davinci-002", + "text-curie-001", + "text-babbage-001", + "text-ada-001", + ], + ], + prompt: Union[str, List[str], List[int], List[List[int]], None], + best_of: Optional[int] | NotGiven = NOT_GIVEN, + echo: Optional[bool] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + suffix: Optional[str] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | None | NotGiven = NOT_GIVEN, + ) -> AzureCompletion | Stream[AzureCompletion]: + stream_dict: Dict[str, Literal[True]] = { # TODO: pylance is upset if I pass through the parameter value. 
Overload + override combination is problematic + "stream": True + } if stream else {} + response = cast( + Union[Completion, Stream[Completion]], + super().create( + model=model, + prompt=prompt, + best_of=best_of, + echo=echo, + frequency_penalty = frequency_penalty, + logit_bias=logit_bias, + logprobs=logprobs, + max_tokens=max_tokens, + n=n, + presence_penalty=presence_penalty, + stop=stop, + **stream_dict, + suffix=suffix, + temperature=temperature, + top_p=top_p, + user=user, + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout + ) + ) + + if isinstance(response, Stream): + response._cast_to = AzureCompletion + else: + response_json = response.model_dump(mode="json") + response = AzureCompletion.construct(**response_json) + return response # type: ignore class AzureOpenAIClient(Client): @@ -363,7 +768,16 @@ class AzureOpenAIClient(Client): @override def chat(self) -> AzureChat: return self._chat - + + @property + @override + def completions(self) -> AzureCompletions: + return self._completions + + @completions.setter + def completions(self, value: AzureCompletions) -> None: + self._completions = value + def __init__(self, *args: Any, base_url: str, credential: Optional["TokenCredential"] = None, api_version: str = '2023-09-01-preview', **kwargs: Any): default_query = kwargs.get('default_query', {}) default_query.setdefault('api-version', api_version) @@ -441,4 +855,3 @@ def _poll( response_json = response.json() return ImagesResponse.construct(**response_json["result"]) -
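
Taken together, the series leaves the Azure clients looking roughly like this from the caller's side. This is a hedged end-to-end sketch using the module paths from these patches; the endpoint and deployment name are placeholders, and running it requires a real Azure OpenAI resource plus azure-identity for TokenCredential.

    from openai.azure._credential import TokenCredential
    from openai.azure._sync_client import AzureOpenAIClient

    client = AzureOpenAIClient(
        base_url="https://my-resource.openai.azure.com/",  # placeholder resource endpoint
        credential=TokenCredential(),                    # or api_key="<azure-openai-key>"
        api_version="2023-09-01-preview",
    )

    # "model" is the Azure *deployment* name; _request prepends
    # openai/deployments/<model> to the request path.
    completion = client.chat.completions.create(
        model="gpt-35-turbo",
        messages=[{"role": "user", "content": "Hello"}],
    )

    # Azure-only fields surfaced by the typed models in _azuremodels.py.
    if completion.prompt_filter_results:
        print(completion.prompt_filter_results[0].content_filter_results)
    for choice in completion.choices:
        print(choice.content_filter_results)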