diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d86fc0ea53..d148b34a9e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,9 +10,9 @@ on:
jobs:
lint:
+ timeout-minutes: 10
name: lint
runs-on: ubuntu-latest
-
steps:
- uses: actions/checkout@v4
@@ -31,9 +31,9 @@ jobs:
run: ./scripts/lint
test:
+ timeout-minutes: 10
name: test
runs-on: ubuntu-latest
-
steps:
- uses: actions/checkout@v4
@@ -52,8 +52,10 @@ jobs:
run: ./scripts/test
examples:
+ timeout-minutes: 10
name: examples
runs-on: ubuntu-latest
+ if: github.repository == 'openai/openai-python'
steps:
- uses: actions/checkout@v4
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index ba5cbfb627..df3aaa16a7 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
{
- ".": "1.70.0"
+ ".": "1.76.0"
}
\ No newline at end of file
diff --git a/.stats.yml b/.stats.yml
index f6a90d2438..d92408173b 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,4 +1,4 @@
-configured_endpoints: 82
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-6663c59193eb95b201e492de17dcbd5e126ba03d18ce66287a3e2c632ca56fe7.yml
-openapi_spec_hash: 7996d2c34cc44fe2ce9ffe93c0ab774e
-config_hash: e25e31d8446b6bc0e3ef7103b6993cce
+configured_endpoints: 97
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-8b68ae6b807dca92e914da1dd9e835a20f69b075e79102a264367fd7fddddb33.yml
+openapi_spec_hash: b6ade5b1a6327339e6669e1134de2d03
+config_hash: b597cd9a31e9e5ec709e2eefb4c54122
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8954d86571..73d8f2bf6e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,123 @@
# Changelog
+## 1.76.0 (2025-04-23)
+
+Full Changelog: [v1.75.0...v1.76.0](https://github.com/openai/openai-python/compare/v1.75.0...v1.76.0)
+
+### Features
+
+* **api:** adding new image model support ([74d7692](https://github.com/openai/openai-python/commit/74d7692e94c9dca96db8793809d75631c22dbb87))
+
+
+### Bug Fixes
+
+* **pydantic v1:** more robust `ModelField.annotation` check ([#2163](https://github.com/openai/openai-python/issues/2163)) ([7351b12](https://github.com/openai/openai-python/commit/7351b12bc981f56632b92342d9ef26f6fb28d540))
+* **pydantic v1:** more robust ModelField.annotation check ([eba7856](https://github.com/openai/openai-python/commit/eba7856db55afb8cb44376a0248587549f7bc65f))
+
+
+### Chores
+
+* **ci:** add timeout thresholds for CI jobs ([0997211](https://github.com/openai/openai-python/commit/09972119df5dd4c7c8db137c721364787e22d4c6))
+* **internal:** fix list file params ([da2113c](https://github.com/openai/openai-python/commit/da2113c60b50b4438459325fcd38d55df3f63d8e))
+* **internal:** import reformatting ([b425fb9](https://github.com/openai/openai-python/commit/b425fb906f62550c3669b09b9d8575f3d4d8496b))
+* **internal:** minor formatting changes ([aed1d76](https://github.com/openai/openai-python/commit/aed1d767898324cf90328db329e04e89a77579c3))
+* **internal:** refactor retries to not use recursion ([8cb8cfa](https://github.com/openai/openai-python/commit/8cb8cfab48a4fed70a756ce50036e7e56e1f9f87))
+* **internal:** update models test ([870ad4e](https://github.com/openai/openai-python/commit/870ad4ed3a284d75f44b825503750129284c7906))
+* update completion parse signature ([a44016c](https://github.com/openai/openai-python/commit/a44016c64cdefe404e97592808ed3c25411ab27b))
+
+## 1.75.0 (2025-04-16)
+
+Full Changelog: [v1.74.1...v1.75.0](https://github.com/openai/openai-python/compare/v1.74.1...v1.75.0)
+
+### Features
+
+* **api:** add o3 and o4-mini model IDs ([4bacbd5](https://github.com/openai/openai-python/commit/4bacbd5503137e266c127dc643ebae496cb4f158))
+
+## 1.74.1 (2025-04-16)
+
+Full Changelog: [v1.74.0...v1.74.1](https://github.com/openai/openai-python/compare/v1.74.0...v1.74.1)
+
+### Chores
+
+* **internal:** base client updates ([06303b5](https://github.com/openai/openai-python/commit/06303b501f8c17040c495971a4ee79ae340f6f4a))
+* **internal:** bump pyright version ([9fd1c77](https://github.com/openai/openai-python/commit/9fd1c778c3231616bf1331cb1daa86fdfca4cb7f))
+
+## 1.74.0 (2025-04-14)
+
+Full Changelog: [v1.73.0...v1.74.0](https://github.com/openai/openai-python/compare/v1.73.0...v1.74.0)
+
+### Features
+
+* **api:** adding gpt-4.1 family of model IDs ([d4dae55](https://github.com/openai/openai-python/commit/d4dae5553ff3a2879b9ab79a6423661b212421f9))
+
+
+### Bug Fixes
+
+* **chat:** skip azure async filter events ([#2255](https://github.com/openai/openai-python/issues/2255)) ([fd3a38b](https://github.com/openai/openai-python/commit/fd3a38b1ed30af0a9f3302c1cfc6be6b352e65de))
+
+
+### Chores
+
+* **client:** minor internal fixes ([6071ae5](https://github.com/openai/openai-python/commit/6071ae5e8b4faa465afc8d07370737e66901900a))
+* **internal:** update pyright settings ([c8f8beb](https://github.com/openai/openai-python/commit/c8f8bebf852380a224701bc36826291d6387c53d))
+
+## 1.73.0 (2025-04-12)
+
+Full Changelog: [v1.72.0...v1.73.0](https://github.com/openai/openai-python/compare/v1.72.0...v1.73.0)
+
+### Features
+
+* **api:** manual updates ([a3253dd](https://github.com/openai/openai-python/commit/a3253dd798c1eccd9810d4fc593e8c2a568bcf4f))
+
+
+### Bug Fixes
+
+* **perf:** optimize some hot paths ([f79d39f](https://github.com/openai/openai-python/commit/f79d39fbcaea8f366a9e48c06fb1696bab3e607d))
+* **perf:** skip traversing types for NotGiven values ([28d220d](https://github.com/openai/openai-python/commit/28d220de3b4a09d80450d0bcc9b347bbf68f81ec))
+
+
+### Chores
+
+* **internal:** expand CI branch coverage ([#2295](https://github.com/openai/openai-python/issues/2295)) ([0ae783b](https://github.com/openai/openai-python/commit/0ae783b99122975be521365de0b6d2bce46056c9))
+* **internal:** reduce CI branch coverage ([2fb7d42](https://github.com/openai/openai-python/commit/2fb7d425cda679a54aa3262090479fd747363bb4))
+* slight wording improvement in README ([#2291](https://github.com/openai/openai-python/issues/2291)) ([e020759](https://github.com/openai/openai-python/commit/e0207598d16a2a9cb3cb3a8e8e97fa9cfdccd5e8))
+* workaround build errors ([4e10c96](https://github.com/openai/openai-python/commit/4e10c96a483db28dedc2d8c2908765fb7317e049))
+
+## 1.72.0 (2025-04-08)
+
+Full Changelog: [v1.71.0...v1.72.0](https://github.com/openai/openai-python/compare/v1.71.0...v1.72.0)
+
+### Features
+
+* **api:** Add evalapi to sdk ([#2287](https://github.com/openai/openai-python/issues/2287)) ([35262fc](https://github.com/openai/openai-python/commit/35262fcef6ccb7d1f75c9abdfdc68c3dcf87ef53))
+
+
+### Chores
+
+* **internal:** fix examples ([#2288](https://github.com/openai/openai-python/issues/2288)) ([39defd6](https://github.com/openai/openai-python/commit/39defd61e81ea0ec6b898be12e9fb7e621c0e532))
+* **internal:** skip broken test ([#2289](https://github.com/openai/openai-python/issues/2289)) ([e2c9bce](https://github.com/openai/openai-python/commit/e2c9bce1f59686ee053b495d06ea118b4a89e09e))
+* **internal:** slight transform perf improvement ([#2284](https://github.com/openai/openai-python/issues/2284)) ([746174f](https://github.com/openai/openai-python/commit/746174fae7a018ece5dab54fb0b5a15fcdd18f2f))
+* **tests:** improve enum examples ([#2286](https://github.com/openai/openai-python/issues/2286)) ([c9dd81c](https://github.com/openai/openai-python/commit/c9dd81ce0277e8b1f5db5e0a39c4c2bcd9004bcc))
+
+## 1.71.0 (2025-04-07)
+
+Full Changelog: [v1.70.0...v1.71.0](https://github.com/openai/openai-python/compare/v1.70.0...v1.71.0)
+
+### Features
+
+* **api:** manual updates ([bf8b4b6](https://github.com/openai/openai-python/commit/bf8b4b69906bfaea622c9c644270e985d92e2df2))
+* **api:** manual updates ([3e37aa3](https://github.com/openai/openai-python/commit/3e37aa3e151d9738625a1daf75d6243d6fdbe8f2))
+* **api:** manual updates ([dba9b65](https://github.com/openai/openai-python/commit/dba9b656fa5955b6eba8f6910da836a34de8d59d))
+* **api:** manual updates ([f0c463b](https://github.com/openai/openai-python/commit/f0c463b47836666d091b5f616871f1b94646d346))
+
+
+### Chores
+
+* **deps:** allow websockets v15 ([#2281](https://github.com/openai/openai-python/issues/2281)) ([19c619e](https://github.com/openai/openai-python/commit/19c619ea95839129a86c19d5b60133e1ed9f2746))
+* **internal:** only run examples workflow in main repo ([#2282](https://github.com/openai/openai-python/issues/2282)) ([c3e0927](https://github.com/openai/openai-python/commit/c3e0927d3fbbb9f753ba12adfa682a4235ba530a))
+* **internal:** remove trailing character ([#2277](https://github.com/openai/openai-python/issues/2277)) ([5a21a2d](https://github.com/openai/openai-python/commit/5a21a2d7994e39bb0c86271eeb807983a9ae874a))
+* Remove deprecated/unused remote spec feature ([23f76eb](https://github.com/openai/openai-python/commit/23f76eb0b9ddf12bcb04a6ad3f3ec5e956d2863f))
+
## 1.70.0 (2025-03-31)
Full Changelog: [v1.69.0...v1.70.0](https://github.com/openai/openai-python/compare/v1.69.0...v1.70.0)
diff --git a/README.md b/README.md
index c52bffbb5f..f7e0eb6467 100644
--- a/README.md
+++ b/README.md
@@ -351,7 +351,7 @@ response = client.chat.responses.create(
## File uploads
-Request parameters that correspond to file uploads can be passed as `bytes`, a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance or a tuple of `(filename, contents, media type)`.
+Request parameters that correspond to file uploads can be passed as `bytes`, a [`PathLike`](https://docs.python.org/3/library/os.html#os.PathLike) instance, or a tuple of `(filename, contents, media type)`.
```python
from pathlib import Path
diff --git a/api.md b/api.md
index a5f81c624c..d04c76960e 100644
--- a/api.md
+++ b/api.md
@@ -259,6 +259,26 @@ Methods:
- client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint]
+## Checkpoints
+
+### Permissions
+
+Types:
+
+```python
+from openai.types.fine_tuning.checkpoints import (
+ PermissionCreateResponse,
+ PermissionRetrieveResponse,
+ PermissionDeleteResponse,
+)
+```
+
+Methods:
+
+- client.fine_tuning.checkpoints.permissions.create(fine_tuned_model_checkpoint, \*\*params) -> SyncPage[PermissionCreateResponse]
+- client.fine_tuning.checkpoints.permissions.retrieve(fine_tuned_model_checkpoint, \*\*params) -> PermissionRetrieveResponse
+- client.fine_tuning.checkpoints.permissions.delete(permission_id, \*, fine_tuned_model_checkpoint) -> PermissionDeleteResponse
+
# VectorStores
Types:
@@ -669,6 +689,10 @@ from openai.types.responses import (
ResponseOutputRefusal,
ResponseOutputText,
ResponseReasoningItem,
+ ResponseReasoningSummaryPartAddedEvent,
+ ResponseReasoningSummaryPartDoneEvent,
+ ResponseReasoningSummaryTextDeltaEvent,
+ ResponseReasoningSummaryTextDoneEvent,
ResponseRefusalDeltaEvent,
ResponseRefusalDoneEvent,
ResponseStatus,
@@ -706,3 +730,68 @@ from openai.types.responses import ResponseItemList
Methods:
- client.responses.input_items.list(response_id, \*\*params) -> SyncCursorPage[ResponseItem]
+
+# Evals
+
+Types:
+
+```python
+from openai.types import (
+ EvalCustomDataSourceConfig,
+ EvalLabelModelGrader,
+ EvalStoredCompletionsDataSourceConfig,
+ EvalStringCheckGrader,
+ EvalTextSimilarityGrader,
+ EvalCreateResponse,
+ EvalRetrieveResponse,
+ EvalUpdateResponse,
+ EvalListResponse,
+ EvalDeleteResponse,
+)
+```
+
+Methods:
+
+- client.evals.create(\*\*params) -> EvalCreateResponse
+- client.evals.retrieve(eval_id) -> EvalRetrieveResponse
+- client.evals.update(eval_id, \*\*params) -> EvalUpdateResponse
+- client.evals.list(\*\*params) -> SyncCursorPage[EvalListResponse]
+- client.evals.delete(eval_id) -> EvalDeleteResponse
+
+## Runs
+
+Types:
+
+```python
+from openai.types.evals import (
+ CreateEvalCompletionsRunDataSource,
+ CreateEvalJSONLRunDataSource,
+ EvalAPIError,
+ RunCreateResponse,
+ RunRetrieveResponse,
+ RunListResponse,
+ RunDeleteResponse,
+ RunCancelResponse,
+)
+```
+
+Methods:
+
+- client.evals.runs.create(eval_id, \*\*params) -> RunCreateResponse
+- client.evals.runs.retrieve(run_id, \*, eval_id) -> RunRetrieveResponse
+- client.evals.runs.list(eval_id, \*\*params) -> SyncCursorPage[RunListResponse]
+- client.evals.runs.delete(run_id, \*, eval_id) -> RunDeleteResponse
+- client.evals.runs.cancel(run_id, \*, eval_id) -> RunCancelResponse
+
+### OutputItems
+
+Types:
+
+```python
+from openai.types.evals.runs import OutputItemRetrieveResponse, OutputItemListResponse
+```
+
+Methods:
+
+- client.evals.runs.output_items.retrieve(output_item_id, \*, eval_id, run_id) -> OutputItemRetrieveResponse
+- client.evals.runs.output_items.list(run_id, \*, eval_id, \*\*params) -> SyncCursorPage[OutputItemListResponse]
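For context on the new Evals surface listed above, here is a minimal usage sketch. It only touches the ID-based methods shown in this diff; the client assumes `OPENAI_API_KEY` is set in the environment, and the `status` field on runs is an assumption about the response model, not something this diff shows.

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Walk existing evals and their runs via the new resources (auto-paginated).
for evaluation in client.evals.list():
    print(evaluation.id)
    for run in client.evals.runs.list(evaluation.id):
        # `status` is assumed to exist on the run response model.
        print("  ", run.id, run.status)
```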
diff --git a/pyproject.toml b/pyproject.toml
index 296d02e40b..947e082f78 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "openai"
-version = "1.70.0"
+version = "1.76.0"
description = "The official Python library for the openai API"
dynamic = ["readme"]
license = "Apache-2.0"
@@ -43,7 +43,7 @@ Repository = "https://github.com/openai/openai-python"
openai = "openai.cli:main"
[project.optional-dependencies]
-realtime = ["websockets >= 13, < 15"]
+realtime = ["websockets >= 13, < 16"]
datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"]
voice_helpers = ["sounddevice>=0.5.1", "numpy>=2.0.2"]
@@ -51,7 +51,7 @@ voice_helpers = ["sounddevice>=0.5.1", "numpy>=2.0.2"]
managed = true
# version pins are in requirements-dev.lock
dev-dependencies = [
- "pyright>=1.1.359",
+ "pyright==1.1.399",
"mypy",
"respx",
"pytest",
@@ -166,6 +166,7 @@ exclude = [
]
reportImplicitOverride = true
+reportOverlappingOverload = false
reportImportCycles = false
reportPrivateUsage = false
diff --git a/requirements-dev.lock b/requirements-dev.lock
index 0755ddb3c5..9875a2b860 100644
--- a/requirements-dev.lock
+++ b/requirements-dev.lock
@@ -126,7 +126,7 @@ pygments==2.18.0
# via rich
pyjwt==2.8.0
# via msal
-pyright==1.1.392.post0
+pyright==1.1.399
pytest==8.3.3
# via pytest-asyncio
pytest-asyncio==0.24.0
@@ -188,7 +188,7 @@ urllib3==2.2.1
# via requests
virtualenv==20.24.5
# via nox
-websockets==14.2
+websockets==15.0.1
# via openai
zipp==3.17.0
# via importlib-metadata
diff --git a/requirements.lock b/requirements.lock
index fa88e26c0f..467abc6e90 100644
--- a/requirements.lock
+++ b/requirements.lock
@@ -70,5 +70,5 @@ typing-extensions==4.12.2
# via pydantic-core
tzdata==2024.1
# via pandas
-websockets==14.2
+websockets==15.0.1
# via openai
diff --git a/src/openai/__init__.py b/src/openai/__init__.py
index 7ce6df0817..9e97098bb0 100644
--- a/src/openai/__init__.py
+++ b/src/openai/__init__.py
@@ -352,6 +352,7 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction]
beta as beta,
chat as chat,
audio as audio,
+ evals as evals,
files as files,
images as images,
models as models,
diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py
index f31e5af54b..a0f9cce7d8 100644
--- a/src/openai/_base_client.py
+++ b/src/openai/_base_client.py
@@ -100,7 +100,11 @@
_AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any])
if TYPE_CHECKING:
- from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
+ from httpx._config import (
+ DEFAULT_TIMEOUT_CONFIG, # pyright: ignore[reportPrivateImportUsage]
+ )
+
+ HTTPX_DEFAULT_TIMEOUT = DEFAULT_TIMEOUT_CONFIG
else:
try:
from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
@@ -117,6 +121,7 @@ class PageInfo:
url: URL | NotGiven
params: Query | NotGiven
+ json: Body | NotGiven
@overload
def __init__(
@@ -132,19 +137,30 @@ def __init__(
params: Query,
) -> None: ...
+ @overload
+ def __init__(
+ self,
+ *,
+ json: Body,
+ ) -> None: ...
+
def __init__(
self,
*,
url: URL | NotGiven = NOT_GIVEN,
+ json: Body | NotGiven = NOT_GIVEN,
params: Query | NotGiven = NOT_GIVEN,
) -> None:
self.url = url
+ self.json = json
self.params = params
@override
def __repr__(self) -> str:
if self.url:
return f"{self.__class__.__name__}(url={self.url})"
+ if self.json:
+ return f"{self.__class__.__name__}(json={self.json})"
return f"{self.__class__.__name__}(params={self.params})"
@@ -193,6 +209,19 @@ def _info_to_options(self, info: PageInfo) -> FinalRequestOptions:
options.url = str(url)
return options
+ if not isinstance(info.json, NotGiven):
+ if not is_mapping(info.json):
+ raise TypeError("Pagination is only supported with mappings")
+
+ if not options.json_data:
+ options.json_data = {**info.json}
+ else:
+ if not is_mapping(options.json_data):
+ raise TypeError("Pagination is only supported with mappings")
+
+ options.json_data = {**options.json_data, **info.json}
+ return options
+
raise ValueError("Unexpected PageInfo state")
@@ -410,8 +439,8 @@ def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0
headers = httpx.Headers(headers_dict)
idempotency_header = self._idempotency_header
- if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
- headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
+ if idempotency_header and options.idempotency_key and idempotency_header not in headers:
+ headers[idempotency_header] = options.idempotency_key
# Don't set these headers if they were already set or removed by the caller. We check
# `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case.
@@ -875,7 +904,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: Literal[True],
stream_cls: Type[_StreamT],
@@ -886,7 +914,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: Literal[False] = False,
) -> ResponseT: ...
@@ -896,7 +923,6 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: bool = False,
stream_cls: Type[_StreamT] | None = None,
@@ -906,122 +932,110 @@ def request(
self,
cast_to: Type[ResponseT],
options: FinalRequestOptions,
- remaining_retries: Optional[int] = None,
*,
stream: bool = False,
stream_cls: type[_StreamT] | None = None,
) -> ResponseT | _StreamT:
- if remaining_retries is not None:
- retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
- else:
- retries_taken = 0
-
- return self._request(
- cast_to=cast_to,
- options=options,
- stream=stream,
- stream_cls=stream_cls,
- retries_taken=retries_taken,
- )
+ cast_to = self._maybe_override_cast_to(cast_to, options)
- def _request(
- self,
- *,
- cast_to: Type[ResponseT],
- options: FinalRequestOptions,
- retries_taken: int,
- stream: bool,
- stream_cls: type[_StreamT] | None,
- ) -> ResponseT | _StreamT:
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
+ if input_options.idempotency_key is None and input_options.method.lower() != "get":
+ # ensure the idempotency key is reused between requests
+ input_options.idempotency_key = self._idempotency_key()
- cast_to = self._maybe_override_cast_to(cast_to, options)
- options = self._prepare_options(options)
-
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
- request = self._build_request(options, retries_taken=retries_taken)
- self._prepare_request(request)
-
- kwargs: HttpxSendArgs = {}
- if self.custom_auth is not None:
- kwargs["auth"] = self.custom_auth
+ response: httpx.Response | None = None
+ max_retries = input_options.get_max_retries(self.max_retries)
- log.debug("Sending HTTP Request: %s %s", request.method, request.url)
+ retries_taken = 0
+ for retries_taken in range(max_retries + 1):
+ options = model_copy(input_options)
+ options = self._prepare_options(options)
- try:
- response = self._client.send(
- request,
- stream=stream or self._should_stream_response_body(request=request),
- **kwargs,
- )
- except httpx.TimeoutException as err:
- log.debug("Encountered httpx.TimeoutException", exc_info=True)
+ remaining_retries = max_retries - retries_taken
+ request = self._build_request(options, retries_taken=retries_taken)
+ self._prepare_request(request)
- if remaining_retries > 0:
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
- )
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
- log.debug("Raising timeout error")
- raise APITimeoutError(request=request) from err
- except Exception as err:
- log.debug("Encountered Exception", exc_info=True)
+ log.debug("Sending HTTP Request: %s %s", request.method, request.url)
- if remaining_retries > 0:
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
+ response = None
+ try:
+ response = self._client.send(
+ request,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
+ except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if remaining_retries > 0:
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising timeout error")
+ raise APITimeoutError(request=request) from err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if remaining_retries > 0:
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Response: %s %s "%i %s" %s',
+ request.method,
+ request.url,
+ response.status_code,
+ response.reason_phrase,
+ response.headers,
+ )
+ log.debug("request_id: %s", response.headers.get("x-request-id"))
- log.debug("Raising connection error")
- raise APIConnectionError(request=request) from err
-
- log.debug(
- 'HTTP Response: %s %s "%i %s" %s',
- request.method,
- request.url,
- response.status_code,
- response.reason_phrase,
- response.headers,
- )
- log.debug("request_id: %s", response.headers.get("x-request-id"))
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if remaining_retries > 0 and self._should_retry(err.response):
+ err.response.close()
+ self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=response,
+ )
+ continue
- try:
- response.raise_for_status()
- except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
- log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
- if remaining_retries > 0 and self._should_retry(err.response):
- err.response.close()
- return self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- response_headers=err.response.headers,
- stream=stream,
- stream_cls=stream_cls,
- )
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ err.response.read()
- # If the response is streamed then we need to explicitly read the response
- # to completion before attempting to access the response text.
- if not err.response.is_closed:
- err.response.read()
+ log.debug("Re-raising status error")
+ raise self._make_status_error_from_response(err.response) from None
- log.debug("Re-raising status error")
- raise self._make_status_error_from_response(err.response) from None
+ break
+ assert response is not None, "could not resolve response (should never happen)"
return self._process_response(
cast_to=cast_to,
options=options,
@@ -1031,37 +1045,20 @@ def _request(
retries_taken=retries_taken,
)
- def _retry_request(
- self,
- options: FinalRequestOptions,
- cast_to: Type[ResponseT],
- *,
- retries_taken: int,
- response_headers: httpx.Headers | None,
- stream: bool,
- stream_cls: type[_StreamT] | None,
- ) -> ResponseT | _StreamT:
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+ def _sleep_for_retry(
+ self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None
+ ) -> None:
+ remaining_retries = max_retries - retries_taken
if remaining_retries == 1:
log.debug("1 retry left")
else:
log.debug("%i retries left", remaining_retries)
- timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+ timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None)
log.info("Retrying request to %s in %f seconds", options.url, timeout)
- # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
- # different thread if necessary.
time.sleep(timeout)
- return self._request(
- options=options,
- cast_to=cast_to,
- retries_taken=retries_taken + 1,
- stream=stream,
- stream_cls=stream_cls,
- )
-
def _process_response(
self,
*,
@@ -1419,7 +1416,6 @@ async def request(
options: FinalRequestOptions,
*,
stream: Literal[False] = False,
- remaining_retries: Optional[int] = None,
) -> ResponseT: ...
@overload
@@ -1430,7 +1426,6 @@ async def request(
*,
stream: Literal[True],
stream_cls: type[_AsyncStreamT],
- remaining_retries: Optional[int] = None,
) -> _AsyncStreamT: ...
@overload
@@ -1441,7 +1436,6 @@ async def request(
*,
stream: bool,
stream_cls: type[_AsyncStreamT] | None = None,
- remaining_retries: Optional[int] = None,
) -> ResponseT | _AsyncStreamT: ...
async def request(
@@ -1451,116 +1445,112 @@ async def request(
*,
stream: bool = False,
stream_cls: type[_AsyncStreamT] | None = None,
- remaining_retries: Optional[int] = None,
- ) -> ResponseT | _AsyncStreamT:
- if remaining_retries is not None:
- retries_taken = options.get_max_retries(self.max_retries) - remaining_retries
- else:
- retries_taken = 0
-
- return await self._request(
- cast_to=cast_to,
- options=options,
- stream=stream,
- stream_cls=stream_cls,
- retries_taken=retries_taken,
- )
-
- async def _request(
- self,
- cast_to: Type[ResponseT],
- options: FinalRequestOptions,
- *,
- stream: bool,
- stream_cls: type[_AsyncStreamT] | None,
- retries_taken: int,
) -> ResponseT | _AsyncStreamT:
if self._platform is None:
# `get_platform` can make blocking IO calls so we
# execute it earlier while we are in an async context
self._platform = await asyncify(get_platform)()
+ cast_to = self._maybe_override_cast_to(cast_to, options)
+
# create a copy of the options we were given so that if the
# options are mutated later & we then retry, the retries are
# given the original options
input_options = model_copy(options)
+ if input_options.idempotency_key is None and input_options.method.lower() != "get":
+ # ensure the idempotency key is reused between requests
+ input_options.idempotency_key = self._idempotency_key()
- cast_to = self._maybe_override_cast_to(cast_to, options)
- options = await self._prepare_options(options)
+ response: httpx.Response | None = None
+ max_retries = input_options.get_max_retries(self.max_retries)
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
- request = self._build_request(options, retries_taken=retries_taken)
- await self._prepare_request(request)
+ retries_taken = 0
+ for retries_taken in range(max_retries + 1):
+ options = model_copy(input_options)
+ options = await self._prepare_options(options)
- kwargs: HttpxSendArgs = {}
- if self.custom_auth is not None:
- kwargs["auth"] = self.custom_auth
+ remaining_retries = max_retries - retries_taken
+ request = self._build_request(options, retries_taken=retries_taken)
+ await self._prepare_request(request)
- try:
- response = await self._client.send(
- request,
- stream=stream or self._should_stream_response_body(request=request),
- **kwargs,
- )
- except httpx.TimeoutException as err:
- log.debug("Encountered httpx.TimeoutException", exc_info=True)
-
- if remaining_retries > 0:
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
- )
+ kwargs: HttpxSendArgs = {}
+ if self.custom_auth is not None:
+ kwargs["auth"] = self.custom_auth
- log.debug("Raising timeout error")
- raise APITimeoutError(request=request) from err
- except Exception as err:
- log.debug("Encountered Exception", exc_info=True)
+ log.debug("Sending HTTP Request: %s %s", request.method, request.url)
- if remaining_retries > 0:
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- stream=stream,
- stream_cls=stream_cls,
- response_headers=None,
+ response = None
+ try:
+ response = await self._client.send(
+ request,
+ stream=stream or self._should_stream_response_body(request=request),
+ **kwargs,
)
+ except httpx.TimeoutException as err:
+ log.debug("Encountered httpx.TimeoutException", exc_info=True)
+
+ if remaining_retries > 0:
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising timeout error")
+ raise APITimeoutError(request=request) from err
+ except Exception as err:
+ log.debug("Encountered Exception", exc_info=True)
+
+ if remaining_retries > 0:
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=None,
+ )
+ continue
+
+ log.debug("Raising connection error")
+ raise APIConnectionError(request=request) from err
+
+ log.debug(
+ 'HTTP Response: %s %s "%i %s" %s',
+ request.method,
+ request.url,
+ response.status_code,
+ response.reason_phrase,
+ response.headers,
+ )
+ log.debug("request_id: %s", response.headers.get("x-request-id"))
- log.debug("Raising connection error")
- raise APIConnectionError(request=request) from err
+ try:
+ response.raise_for_status()
+ except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
+ log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
+
+ if remaining_retries > 0 and self._should_retry(err.response):
+ await err.response.aclose()
+ await self._sleep_for_retry(
+ retries_taken=retries_taken,
+ max_retries=max_retries,
+ options=input_options,
+ response=response,
+ )
+ continue
- log.debug(
- 'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
- )
+ # If the response is streamed then we need to explicitly read the response
+ # to completion before attempting to access the response text.
+ if not err.response.is_closed:
+ await err.response.aread()
- try:
- response.raise_for_status()
- except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code
- log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
- if remaining_retries > 0 and self._should_retry(err.response):
- await err.response.aclose()
- return await self._retry_request(
- input_options,
- cast_to,
- retries_taken=retries_taken,
- response_headers=err.response.headers,
- stream=stream,
- stream_cls=stream_cls,
- )
+ log.debug("Re-raising status error")
+ raise self._make_status_error_from_response(err.response) from None
- # If the response is streamed then we need to explicitly read the response
- # to completion before attempting to access the response text.
- if not err.response.is_closed:
- await err.response.aread()
-
- log.debug("Re-raising status error")
- raise self._make_status_error_from_response(err.response) from None
+ break
+ assert response is not None, "could not resolve response (should never happen)"
return await self._process_response(
cast_to=cast_to,
options=options,
@@ -1570,35 +1560,20 @@ async def _request(
retries_taken=retries_taken,
)
- async def _retry_request(
- self,
- options: FinalRequestOptions,
- cast_to: Type[ResponseT],
- *,
- retries_taken: int,
- response_headers: httpx.Headers | None,
- stream: bool,
- stream_cls: type[_AsyncStreamT] | None,
- ) -> ResponseT | _AsyncStreamT:
- remaining_retries = options.get_max_retries(self.max_retries) - retries_taken
+ async def _sleep_for_retry(
+ self, *, retries_taken: int, max_retries: int, options: FinalRequestOptions, response: httpx.Response | None
+ ) -> None:
+ remaining_retries = max_retries - retries_taken
if remaining_retries == 1:
log.debug("1 retry left")
else:
log.debug("%i retries left", remaining_retries)
- timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers)
+ timeout = self._calculate_retry_timeout(remaining_retries, options, response.headers if response else None)
log.info("Retrying request to %s in %f seconds", options.url, timeout)
await anyio.sleep(timeout)
- return await self._request(
- options=options,
- cast_to=cast_to,
- retries_taken=retries_taken + 1,
- stream=stream,
- stream_cls=stream_cls,
- )
-
async def _process_response(
self,
*,
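The `_base_client.py` hunks above replace the recursive `_retry_request` helpers with a single retry loop, and generate the idempotency key once up front so retries reuse it. A stripped-down sketch of that loop shape, independent of this SDK's internals and using hypothetical names:

```python
import time
import random
from typing import Any, Callable


def request_with_retries(send: Callable[[], Any], max_retries: int = 2) -> Any:
    """Iterative retries instead of recursion; `send` is a hypothetical callable
    that performs one HTTP attempt and raises on failure."""
    response = None
    for retries_taken in range(max_retries + 1):
        remaining = max_retries - retries_taken
        try:
            response = send()
        except Exception:
            if remaining > 0:
                # exponential backoff with jitter before the next loop iteration
                time.sleep(min(8.0, 0.5 * 2**retries_taken) * (1 + random.random() / 4))
                continue
            raise
        break
    assert response is not None, "could not resolve response"
    return response
```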
diff --git a/src/openai/_client.py b/src/openai/_client.py
index 18d96da9a3..3aca6cb124 100644
--- a/src/openai/_client.py
+++ b/src/openai/_client.py
@@ -36,6 +36,7 @@
from .resources.beta import beta
from .resources.chat import chat
from .resources.audio import audio
+from .resources.evals import evals
from .resources.uploads import uploads
from .resources.responses import responses
from .resources.fine_tuning import fine_tuning
@@ -59,6 +60,7 @@ class OpenAI(SyncAPIClient):
batches: batches.Batches
uploads: uploads.Uploads
responses: responses.Responses
+ evals: evals.Evals
with_raw_response: OpenAIWithRawResponse
with_streaming_response: OpenAIWithStreamedResponse
@@ -158,6 +160,7 @@ def __init__(
self.batches = batches.Batches(self)
self.uploads = uploads.Uploads(self)
self.responses = responses.Responses(self)
+ self.evals = evals.Evals(self)
self.with_raw_response = OpenAIWithRawResponse(self)
self.with_streaming_response = OpenAIWithStreamedResponse(self)
@@ -290,6 +293,7 @@ class AsyncOpenAI(AsyncAPIClient):
batches: batches.AsyncBatches
uploads: uploads.AsyncUploads
responses: responses.AsyncResponses
+ evals: evals.AsyncEvals
with_raw_response: AsyncOpenAIWithRawResponse
with_streaming_response: AsyncOpenAIWithStreamedResponse
@@ -389,6 +393,7 @@ def __init__(
self.batches = batches.AsyncBatches(self)
self.uploads = uploads.AsyncUploads(self)
self.responses = responses.AsyncResponses(self)
+ self.evals = evals.AsyncEvals(self)
self.with_raw_response = AsyncOpenAIWithRawResponse(self)
self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self)
@@ -522,6 +527,7 @@ def __init__(self, client: OpenAI) -> None:
self.batches = batches.BatchesWithRawResponse(client.batches)
self.uploads = uploads.UploadsWithRawResponse(client.uploads)
self.responses = responses.ResponsesWithRawResponse(client.responses)
+ self.evals = evals.EvalsWithRawResponse(client.evals)
class AsyncOpenAIWithRawResponse:
@@ -540,6 +546,7 @@ def __init__(self, client: AsyncOpenAI) -> None:
self.batches = batches.AsyncBatchesWithRawResponse(client.batches)
self.uploads = uploads.AsyncUploadsWithRawResponse(client.uploads)
self.responses = responses.AsyncResponsesWithRawResponse(client.responses)
+ self.evals = evals.AsyncEvalsWithRawResponse(client.evals)
class OpenAIWithStreamedResponse:
@@ -558,6 +565,7 @@ def __init__(self, client: OpenAI) -> None:
self.batches = batches.BatchesWithStreamingResponse(client.batches)
self.uploads = uploads.UploadsWithStreamingResponse(client.uploads)
self.responses = responses.ResponsesWithStreamingResponse(client.responses)
+ self.evals = evals.EvalsWithStreamingResponse(client.evals)
class AsyncOpenAIWithStreamedResponse:
@@ -576,6 +584,7 @@ def __init__(self, client: AsyncOpenAI) -> None:
self.batches = batches.AsyncBatchesWithStreamingResponse(client.batches)
self.uploads = uploads.AsyncUploadsWithStreamingResponse(client.uploads)
self.responses = responses.AsyncResponsesWithStreamingResponse(client.responses)
+ self.evals = evals.AsyncEvalsWithStreamingResponse(client.evals)
Client = OpenAI
diff --git a/src/openai/_models.py b/src/openai/_models.py
index fc4f201e4e..e2fce49250 100644
--- a/src/openai/_models.py
+++ b/src/openai/_models.py
@@ -20,7 +20,6 @@
)
import pydantic
-import pydantic.generics
from pydantic.fields import FieldInfo
from ._types import (
@@ -652,8 +651,8 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any,
# Note: if one variant defines an alias then they all should
discriminator_alias = field_info.alias
- if field_info.annotation and is_literal_type(field_info.annotation):
- for entry in get_args(field_info.annotation):
+ if (annotation := getattr(field_info, "annotation", None)) and is_literal_type(annotation):
+ for entry in get_args(annotation):
if isinstance(entry, str):
mapping[entry] = variant
diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py
index e7d2657860..cf12f7a31e 100644
--- a/src/openai/_module_client.py
+++ b/src/openai/_module_client.py
@@ -30,6 +30,12 @@ def __load__(self) -> resources.Audio:
return _load_client().audio
+class EvalsProxy(LazyProxy[resources.Evals]):
+ @override
+ def __load__(self) -> resources.Evals:
+ return _load_client().evals
+
+
class ImagesProxy(LazyProxy[resources.Images]):
@override
def __load__(self) -> resources.Images:
@@ -94,6 +100,7 @@ def __load__(self) -> resources.VectorStores:
beta: resources.Beta = BetaProxy().__as_proxied__()
files: resources.Files = FilesProxy().__as_proxied__()
audio: resources.Audio = AudioProxy().__as_proxied__()
+evals: resources.Evals = EvalsProxy().__as_proxied__()
images: resources.Images = ImagesProxy().__as_proxied__()
models: resources.Models = ModelsProxy().__as_proxied__()
batches: resources.Batches = BatchesProxy().__as_proxied__()
diff --git a/src/openai/_utils/_transform.py b/src/openai/_utils/_transform.py
index 7ac2e17fbb..b0cc20a735 100644
--- a/src/openai/_utils/_transform.py
+++ b/src/openai/_utils/_transform.py
@@ -5,13 +5,15 @@
import pathlib
from typing import Any, Mapping, TypeVar, cast
from datetime import date, datetime
-from typing_extensions import Literal, get_args, override, get_type_hints
+from typing_extensions import Literal, get_args, override, get_type_hints as _get_type_hints
import anyio
import pydantic
from ._utils import (
is_list,
+ is_given,
+ lru_cache,
is_mapping,
is_iterable,
)
@@ -108,6 +110,7 @@ class Params(TypedDict, total=False):
return cast(_T, transformed)
+@lru_cache(maxsize=8096)
def _get_annotated_type(type_: type) -> type | None:
"""If the given type is an `Annotated` type then it is returned, if not `None` is returned.
@@ -142,6 +145,10 @@ def _maybe_transform_key(key: str, type_: type) -> str:
return key
+def _no_transform_needed(annotation: type) -> bool:
+ return annotation == float or annotation == int
+
+
def _transform_recursive(
data: object,
*,
@@ -184,6 +191,15 @@ def _transform_recursive(
return cast(object, data)
inner_type = extract_type_arg(stripped_type, 0)
+ if _no_transform_needed(inner_type):
+ # for some types there is no need to transform anything, so we can get a small
+ # perf boost from skipping that work.
+ #
+ # but we still need to convert to a list to ensure the data is json-serializable
+ if is_list(data):
+ return data
+ return list(data)
+
return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
if is_union_type(stripped_type):
@@ -245,6 +261,11 @@ def _transform_typeddict(
result: dict[str, object] = {}
annotations = get_type_hints(expected_type, include_extras=True)
for key, value in data.items():
+ if not is_given(value):
+ # we don't need to include `NotGiven` values here as they'll
+ # be stripped out before the request is sent anyway
+ continue
+
type_ = annotations.get(key)
if type_ is None:
# we do not have a type annotation for this field, leave it as is
@@ -332,6 +353,15 @@ async def _async_transform_recursive(
return cast(object, data)
inner_type = extract_type_arg(stripped_type, 0)
+ if _no_transform_needed(inner_type):
+ # for some types there is no need to transform anything, so we can get a small
+ # perf boost from skipping that work.
+ #
+ # but we still need to convert to a list to ensure the data is json-serializable
+ if is_list(data):
+ return data
+ return list(data)
+
return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
if is_union_type(stripped_type):
@@ -393,6 +423,11 @@ async def _async_transform_typeddict(
result: dict[str, object] = {}
annotations = get_type_hints(expected_type, include_extras=True)
for key, value in data.items():
+ if not is_given(value):
+ # we don't need to include `NotGiven` values here as they'll
+ # be stripped out before the request is sent anyway
+ continue
+
type_ = annotations.get(key)
if type_ is None:
# we do not have a type annotation for this field, leave it as is
@@ -400,3 +435,13 @@ async def _async_transform_typeddict(
else:
result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_)
return result
+
+
+@lru_cache(maxsize=8096)
+def get_type_hints(
+ obj: Any,
+ globalns: dict[str, Any] | None = None,
+ localns: Mapping[str, Any] | None = None,
+ include_extras: bool = False,
+) -> dict[str, Any]:
+ return _get_type_hints(obj, globalns=globalns, localns=localns, include_extras=include_extras)
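The `_transform.py` changes above put `lru_cache` in front of `get_type_hints` (and `_get_annotated_type`) so transforming the same TypedDict repeatedly does not re-resolve its annotations. A minimal sketch of that caching pattern with a hypothetical name; note the cached dict is shared between callers, so it must be treated as read-only:

```python
from functools import lru_cache
from typing import Any, Dict, get_type_hints as _get_type_hints


@lru_cache(maxsize=8096)
def cached_type_hints(obj: Any) -> Dict[str, Any]:
    # Resolving type hints is relatively expensive; the class object is hashable,
    # so it works as the cache key. Treat the returned dict as read-only.
    return _get_type_hints(obj, include_extras=True)
```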
diff --git a/src/openai/_utils/_typing.py b/src/openai/_utils/_typing.py
index 278749b147..1bac9542e2 100644
--- a/src/openai/_utils/_typing.py
+++ b/src/openai/_utils/_typing.py
@@ -13,6 +13,7 @@
get_origin,
)
+from ._utils import lru_cache
from .._types import InheritsGeneric
from .._compat import is_union as _is_union
@@ -66,6 +67,7 @@ def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]:
# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]]
+@lru_cache(maxsize=8096)
def strip_annotated_type(typ: type) -> type:
if is_required_type(typ) or is_annotated_type(typ):
return strip_annotated_type(cast(type, get_args(typ)[0]))
@@ -108,7 +110,7 @@ class MyResponse(Foo[_T]):
```
"""
cls = cast(object, get_origin(typ) or typ)
- if cls in generic_bases:
+ if cls in generic_bases: # pyright: ignore[reportUnnecessaryContains]
# we're given the class directly
return extract_type_arg(typ, index)
diff --git a/src/openai/_utils/_utils.py b/src/openai/_utils/_utils.py
index d6734e6b8f..1e7d013b51 100644
--- a/src/openai/_utils/_utils.py
+++ b/src/openai/_utils/_utils.py
@@ -76,8 +76,16 @@ def _extract_items(
from .._files import assert_is_file_content
# We have exhausted the path, return the entry we found.
- assert_is_file_content(obj, key=flattened_key)
assert flattened_key is not None
+
+ if is_list(obj):
+ files: list[tuple[str, FileTypes]] = []
+ for entry in obj:
+ assert_is_file_content(entry, key=flattened_key + "[]" if flattened_key else "")
+ files.append((flattened_key + "[]", cast(FileTypes, entry)))
+ return files
+
+ assert_is_file_content(obj, key=flattened_key)
return [(flattened_key, cast(FileTypes, obj))]
index += 1
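The `_extract_items` change above lets a multipart field hold a list of file contents, emitting each entry under the same key with a `[]` suffix. A stand-alone sketch of that flattening convention (hypothetical helper, not the SDK's internal signature):

```python
from typing import Any, List, Tuple


def flatten_file_field(key: str, value: Any) -> List[Tuple[str, Any]]:
    # A list of file contents becomes repeated `key[]` entries; a single file
    # keeps its key unchanged.
    if isinstance(value, list):
        return [(f"{key}[]", entry) for entry in value]
    return [(key, value)]


print(flatten_file_field("image", [b"png-bytes-1", b"png-bytes-2"]))
```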
diff --git a/src/openai/_version.py b/src/openai/_version.py
index 6b4385ec3c..ea6b974272 100644
--- a/src/openai/_version.py
+++ b/src/openai/_version.py
@@ -1,4 +1,4 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
__title__ = "openai"
-__version__ = "1.70.0" # x-release-please-version
+__version__ = "1.76.0" # x-release-please-version
diff --git a/src/openai/lib/streaming/chat/_completions.py b/src/openai/lib/streaming/chat/_completions.py
index 2146091354..f147696cca 100644
--- a/src/openai/lib/streaming/chat/_completions.py
+++ b/src/openai/lib/streaming/chat/_completions.py
@@ -113,6 +113,8 @@ def current_completion_snapshot(self) -> ParsedChatCompletionSnapshot:
def __stream__(self) -> Iterator[ChatCompletionStreamEvent[ResponseFormatT]]:
for sse_event in self._raw_stream:
+ if not _is_valid_chat_completion_chunk_weak(sse_event):
+ continue
events_to_fire = self._state.handle_chunk(sse_event)
for event in events_to_fire:
yield event
@@ -234,6 +236,8 @@ def current_completion_snapshot(self) -> ParsedChatCompletionSnapshot:
async def __stream__(self) -> AsyncIterator[ChatCompletionStreamEvent[ResponseFormatT]]:
async for sse_event in self._raw_stream:
+ if not _is_valid_chat_completion_chunk_weak(sse_event):
+ continue
events_to_fire = self._state.handle_chunk(sse_event)
for event in events_to_fire:
yield event
@@ -753,3 +757,12 @@ def _convert_initial_chunk_into_snapshot(chunk: ChatCompletionChunk) -> ParsedCh
},
),
)
+
+
+def _is_valid_chat_completion_chunk_weak(sse_event: ChatCompletionChunk) -> bool:
+    # Although `_raw_stream` is only supposed to contain objects adhering to the ChatCompletionChunk schema,
+    # Azure OpenAI breaks this assumption when its Asynchronous Filter is enabled.
+ # An easy filter is to check for the "object" property:
+ # - should be "chat.completion.chunk" for a ChatCompletionChunk;
+ # - is an empty string for Asynchronous Filter events.
+ return sse_event.object == "chat.completion.chunk" # type: ignore # pylance reports this as a useless check
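With the guard above, the chat streaming helpers silently drop Azure asynchronous-filter events whose `object` field is not `"chat.completion.chunk"`. A minimal consumer sketch (the model name is a placeholder, not taken from this diff):

```python
from openai import OpenAI

client = OpenAI()

with client.beta.chat.completions.stream(
    model="gpt-4o-mini",  # placeholder model name
    messages=[{"role": "user", "content": "Say hello"}],
) as stream:
    for event in stream:
        # Non-chunk filter events never reach this loop with the new guard.
        if event.type == "content.delta":
            print(event.delta, end="")
```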
diff --git a/src/openai/resources/__init__.py b/src/openai/resources/__init__.py
index d3457cf319..ab9cd73e81 100644
--- a/src/openai/resources/__init__.py
+++ b/src/openai/resources/__init__.py
@@ -24,6 +24,14 @@
AudioWithStreamingResponse,
AsyncAudioWithStreamingResponse,
)
+from .evals import (
+ Evals,
+ AsyncEvals,
+ EvalsWithRawResponse,
+ AsyncEvalsWithRawResponse,
+ EvalsWithStreamingResponse,
+ AsyncEvalsWithStreamingResponse,
+)
from .files import (
Files,
AsyncFiles,
@@ -198,4 +206,10 @@
"AsyncResponsesWithRawResponse",
"ResponsesWithStreamingResponse",
"AsyncResponsesWithStreamingResponse",
+ "Evals",
+ "AsyncEvals",
+ "EvalsWithRawResponse",
+ "AsyncEvalsWithRawResponse",
+ "EvalsWithStreamingResponse",
+ "AsyncEvalsWithStreamingResponse",
]
diff --git a/src/openai/resources/audio/speech.py b/src/openai/resources/audio/speech.py
index 1ee53db9d5..fad18dcdf5 100644
--- a/src/openai/resources/audio/speech.py
+++ b/src/openai/resources/audio/speech.py
@@ -9,10 +9,7 @@
from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
diff --git a/src/openai/resources/audio/transcriptions.py b/src/openai/resources/audio/transcriptions.py
index 2a77f91d69..0c7ebca7a6 100644
--- a/src/openai/resources/audio/transcriptions.py
+++ b/src/openai/resources/audio/transcriptions.py
@@ -11,13 +11,7 @@
from ... import _legacy_response
from ...types import AudioResponseFormat
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
- extract_files,
- required_args,
- maybe_transform,
- deepcopy_minimal,
- async_maybe_transform,
-)
+from ..._utils import extract_files, required_args, maybe_transform, deepcopy_minimal, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -321,7 +315,12 @@ def create(
extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return self._post( # type: ignore[return-value]
"/audio/transcriptions",
- body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
+ body=maybe_transform(
+ body,
+ transcription_create_params.TranscriptionCreateParamsStreaming
+ if stream
+ else transcription_create_params.TranscriptionCreateParamsNonStreaming,
+ ),
files=files,
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -616,7 +615,12 @@ async def create(
extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
return await self._post(
"/audio/transcriptions",
- body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
+ body=await async_maybe_transform(
+ body,
+ transcription_create_params.TranscriptionCreateParamsStreaming
+ if stream
+ else transcription_create_params.TranscriptionCreateParamsNonStreaming,
+ ),
files=files,
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
diff --git a/src/openai/resources/audio/translations.py b/src/openai/resources/audio/translations.py
index f55dbd0ee5..28b577ce2e 100644
--- a/src/openai/resources/audio/translations.py
+++ b/src/openai/resources/audio/translations.py
@@ -10,12 +10,7 @@
from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
- extract_files,
- maybe_transform,
- deepcopy_minimal,
- async_maybe_transform,
-)
+from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/batches.py b/src/openai/resources/batches.py
index b7a299be12..26ea498b31 100644
--- a/src/openai/resources/batches.py
+++ b/src/openai/resources/batches.py
@@ -10,10 +10,7 @@
from .. import _legacy_response
from ..types import batch_list_params, batch_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py
index 1c7cbf3737..9059d93616 100644
--- a/src/openai/resources/beta/assistants.py
+++ b/src/openai/resources/beta/assistants.py
@@ -9,10 +9,7 @@
from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -223,6 +220,12 @@ def update(
model: Union[
str,
Literal[
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
"o3-mini",
"o3-mini-2025-01-31",
"o1",
@@ -666,6 +669,12 @@ async def update(
model: Union[
str,
Literal[
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
"o3-mini",
"o3-mini-2025-01-31",
"o1",
diff --git a/src/openai/resources/beta/chat/completions.py b/src/openai/resources/beta/chat/completions.py
index 545a3f4087..80e015615f 100644
--- a/src/openai/resources/beta/chat/completions.py
+++ b/src/openai/resources/beta/chat/completions.py
@@ -81,7 +81,7 @@ def parse(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -228,7 +228,7 @@ def stream(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -360,7 +360,7 @@ async def parse(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -507,7 +507,7 @@ def stream(
presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
diff --git a/src/openai/resources/beta/realtime/realtime.py b/src/openai/resources/beta/realtime/realtime.py
index 76e57f8cb7..d39db48e05 100644
--- a/src/openai/resources/beta/realtime/realtime.py
+++ b/src/openai/resources/beta/realtime/realtime.py
@@ -233,6 +233,7 @@ class AsyncRealtimeConnection:
response: AsyncRealtimeResponseResource
input_audio_buffer: AsyncRealtimeInputAudioBufferResource
conversation: AsyncRealtimeConversationResource
+ output_audio_buffer: AsyncRealtimeOutputAudioBufferResource
transcription_session: AsyncRealtimeTranscriptionSessionResource
_connection: AsyncWebsocketConnection
@@ -244,6 +245,7 @@ def __init__(self, connection: AsyncWebsocketConnection) -> None:
self.response = AsyncRealtimeResponseResource(self)
self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self)
self.conversation = AsyncRealtimeConversationResource(self)
+ self.output_audio_buffer = AsyncRealtimeOutputAudioBufferResource(self)
self.transcription_session = AsyncRealtimeTranscriptionSessionResource(self)
async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]:
@@ -277,10 +279,6 @@ async def recv_bytes(self) -> bytes:
"""
message = await self._connection.recv(decode=False)
log.debug(f"Received websocket message: %s", message)
- if not isinstance(message, bytes):
- # passing `decode=False` should always result in us getting `bytes` back
- raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
-
return message
async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
@@ -417,6 +415,7 @@ class RealtimeConnection:
response: RealtimeResponseResource
input_audio_buffer: RealtimeInputAudioBufferResource
conversation: RealtimeConversationResource
+ output_audio_buffer: RealtimeOutputAudioBufferResource
transcription_session: RealtimeTranscriptionSessionResource
_connection: WebsocketConnection
@@ -428,6 +427,7 @@ def __init__(self, connection: WebsocketConnection) -> None:
self.response = RealtimeResponseResource(self)
self.input_audio_buffer = RealtimeInputAudioBufferResource(self)
self.conversation = RealtimeConversationResource(self)
+ self.output_audio_buffer = RealtimeOutputAudioBufferResource(self)
self.transcription_session = RealtimeTranscriptionSessionResource(self)
def __iter__(self) -> Iterator[RealtimeServerEvent]:
@@ -461,10 +461,6 @@ def recv_bytes(self) -> bytes:
"""
message = self._connection.recv(decode=False)
log.debug(f"Received websocket message: %s", message)
- if not isinstance(message, bytes):
- # passing `decode=False` should always result in us getting `bytes` back
- raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}")
-
return message
def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None:
@@ -816,6 +812,21 @@ def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> Non
)
+class RealtimeOutputAudioBufferResource(BaseRealtimeConnectionResource):
+ def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """**WebRTC Only:** Emit to cut off the current audio response.
+
+ This will trigger the server to
+ stop generating audio and emit an `output_audio_buffer.cleared` event. This
+ event should be preceded by a `response.cancel` client event to stop the
+ generation of the current response.
+ [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc).
+ """
+ self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
+ )
+
+
class RealtimeTranscriptionSessionResource(BaseRealtimeConnectionResource):
def update(
self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
@@ -1053,6 +1064,21 @@ async def retrieve(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN)
)
+class AsyncRealtimeOutputAudioBufferResource(BaseAsyncRealtimeConnectionResource):
+ async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None:
+ """**WebRTC Only:** Emit to cut off the current audio response.
+
+ This will trigger the server to
+ stop generating audio and emit an `output_audio_buffer.cleared` event. This
+ event should be preceded by a `response.cancel` client event to stop the
+ generation of the current response.
+ [Learn more](https://platform.openai.com/docs/guides/realtime-model-capabilities#client-and-server-events-for-audio-in-webrtc).
+ """
+ await self._connection.send(
+ cast(RealtimeClientEventParam, strip_not_given({"type": "output_audio_buffer.clear", "event_id": event_id}))
+ )
+
+
class AsyncRealtimeTranscriptionSessionResource(BaseAsyncRealtimeConnectionResource):
async def update(
self, *, session: transcription_session_update_param.Session, event_id: str | NotGiven = NOT_GIVEN
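A minimal sketch of how the new `output_audio_buffer` resource added above might be used on a realtime connection. The model name is a placeholder, and per the docstring `output_audio_buffer.clear` only has an effect for WebRTC sessions; the snippet just illustrates the documented ordering (cancel the response, then clear the buffer):

```python
from openai import OpenAI

client = OpenAI()

# Placeholder model name; assumes an audio response is already in flight.
with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    # Per the docstring above, cancel the current response first...
    connection.response.cancel()
    # ...then cut off any audio still queued in the output buffer.
    connection.output_audio_buffer.clear()
```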
diff --git a/src/openai/resources/beta/realtime/sessions.py b/src/openai/resources/beta/realtime/sessions.py
index 3e1c956fe4..3c0d4d47c1 100644
--- a/src/openai/resources/beta/realtime/sessions.py
+++ b/src/openai/resources/beta/realtime/sessions.py
@@ -9,10 +9,7 @@
from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/beta/realtime/transcription_sessions.py b/src/openai/resources/beta/realtime/transcription_sessions.py
index 0917da71fa..dbcb1bb33b 100644
--- a/src/openai/resources/beta/realtime/transcription_sessions.py
+++ b/src/openai/resources/beta/realtime/transcription_sessions.py
@@ -9,10 +9,7 @@
from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/beta/threads/messages.py b/src/openai/resources/beta/threads/messages.py
index e3374aba37..3a8913ef16 100644
--- a/src/openai/resources/beta/threads/messages.py
+++ b/src/openai/resources/beta/threads/messages.py
@@ -9,10 +9,7 @@
from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py
index acb1c9b261..4d19010fea 100644
--- a/src/openai/resources/beta/threads/runs/runs.py
+++ b/src/openai/resources/beta/threads/runs/runs.py
@@ -587,7 +587,7 @@ def create(
"top_p": top_p,
"truncation_strategy": truncation_strategy,
},
- run_create_params.RunCreateParams,
+ run_create_params.RunCreateParamsStreaming if stream else run_create_params.RunCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers,
@@ -1324,7 +1324,9 @@ def submit_tool_outputs(
"tool_outputs": tool_outputs,
"stream": stream,
},
- run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParamsStreaming
+ if stream
+ else run_submit_tool_outputs_params.RunSubmitToolOutputsParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -1996,7 +1998,7 @@ async def create(
"top_p": top_p,
"truncation_strategy": truncation_strategy,
},
- run_create_params.RunCreateParams,
+ run_create_params.RunCreateParamsStreaming if stream else run_create_params.RunCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers,
@@ -2732,7 +2734,9 @@ async def submit_tool_outputs(
"tool_outputs": tool_outputs,
"stream": stream,
},
- run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
+ run_submit_tool_outputs_params.RunSubmitToolOutputsParamsStreaming
+ if stream
+ else run_submit_tool_outputs_params.RunSubmitToolOutputsParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
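The two hunks above only change which TypedDict the request body is transformed against; call sites are unaffected. A quick sketch with placeholder IDs showing both branches:

```python
from openai import OpenAI

client = OpenAI()

# Non-streaming: the body is transformed with RunCreateParamsNonStreaming.
run = client.beta.threads.runs.create(
    thread_id="thread_123",  # placeholder
    assistant_id="asst_123",  # placeholder
)

# Streaming: the body is transformed with RunCreateParamsStreaming.
events = client.beta.threads.runs.create(
    thread_id="thread_123",
    assistant_id="asst_123",
    stream=True,
)
for event in events:
    print(event.event)
```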
diff --git a/src/openai/resources/beta/threads/runs/steps.py b/src/openai/resources/beta/threads/runs/steps.py
index 709c729d45..3d2148687b 100644
--- a/src/openai/resources/beta/threads/runs/steps.py
+++ b/src/openai/resources/beta/threads/runs/steps.py
@@ -9,10 +9,7 @@
from ..... import _legacy_response
from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ....._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ....._utils import maybe_transform, async_maybe_transform
from ....._compat import cached_property
from ....._resource import SyncAPIResource, AsyncAPIResource
from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py
index d88559bdeb..22dc5fe0ea 100644
--- a/src/openai/resources/beta/threads/threads.py
+++ b/src/openai/resources/beta/threads/threads.py
@@ -18,11 +18,7 @@
AsyncMessagesWithStreamingResponse,
)
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
- required_args,
- maybe_transform,
- async_maybe_transform,
-)
+from ...._utils import required_args, maybe_transform, async_maybe_transform
from .runs.runs import (
Runs,
AsyncRuns,
@@ -54,6 +50,7 @@
from ....types.shared.chat_model import ChatModel
from ....types.beta.thread_deleted import ThreadDeleted
from ....types.shared_params.metadata import Metadata
+from ....types.beta.assistant_tool_param import AssistantToolParam
from ....types.beta.assistant_stream_event import AssistantStreamEvent
from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
@@ -286,7 +283,7 @@ def create_and_run(
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
- tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -419,7 +416,7 @@ def create_and_run(
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
- tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -552,7 +549,7 @@ def create_and_run(
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
- tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -685,7 +682,7 @@ def create_and_run(
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
- tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -717,7 +714,9 @@ def create_and_run(
"top_p": top_p,
"truncation_strategy": truncation_strategy,
},
- thread_create_and_run_params.ThreadCreateAndRunParams,
+ thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
+ if stream
+ else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -1133,7 +1132,7 @@ async def create_and_run(
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
- tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1266,7 +1265,7 @@ async def create_and_run(
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
- tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1399,7 +1398,7 @@ async def create_and_run(
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
- tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1532,7 +1531,7 @@ async def create_and_run(
thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN,
tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN,
- tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
+ tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
top_p: Optional[float] | NotGiven = NOT_GIVEN,
truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -1564,7 +1563,9 @@ async def create_and_run(
"top_p": top_p,
"truncation_strategy": truncation_strategy,
},
- thread_create_and_run_params.ThreadCreateAndRunParams,
+ thread_create_and_run_params.ThreadCreateAndRunParamsStreaming
+ if stream
+ else thread_create_and_run_params.ThreadCreateAndRunParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
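With the retyped `tools` parameter, `create_and_run` accepts the same `AssistantToolParam` dicts used elsewhere in the Assistants API. A small sketch with placeholder IDs:

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.create_and_run(
    assistant_id="asst_123",  # placeholder
    thread={"messages": [{"role": "user", "content": "Summarize the attached file."}]},
    # Items here are now typed as AssistantToolParam rather than a thread-local Tool alias.
    tools=[{"type": "code_interpreter"}],
)
```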
diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py
index d28be012c9..0ab105a389 100644
--- a/src/openai/resources/chat/completions/completions.py
+++ b/src/openai/resources/chat/completions/completions.py
@@ -19,11 +19,7 @@
AsyncMessagesWithStreamingResponse,
)
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
- required_args,
- maybe_transform,
- async_maybe_transform,
-)
+from ...._utils import required_args, maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -99,7 +95,7 @@ def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -145,7 +141,7 @@ def create(
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -201,7 +197,7 @@ def create(
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -270,12 +266,17 @@ def create(
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -364,7 +365,7 @@ def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -409,7 +410,7 @@ def create(
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -474,7 +475,7 @@ def create(
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -543,12 +544,17 @@ def create(
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -628,7 +634,7 @@ def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -673,7 +679,7 @@ def create(
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -738,7 +744,7 @@ def create(
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -807,12 +813,17 @@ def create(
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -891,7 +902,7 @@ def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -947,7 +958,9 @@ def create(
"user": user,
"web_search_options": web_search_options,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -1185,7 +1198,7 @@ async def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
@@ -1231,7 +1244,7 @@ async def create(
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1287,7 +1300,7 @@ async def create(
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -1356,12 +1369,17 @@ async def create(
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -1450,7 +1468,7 @@ async def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1495,7 +1513,7 @@ async def create(
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1560,7 +1578,7 @@ async def create(
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -1629,12 +1647,17 @@ async def create(
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -1714,7 +1737,7 @@ async def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
@@ -1759,7 +1782,7 @@ async def create(
[images](https://platform.openai.com/docs/guides/vision), and
[audio](https://platform.openai.com/docs/guides/audio).
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1824,7 +1847,7 @@ async def create(
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
for storing additional information about the object in a structured format, and
@@ -1893,12 +1916,17 @@ async def create(
latency guarantee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
store: Whether or not to store the output of this chat completion request for use in
@@ -1977,7 +2005,7 @@ async def create(
reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
seed: Optional[int] | NotGiven = NOT_GIVEN,
- service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
@@ -2033,7 +2061,9 @@ async def create(
"user": user,
"web_search_options": web_search_options,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
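A hedged example of the new `service_tier="flex"` value on chat completions. The model choice is an assumption (flex processing is documented for o-series models), and the longer per-request timeout is optional but often sensible for flex traffic:

```python
from openai import OpenAI

client = OpenAI()

completion = client.with_options(timeout=900.0).chat.completions.create(
    model="o3",  # assumed flex-eligible model
    messages=[{"role": "user", "content": "Summarize flex processing in one sentence."}],
    service_tier="flex",
)
# The response echoes the tier that was actually used.
print(completion.service_tier)
```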
diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py
index 171f509352..43b923b9b9 100644
--- a/src/openai/resources/completions.py
+++ b/src/openai/resources/completions.py
@@ -10,11 +10,7 @@
from .. import _legacy_response
from ..types import completion_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- required_args,
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import required_args, maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -159,7 +155,9 @@ def create(
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream: Whether to stream back partial progress. If set, tokens will be sent as
@@ -319,7 +317,9 @@ def create(
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -472,7 +472,9 @@ def create(
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -559,7 +561,9 @@ def create(
"top_p": top_p,
"user": user,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -701,7 +705,9 @@ async def create(
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream: Whether to stream back partial progress. If set, tokens will be sent as
@@ -861,7 +867,9 @@ async def create(
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1014,7 +1022,9 @@ async def create(
Determinism is not guaranteed, and you should refer to the `system_fingerprint`
response parameter to monitor changes in the backend.
- stop: Up to 4 sequences where the API will stop generating further tokens. The
+ stop: Not supported with latest reasoning models `o3` and `o4-mini`.
+
+ Up to 4 sequences where the API will stop generating further tokens. The
returned text will not contain the stop sequence.
stream_options: Options for streaming response. Only set this when you set `stream: true`.
@@ -1101,7 +1111,9 @@ async def create(
"top_p": top_p,
"user": user,
},
- completion_create_params.CompletionCreateParams,
+ completion_create_params.CompletionCreateParamsStreaming
+ if stream
+ else completion_create_params.CompletionCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
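The updated `stop` docstring above only adds a caveat for the newest reasoning models; on the legacy completions endpoint the parameter behaves as before. Illustrative model and prompt:

```python
from openai import OpenAI

client = OpenAI()

completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",  # illustrative
    prompt="Q: What is 2 + 2?\nA:",
    stop=["\n"],  # up to 4 sequences; the stop sequence itself is not returned
    max_tokens=16,
)
print(completion.choices[0].text)
```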
diff --git a/src/openai/resources/evals/__init__.py b/src/openai/resources/evals/__init__.py
new file mode 100644
index 0000000000..84f707511d
--- /dev/null
+++ b/src/openai/resources/evals/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from .evals import (
+ Evals,
+ AsyncEvals,
+ EvalsWithRawResponse,
+ AsyncEvalsWithRawResponse,
+ EvalsWithStreamingResponse,
+ AsyncEvalsWithStreamingResponse,
+)
+
+__all__ = [
+ "Runs",
+ "AsyncRuns",
+ "RunsWithRawResponse",
+ "AsyncRunsWithRawResponse",
+ "RunsWithStreamingResponse",
+ "AsyncRunsWithStreamingResponse",
+ "Evals",
+ "AsyncEvals",
+ "EvalsWithRawResponse",
+ "AsyncEvalsWithRawResponse",
+ "EvalsWithStreamingResponse",
+ "AsyncEvalsWithStreamingResponse",
+]
diff --git a/src/openai/resources/evals/evals.py b/src/openai/resources/evals/evals.py
new file mode 100644
index 0000000000..c12562a86d
--- /dev/null
+++ b/src/openai/resources/evals/evals.py
@@ -0,0 +1,652 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable, Optional
+from typing_extensions import Literal
+
+import httpx
+
+from ... import _legacy_response
+from ...types import eval_list_params, eval_create_params, eval_update_params
+from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ..._utils import maybe_transform, async_maybe_transform
+from ..._compat import cached_property
+from .runs.runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ...pagination import SyncCursorPage, AsyncCursorPage
+from ..._base_client import AsyncPaginator, make_request_options
+from ...types.eval_list_response import EvalListResponse
+from ...types.eval_create_response import EvalCreateResponse
+from ...types.eval_delete_response import EvalDeleteResponse
+from ...types.eval_update_response import EvalUpdateResponse
+from ...types.eval_retrieve_response import EvalRetrieveResponse
+from ...types.shared_params.metadata import Metadata
+
+__all__ = ["Evals", "AsyncEvals"]
+
+
+class Evals(SyncAPIResource):
+ @cached_property
+ def runs(self) -> Runs:
+ return Runs(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> EvalsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return EvalsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> EvalsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return EvalsWithStreamingResponse(self)
+
+ def create(
+ self,
+ *,
+ data_source_config: eval_create_params.DataSourceConfig,
+ testing_criteria: Iterable[eval_create_params.TestingCriterion],
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalCreateResponse:
+ """
+ Create the structure of an evaluation that can be used to test a model's
+ performance. An evaluation is a set of testing criteria and a datasource. After
+ creating an evaluation, you can run it on different models and model parameters.
+ We support several types of graders and datasources. For more information, see
+ the [Evals guide](https://platform.openai.com/docs/guides/evals).
+
+ Args:
+ data_source_config: The configuration for the data source used for the evaluation runs.
+
+ testing_criteria: A list of graders for all eval runs in this group.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._post(
+ "/evals",
+ body=maybe_transform(
+ {
+ "data_source_config": data_source_config,
+ "testing_criteria": testing_criteria,
+ "metadata": metadata,
+ "name": name,
+ },
+ eval_create_params.EvalCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalCreateResponse,
+ )
+
+ def retrieve(
+ self,
+ eval_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalRetrieveResponse:
+ """
+ Get an evaluation by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._get(
+ f"/evals/{eval_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalRetrieveResponse,
+ )
+
+ def update(
+ self,
+ eval_id: str,
+ *,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalUpdateResponse:
+ """
+ Update certain properties of an evaluation.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: Rename the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._post(
+ f"/evals/{eval_id}",
+ body=maybe_transform(
+ {
+ "metadata": metadata,
+ "name": name,
+ },
+ eval_update_params.EvalUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalUpdateResponse,
+ )
+
+ def list(
+ self,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ order_by: Literal["created_at", "updated_at"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[EvalListResponse]:
+ """
+ List evaluations for a project.
+
+ Args:
+ after: Identifier for the last eval from the previous pagination request.
+
+ limit: Number of evals to retrieve.
+
+ order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
+ descending order.
+
+ order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
+ creation time or `updated_at` for last updated time.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/evals",
+ page=SyncCursorPage[EvalListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "order_by": order_by,
+ },
+ eval_list_params.EvalListParams,
+ ),
+ ),
+ model=EvalListResponse,
+ )
+
+ def delete(
+ self,
+ eval_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalDeleteResponse:
+ """
+ Delete an evaluation.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._delete(
+ f"/evals/{eval_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalDeleteResponse,
+ )
+
+
+class AsyncEvals(AsyncAPIResource):
+ @cached_property
+ def runs(self) -> AsyncRuns:
+ return AsyncRuns(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncEvalsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncEvalsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncEvalsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncEvalsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ *,
+ data_source_config: eval_create_params.DataSourceConfig,
+ testing_criteria: Iterable[eval_create_params.TestingCriterion],
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalCreateResponse:
+ """
+ Create the structure of an evaluation that can be used to test a model's
+ performance. An evaluation is a set of testing criteria and a datasource. After
+ creating an evaluation, you can run it on different models and model parameters.
+ We support several types of graders and datasources. For more information, see
+ the [Evals guide](https://platform.openai.com/docs/guides/evals).
+
+ Args:
+ data_source_config: The configuration for the data source used for the evaluation runs.
+
+ testing_criteria: A list of graders for all eval runs in this group.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return await self._post(
+ "/evals",
+ body=await async_maybe_transform(
+ {
+ "data_source_config": data_source_config,
+ "testing_criteria": testing_criteria,
+ "metadata": metadata,
+ "name": name,
+ },
+ eval_create_params.EvalCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalCreateResponse,
+ )
+
+ async def retrieve(
+ self,
+ eval_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalRetrieveResponse:
+ """
+ Get an evaluation by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return await self._get(
+ f"/evals/{eval_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalRetrieveResponse,
+ )
+
+ async def update(
+ self,
+ eval_id: str,
+ *,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalUpdateResponse:
+ """
+ Update certain properties of an evaluation.
+
+ Args:
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: Rename the evaluation.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return await self._post(
+ f"/evals/{eval_id}",
+ body=await async_maybe_transform(
+ {
+ "metadata": metadata,
+ "name": name,
+ },
+ eval_update_params.EvalUpdateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalUpdateResponse,
+ )
+
+ def list(
+ self,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ order_by: Literal["created_at", "updated_at"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[EvalListResponse, AsyncCursorPage[EvalListResponse]]:
+ """
+ List evaluations for a project.
+
+ Args:
+ after: Identifier for the last eval from the previous pagination request.
+
+ limit: Number of evals to retrieve.
+
+ order: Sort order for evals by timestamp. Use `asc` for ascending order or `desc` for
+ descending order.
+
+ order_by: Evals can be ordered by creation time or last updated time. Use `created_at` for
+ creation time or `updated_at` for last updated time.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ return self._get_api_list(
+ "/evals",
+ page=AsyncCursorPage[EvalListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "order_by": order_by,
+ },
+ eval_list_params.EvalListParams,
+ ),
+ ),
+ model=EvalListResponse,
+ )
+
+ async def delete(
+ self,
+ eval_id: str,
+ *,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> EvalDeleteResponse:
+ """
+ Delete an evaluation.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return await self._delete(
+ f"/evals/{eval_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=EvalDeleteResponse,
+ )
+
+
+class EvalsWithRawResponse:
+ def __init__(self, evals: Evals) -> None:
+ self._evals = evals
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ evals.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ evals.retrieve,
+ )
+ self.update = _legacy_response.to_raw_response_wrapper(
+ evals.update,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ evals.list,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ evals.delete,
+ )
+
+ @cached_property
+ def runs(self) -> RunsWithRawResponse:
+ return RunsWithRawResponse(self._evals.runs)
+
+
+class AsyncEvalsWithRawResponse:
+ def __init__(self, evals: AsyncEvals) -> None:
+ self._evals = evals
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ evals.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ evals.retrieve,
+ )
+ self.update = _legacy_response.async_to_raw_response_wrapper(
+ evals.update,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ evals.list,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ evals.delete,
+ )
+
+ @cached_property
+ def runs(self) -> AsyncRunsWithRawResponse:
+ return AsyncRunsWithRawResponse(self._evals.runs)
+
+
+class EvalsWithStreamingResponse:
+ def __init__(self, evals: Evals) -> None:
+ self._evals = evals
+
+ self.create = to_streamed_response_wrapper(
+ evals.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ evals.retrieve,
+ )
+ self.update = to_streamed_response_wrapper(
+ evals.update,
+ )
+ self.list = to_streamed_response_wrapper(
+ evals.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ evals.delete,
+ )
+
+ @cached_property
+ def runs(self) -> RunsWithStreamingResponse:
+ return RunsWithStreamingResponse(self._evals.runs)
+
+
+class AsyncEvalsWithStreamingResponse:
+ def __init__(self, evals: AsyncEvals) -> None:
+ self._evals = evals
+
+ self.create = async_to_streamed_response_wrapper(
+ evals.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ evals.retrieve,
+ )
+ self.update = async_to_streamed_response_wrapper(
+ evals.update,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ evals.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ evals.delete,
+ )
+
+ @cached_property
+ def runs(self) -> AsyncRunsWithStreamingResponse:
+ return AsyncRunsWithStreamingResponse(self._evals.runs)
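Taken together with the synchronous `Evals` class earlier in this file, the surface above covers listing and deleting evals. A minimal usage sketch, assuming the client wiring added elsewhere in this PR exposes the resource as `client.evals`; the `eval_abc123` ID is a hypothetical placeholder:

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

# SyncCursorPage is iterable and fetches further pages on demand.
for evaluation in client.evals.list(limit=20, order="desc", order_by="created_at"):
    print(evaluation.id)

# Delete a specific eval (placeholder ID).
client.evals.delete("eval_abc123")

# The raw-response wrappers defined above expose headers plus a .parse() step.
raw = client.evals.with_raw_response.delete("eval_abc123")
print(raw.headers.get("x-request-id"))
deleted = raw.parse()
```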
diff --git a/src/openai/resources/evals/runs/__init__.py b/src/openai/resources/evals/runs/__init__.py
new file mode 100644
index 0000000000..d189f16fb7
--- /dev/null
+++ b/src/openai/resources/evals/runs/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .runs import (
+ Runs,
+ AsyncRuns,
+ RunsWithRawResponse,
+ AsyncRunsWithRawResponse,
+ RunsWithStreamingResponse,
+ AsyncRunsWithStreamingResponse,
+)
+from .output_items import (
+ OutputItems,
+ AsyncOutputItems,
+ OutputItemsWithRawResponse,
+ AsyncOutputItemsWithRawResponse,
+ OutputItemsWithStreamingResponse,
+ AsyncOutputItemsWithStreamingResponse,
+)
+
+__all__ = [
+ "OutputItems",
+ "AsyncOutputItems",
+ "OutputItemsWithRawResponse",
+ "AsyncOutputItemsWithRawResponse",
+ "OutputItemsWithStreamingResponse",
+ "AsyncOutputItemsWithStreamingResponse",
+ "Runs",
+ "AsyncRuns",
+ "RunsWithRawResponse",
+ "AsyncRunsWithRawResponse",
+ "RunsWithStreamingResponse",
+ "AsyncRunsWithStreamingResponse",
+]
diff --git a/src/openai/resources/evals/runs/output_items.py b/src/openai/resources/evals/runs/output_items.py
new file mode 100644
index 0000000000..8fd0fdea92
--- /dev/null
+++ b/src/openai/resources/evals/runs/output_items.py
@@ -0,0 +1,315 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.evals.runs import output_item_list_params
+from ....types.evals.runs.output_item_list_response import OutputItemListResponse
+from ....types.evals.runs.output_item_retrieve_response import OutputItemRetrieveResponse
+
+__all__ = ["OutputItems", "AsyncOutputItems"]
+
+
+class OutputItems(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> OutputItemsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return OutputItemsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> OutputItemsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return OutputItemsWithStreamingResponse(self)
+
+ def retrieve(
+ self,
+ output_item_id: str,
+ *,
+ eval_id: str,
+ run_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> OutputItemRetrieveResponse:
+ """
+ Get an evaluation run output item by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ if not output_item_id:
+ raise ValueError(f"Expected a non-empty value for `output_item_id` but received {output_item_id!r}")
+ return self._get(
+ f"/evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=OutputItemRetrieveResponse,
+ )
+
+ def list(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ status: Literal["fail", "pass"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[OutputItemListResponse]:
+ """
+ Get a list of output items for an evaluation run.
+
+ Args:
+ after: Identifier for the last output item from the previous pagination request.
+
+ limit: Number of output items to retrieve.
+
+ order: Sort order for output items by timestamp. Use `asc` for ascending order or
+ `desc` for descending order. Defaults to `asc`.
+
+ status: Filter output items by status. Use `fail` to filter by failed output items or
+ `pass` to filter by passed output items.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._get_api_list(
+ f"/evals/{eval_id}/runs/{run_id}/output_items",
+ page=SyncCursorPage[OutputItemListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "status": status,
+ },
+ output_item_list_params.OutputItemListParams,
+ ),
+ ),
+ model=OutputItemListResponse,
+ )
+
+
+class AsyncOutputItems(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncOutputItemsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncOutputItemsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncOutputItemsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncOutputItemsWithStreamingResponse(self)
+
+ async def retrieve(
+ self,
+ output_item_id: str,
+ *,
+ eval_id: str,
+ run_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> OutputItemRetrieveResponse:
+ """
+ Get an evaluation run output item by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ if not output_item_id:
+ raise ValueError(f"Expected a non-empty value for `output_item_id` but received {output_item_id!r}")
+ return await self._get(
+ f"/evals/{eval_id}/runs/{run_id}/output_items/{output_item_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=OutputItemRetrieveResponse,
+ )
+
+ def list(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ status: Literal["fail", "pass"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[OutputItemListResponse, AsyncCursorPage[OutputItemListResponse]]:
+ """
+ Get a list of output items for an evaluation run.
+
+ Args:
+ after: Identifier for the last output item from the previous pagination request.
+
+ limit: Number of output items to retrieve.
+
+ order: Sort order for output items by timestamp. Use `asc` for ascending order or
+ `desc` for descending order. Defaults to `asc`.
+
+ status: Filter output items by status. Use `fail` to filter by failed output items or
+ `pass` to filter by passed output items.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._get_api_list(
+ f"/evals/{eval_id}/runs/{run_id}/output_items",
+ page=AsyncCursorPage[OutputItemListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "status": status,
+ },
+ output_item_list_params.OutputItemListParams,
+ ),
+ ),
+ model=OutputItemListResponse,
+ )
+
+
+class OutputItemsWithRawResponse:
+ def __init__(self, output_items: OutputItems) -> None:
+ self._output_items = output_items
+
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ output_items.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ output_items.list,
+ )
+
+
+class AsyncOutputItemsWithRawResponse:
+ def __init__(self, output_items: AsyncOutputItems) -> None:
+ self._output_items = output_items
+
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ output_items.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ output_items.list,
+ )
+
+
+class OutputItemsWithStreamingResponse:
+ def __init__(self, output_items: OutputItems) -> None:
+ self._output_items = output_items
+
+ self.retrieve = to_streamed_response_wrapper(
+ output_items.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ output_items.list,
+ )
+
+
+class AsyncOutputItemsWithStreamingResponse:
+ def __init__(self, output_items: AsyncOutputItems) -> None:
+ self._output_items = output_items
+
+ self.retrieve = async_to_streamed_response_wrapper(
+ output_items.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ output_items.list,
+ )
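A hedged sketch of reaching the new output-items resource from a client; the eval and run IDs are hypothetical placeholders, and the `client.evals.runs.output_items` path assumes the resource wiring added elsewhere in this PR:

```python
from openai import OpenAI

client = OpenAI()

eval_id = "eval_abc123"    # placeholder
run_id = "evalrun_xyz789"  # placeholder

# List the run's output items, keeping only failures; the page auto-paginates.
for item in client.evals.runs.output_items.list(
    run_id,
    eval_id=eval_id,
    status="fail",
    order="desc",
):
    print(item.id)

# Fetch one output item by ID (placeholder).
item = client.evals.runs.output_items.retrieve(
    "outputitem_123",
    eval_id=eval_id,
    run_id=run_id,
)
```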
diff --git a/src/openai/resources/evals/runs/runs.py b/src/openai/resources/evals/runs/runs.py
new file mode 100644
index 0000000000..d74c91e3c4
--- /dev/null
+++ b/src/openai/resources/evals/runs/runs.py
@@ -0,0 +1,632 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from .output_items import (
+ OutputItems,
+ AsyncOutputItems,
+ OutputItemsWithRawResponse,
+ AsyncOutputItemsWithRawResponse,
+ OutputItemsWithStreamingResponse,
+ AsyncOutputItemsWithStreamingResponse,
+)
+from ....pagination import SyncCursorPage, AsyncCursorPage
+from ....types.evals import run_list_params, run_create_params
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.shared_params.metadata import Metadata
+from ....types.evals.run_list_response import RunListResponse
+from ....types.evals.run_cancel_response import RunCancelResponse
+from ....types.evals.run_create_response import RunCreateResponse
+from ....types.evals.run_delete_response import RunDeleteResponse
+from ....types.evals.run_retrieve_response import RunRetrieveResponse
+
+__all__ = ["Runs", "AsyncRuns"]
+
+
+class Runs(SyncAPIResource):
+ @cached_property
+ def output_items(self) -> OutputItems:
+ return OutputItems(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> RunsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return RunsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> RunsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return RunsWithStreamingResponse(self)
+
+ def create(
+ self,
+ eval_id: str,
+ *,
+ data_source: run_create_params.DataSource,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunCreateResponse:
+ """Create a new evaluation run.
+
+ This is the endpoint that will kick off grading.
+
+ Args:
+ data_source: Details about the run's data source.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._post(
+ f"/evals/{eval_id}/runs",
+ body=maybe_transform(
+ {
+ "data_source": data_source,
+ "metadata": metadata,
+ "name": name,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunCreateResponse,
+ )
+
+ def retrieve(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunRetrieveResponse:
+ """
+ Get an evaluation run by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._get(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunRetrieveResponse,
+ )
+
+ def list(
+ self,
+ eval_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ status: Literal["queued", "in_progress", "completed", "canceled", "failed"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncCursorPage[RunListResponse]:
+ """
+ Get a list of runs for an evaluation.
+
+ Args:
+ after: Identifier for the last run from the previous pagination request.
+
+ limit: Number of runs to retrieve.
+
+ order: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for
+ descending order. Defaults to `asc`.
+
+ status: Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed`
+ | `canceled`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._get_api_list(
+ f"/evals/{eval_id}/runs",
+ page=SyncCursorPage[RunListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "status": status,
+ },
+ run_list_params.RunListParams,
+ ),
+ ),
+ model=RunListResponse,
+ )
+
+ def delete(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunDeleteResponse:
+ """
+ Delete an eval run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._delete(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunDeleteResponse,
+ )
+
+ def cancel(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunCancelResponse:
+ """
+ Cancel an ongoing evaluation run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return self._post(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunCancelResponse,
+ )
+
+
+class AsyncRuns(AsyncAPIResource):
+ @cached_property
+ def output_items(self) -> AsyncOutputItems:
+ return AsyncOutputItems(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncRunsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncRunsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncRunsWithStreamingResponse(self)
+
+ async def create(
+ self,
+ eval_id: str,
+ *,
+ data_source: run_create_params.DataSource,
+ metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
+ name: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunCreateResponse:
+ """Create a new evaluation run.
+
+ This is the endpoint that will kick off grading.
+
+ Args:
+ data_source: Details about the run's data source.
+
+ metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
+ for storing additional information about the object in a structured format, and
+ querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+
+ name: The name of the run.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return await self._post(
+ f"/evals/{eval_id}/runs",
+ body=await async_maybe_transform(
+ {
+ "data_source": data_source,
+ "metadata": metadata,
+ "name": name,
+ },
+ run_create_params.RunCreateParams,
+ ),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunCreateResponse,
+ )
+
+ async def retrieve(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunRetrieveResponse:
+ """
+ Get an evaluation run by ID.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return await self._get(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunRetrieveResponse,
+ )
+
+ def list(
+ self,
+ eval_id: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+ status: Literal["queued", "in_progress", "completed", "canceled", "failed"] | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[RunListResponse, AsyncCursorPage[RunListResponse]]:
+ """
+ Get a list of runs for an evaluation.
+
+ Args:
+ after: Identifier for the last run from the previous pagination request.
+
+ limit: Number of runs to retrieve.
+
+ order: Sort order for runs by timestamp. Use `asc` for ascending order or `desc` for
+ descending order. Defaults to `asc`.
+
+ status: Filter runs by status. One of `queued` | `in_progress` | `failed` | `completed`
+ | `canceled`.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ return self._get_api_list(
+ f"/evals/{eval_id}/runs",
+ page=AsyncCursorPage[RunListResponse],
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "status": status,
+ },
+ run_list_params.RunListParams,
+ ),
+ ),
+ model=RunListResponse,
+ )
+
+ async def delete(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunDeleteResponse:
+ """
+ Delete an eval run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return await self._delete(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunDeleteResponse,
+ )
+
+ async def cancel(
+ self,
+ run_id: str,
+ *,
+ eval_id: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> RunCancelResponse:
+ """
+ Cancel an ongoing evaluation run.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not eval_id:
+ raise ValueError(f"Expected a non-empty value for `eval_id` but received {eval_id!r}")
+ if not run_id:
+ raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
+ return await self._post(
+ f"/evals/{eval_id}/runs/{run_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=RunCancelResponse,
+ )
+
+
+class RunsWithRawResponse:
+ def __init__(self, runs: Runs) -> None:
+ self._runs = runs
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ runs.retrieve,
+ )
+ self.list = _legacy_response.to_raw_response_wrapper(
+ runs.list,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ runs.delete,
+ )
+ self.cancel = _legacy_response.to_raw_response_wrapper(
+ runs.cancel,
+ )
+
+ @cached_property
+ def output_items(self) -> OutputItemsWithRawResponse:
+ return OutputItemsWithRawResponse(self._runs.output_items)
+
+
+class AsyncRunsWithRawResponse:
+ def __init__(self, runs: AsyncRuns) -> None:
+ self._runs = runs
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ runs.retrieve,
+ )
+ self.list = _legacy_response.async_to_raw_response_wrapper(
+ runs.list,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ runs.delete,
+ )
+ self.cancel = _legacy_response.async_to_raw_response_wrapper(
+ runs.cancel,
+ )
+
+ @cached_property
+ def output_items(self) -> AsyncOutputItemsWithRawResponse:
+ return AsyncOutputItemsWithRawResponse(self._runs.output_items)
+
+
+class RunsWithStreamingResponse:
+ def __init__(self, runs: Runs) -> None:
+ self._runs = runs
+
+ self.create = to_streamed_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ runs.retrieve,
+ )
+ self.list = to_streamed_response_wrapper(
+ runs.list,
+ )
+ self.delete = to_streamed_response_wrapper(
+ runs.delete,
+ )
+ self.cancel = to_streamed_response_wrapper(
+ runs.cancel,
+ )
+
+ @cached_property
+ def output_items(self) -> OutputItemsWithStreamingResponse:
+ return OutputItemsWithStreamingResponse(self._runs.output_items)
+
+
+class AsyncRunsWithStreamingResponse:
+ def __init__(self, runs: AsyncRuns) -> None:
+ self._runs = runs
+
+ self.create = async_to_streamed_response_wrapper(
+ runs.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ runs.retrieve,
+ )
+ self.list = async_to_streamed_response_wrapper(
+ runs.list,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ runs.delete,
+ )
+ self.cancel = async_to_streamed_response_wrapper(
+ runs.cancel,
+ )
+
+ @cached_property
+ def output_items(self) -> AsyncOutputItemsWithStreamingResponse:
+ return AsyncOutputItemsWithStreamingResponse(self._runs.output_items)
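An async sketch of the run surface above, using the `AsyncOpenAI` client and hypothetical IDs; `create` is omitted here because its `data_source` payload is defined by types elsewhere in this PR:

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    eval_id = "eval_abc123"  # placeholder

    # The AsyncPaginator returned by .list() can be iterated directly.
    async for run in client.evals.runs.list(eval_id, status="in_progress", order="desc"):
        print(run.id)

    run_id = "evalrun_xyz789"  # placeholder
    run = await client.evals.runs.retrieve(run_id, eval_id=eval_id)

    # Cancel an in-flight run, or remove it entirely.
    await client.evals.runs.cancel(run_id, eval_id=eval_id)
    await client.evals.runs.delete(run_id, eval_id=eval_id)


asyncio.run(main())
```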
diff --git a/src/openai/resources/files.py b/src/openai/resources/files.py
index 2eaa4a6401..179af870ba 100644
--- a/src/openai/resources/files.py
+++ b/src/openai/resources/files.py
@@ -12,12 +12,7 @@
from .. import _legacy_response
from ..types import FilePurpose, file_list_params, file_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from .._utils import (
- extract_files,
- maybe_transform,
- deepcopy_minimal,
- async_maybe_transform,
-)
+from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import (
diff --git a/src/openai/resources/fine_tuning/__init__.py b/src/openai/resources/fine_tuning/__init__.py
index 7765231fee..ed7db4f4e0 100644
--- a/src/openai/resources/fine_tuning/__init__.py
+++ b/src/openai/resources/fine_tuning/__init__.py
@@ -8,6 +8,14 @@
JobsWithStreamingResponse,
AsyncJobsWithStreamingResponse,
)
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
from .fine_tuning import (
FineTuning,
AsyncFineTuning,
@@ -24,6 +32,12 @@
"AsyncJobsWithRawResponse",
"JobsWithStreamingResponse",
"AsyncJobsWithStreamingResponse",
+ "Checkpoints",
+ "AsyncCheckpoints",
+ "CheckpointsWithRawResponse",
+ "AsyncCheckpointsWithRawResponse",
+ "CheckpointsWithStreamingResponse",
+ "AsyncCheckpointsWithStreamingResponse",
"FineTuning",
"AsyncFineTuning",
"FineTuningWithRawResponse",
diff --git a/src/openai/resources/fine_tuning/checkpoints/__init__.py b/src/openai/resources/fine_tuning/checkpoints/__init__.py
new file mode 100644
index 0000000000..fdc37940f9
--- /dev/null
+++ b/src/openai/resources/fine_tuning/checkpoints/__init__.py
@@ -0,0 +1,33 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
+from .permissions import (
+ Permissions,
+ AsyncPermissions,
+ PermissionsWithRawResponse,
+ AsyncPermissionsWithRawResponse,
+ PermissionsWithStreamingResponse,
+ AsyncPermissionsWithStreamingResponse,
+)
+
+__all__ = [
+ "Permissions",
+ "AsyncPermissions",
+ "PermissionsWithRawResponse",
+ "AsyncPermissionsWithRawResponse",
+ "PermissionsWithStreamingResponse",
+ "AsyncPermissionsWithStreamingResponse",
+ "Checkpoints",
+ "AsyncCheckpoints",
+ "CheckpointsWithRawResponse",
+ "AsyncCheckpointsWithRawResponse",
+ "CheckpointsWithStreamingResponse",
+ "AsyncCheckpointsWithStreamingResponse",
+]
diff --git a/src/openai/resources/fine_tuning/checkpoints/checkpoints.py b/src/openai/resources/fine_tuning/checkpoints/checkpoints.py
new file mode 100644
index 0000000000..f59976a264
--- /dev/null
+++ b/src/openai/resources/fine_tuning/checkpoints/checkpoints.py
@@ -0,0 +1,102 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from ...._compat import cached_property
+from .permissions import (
+ Permissions,
+ AsyncPermissions,
+ PermissionsWithRawResponse,
+ AsyncPermissionsWithRawResponse,
+ PermissionsWithStreamingResponse,
+ AsyncPermissionsWithStreamingResponse,
+)
+from ...._resource import SyncAPIResource, AsyncAPIResource
+
+__all__ = ["Checkpoints", "AsyncCheckpoints"]
+
+
+class Checkpoints(SyncAPIResource):
+ @cached_property
+ def permissions(self) -> Permissions:
+ return Permissions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> CheckpointsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return CheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> CheckpointsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return CheckpointsWithStreamingResponse(self)
+
+
+class AsyncCheckpoints(AsyncAPIResource):
+ @cached_property
+ def permissions(self) -> AsyncPermissions:
+ return AsyncPermissions(self._client)
+
+ @cached_property
+ def with_raw_response(self) -> AsyncCheckpointsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncCheckpointsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncCheckpointsWithStreamingResponse(self)
+
+
+class CheckpointsWithRawResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ @cached_property
+ def permissions(self) -> PermissionsWithRawResponse:
+ return PermissionsWithRawResponse(self._checkpoints.permissions)
+
+
+class AsyncCheckpointsWithRawResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ @cached_property
+ def permissions(self) -> AsyncPermissionsWithRawResponse:
+ return AsyncPermissionsWithRawResponse(self._checkpoints.permissions)
+
+
+class CheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: Checkpoints) -> None:
+ self._checkpoints = checkpoints
+
+ @cached_property
+ def permissions(self) -> PermissionsWithStreamingResponse:
+ return PermissionsWithStreamingResponse(self._checkpoints.permissions)
+
+
+class AsyncCheckpointsWithStreamingResponse:
+ def __init__(self, checkpoints: AsyncCheckpoints) -> None:
+ self._checkpoints = checkpoints
+
+ @cached_property
+ def permissions(self) -> AsyncPermissionsWithStreamingResponse:
+ return AsyncPermissionsWithStreamingResponse(self._checkpoints.permissions)
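The class above is pure routing: it hangs `permissions` off `fine_tuning.checkpoints` and lets the raw/streaming wrappers compose through the nested resources. A short sketch of what that wiring yields, assuming the client exposes the resource as `client.fine_tuning`:

```python
from openai import OpenAI

client = OpenAI()

# Normal access walks the nested resource chain.
permissions = client.fine_tuning.checkpoints.permissions

# The wrapper classes compose the same way, so the raw-response variant of a
# permissions method is reachable via either prefix:
raw_a = client.fine_tuning.with_raw_response.checkpoints.permissions
raw_b = client.fine_tuning.checkpoints.permissions.with_raw_response
```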
diff --git a/src/openai/resources/fine_tuning/checkpoints/permissions.py b/src/openai/resources/fine_tuning/checkpoints/permissions.py
new file mode 100644
index 0000000000..547e42ecac
--- /dev/null
+++ b/src/openai/resources/fine_tuning/checkpoints/permissions.py
@@ -0,0 +1,419 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Literal
+
+import httpx
+
+from .... import _legacy_response
+from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
+from ...._utils import maybe_transform, async_maybe_transform
+from ...._compat import cached_property
+from ...._resource import SyncAPIResource, AsyncAPIResource
+from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
+from ....pagination import SyncPage, AsyncPage
+from ...._base_client import AsyncPaginator, make_request_options
+from ....types.fine_tuning.checkpoints import permission_create_params, permission_retrieve_params
+from ....types.fine_tuning.checkpoints.permission_create_response import PermissionCreateResponse
+from ....types.fine_tuning.checkpoints.permission_delete_response import PermissionDeleteResponse
+from ....types.fine_tuning.checkpoints.permission_retrieve_response import PermissionRetrieveResponse
+
+__all__ = ["Permissions", "AsyncPermissions"]
+
+
+class Permissions(SyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> PermissionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return PermissionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> PermissionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return PermissionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ project_ids: List[str],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> SyncPage[PermissionCreateResponse]:
+ """
+ **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys).
+
+ This enables organization owners to share fine-tuned models with other projects
+ in their organization.
+
+ Args:
+ project_ids: The project identifiers to grant access to.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return self._get_api_list(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ page=SyncPage[PermissionCreateResponse],
+ body=maybe_transform({"project_ids": project_ids}, permission_create_params.PermissionCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ model=PermissionCreateResponse,
+ method="post",
+ )
+
+ def retrieve(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["ascending", "descending"] | NotGiven = NOT_GIVEN,
+ project_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PermissionRetrieveResponse:
+ """
+ **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+ Organization owners can use this endpoint to view all permissions for a
+ fine-tuned model checkpoint.
+
+ Args:
+ after: Identifier for the last permission ID from the previous pagination request.
+
+ limit: Number of permissions to retrieve.
+
+ order: The order in which to retrieve permissions.
+
+ project_id: The ID of the project to get permissions for.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return self._get(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "project_id": project_id,
+ },
+ permission_retrieve_params.PermissionRetrieveParams,
+ ),
+ ),
+ cast_to=PermissionRetrieveResponse,
+ )
+
+ def delete(
+ self,
+ permission_id: str,
+ *,
+ fine_tuned_model_checkpoint: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PermissionDeleteResponse:
+ """
+ **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+ Organization owners can use this endpoint to delete a permission for a
+ fine-tuned model checkpoint.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ if not permission_id:
+ raise ValueError(f"Expected a non-empty value for `permission_id` but received {permission_id!r}")
+ return self._delete(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PermissionDeleteResponse,
+ )
+
+
+class AsyncPermissions(AsyncAPIResource):
+ @cached_property
+ def with_raw_response(self) -> AsyncPermissionsWithRawResponse:
+ """
+ This property can be used as a prefix for any HTTP method call to return
+ the raw response object instead of the parsed content.
+
+ For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
+ """
+ return AsyncPermissionsWithRawResponse(self)
+
+ @cached_property
+ def with_streaming_response(self) -> AsyncPermissionsWithStreamingResponse:
+ """
+ An alternative to `.with_raw_response` that doesn't eagerly read the response body.
+
+ For more information, see https://www.github.com/openai/openai-python#with_streaming_response
+ """
+ return AsyncPermissionsWithStreamingResponse(self)
+
+ def create(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ project_ids: List[str],
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> AsyncPaginator[PermissionCreateResponse, AsyncPage[PermissionCreateResponse]]:
+ """
+ **NOTE:** Calling this endpoint requires an [admin API key](../admin-api-keys).
+
+ This enables organization owners to share fine-tuned models with other projects
+ in their organization.
+
+ Args:
+ project_ids: The project identifiers to grant access to.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return self._get_api_list(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ page=AsyncPage[PermissionCreateResponse],
+ body=maybe_transform({"project_ids": project_ids}, permission_create_params.PermissionCreateParams),
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ model=PermissionCreateResponse,
+ method="post",
+ )
+
+ async def retrieve(
+ self,
+ fine_tuned_model_checkpoint: str,
+ *,
+ after: str | NotGiven = NOT_GIVEN,
+ limit: int | NotGiven = NOT_GIVEN,
+ order: Literal["ascending", "descending"] | NotGiven = NOT_GIVEN,
+ project_id: str | NotGiven = NOT_GIVEN,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PermissionRetrieveResponse:
+ """
+ **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+ Organization owners can use this endpoint to view all permissions for a
+ fine-tuned model checkpoint.
+
+ Args:
+ after: Identifier for the last permission ID from the previous pagination request.
+
+ limit: Number of permissions to retrieve.
+
+ order: The order in which to retrieve permissions.
+
+ project_id: The ID of the project to get permissions for.
+
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ return await self._get(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions",
+ options=make_request_options(
+ extra_headers=extra_headers,
+ extra_query=extra_query,
+ extra_body=extra_body,
+ timeout=timeout,
+ query=await async_maybe_transform(
+ {
+ "after": after,
+ "limit": limit,
+ "order": order,
+ "project_id": project_id,
+ },
+ permission_retrieve_params.PermissionRetrieveParams,
+ ),
+ ),
+ cast_to=PermissionRetrieveResponse,
+ )
+
+ async def delete(
+ self,
+ permission_id: str,
+ *,
+ fine_tuned_model_checkpoint: str,
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+ # The extra values given here take precedence over values defined on the client or passed to this method.
+ extra_headers: Headers | None = None,
+ extra_query: Query | None = None,
+ extra_body: Body | None = None,
+ timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
+ ) -> PermissionDeleteResponse:
+ """
+ **NOTE:** This endpoint requires an [admin API key](../admin-api-keys).
+
+ Organization owners can use this endpoint to delete a permission for a
+ fine-tuned model checkpoint.
+
+ Args:
+ extra_headers: Send extra headers
+
+ extra_query: Add additional query parameters to the request
+
+ extra_body: Add additional JSON properties to the request
+
+ timeout: Override the client-level default timeout for this request, in seconds
+ """
+ if not fine_tuned_model_checkpoint:
+ raise ValueError(
+ f"Expected a non-empty value for `fine_tuned_model_checkpoint` but received {fine_tuned_model_checkpoint!r}"
+ )
+ if not permission_id:
+ raise ValueError(f"Expected a non-empty value for `permission_id` but received {permission_id!r}")
+ return await self._delete(
+ f"/fine_tuning/checkpoints/{fine_tuned_model_checkpoint}/permissions/{permission_id}",
+ options=make_request_options(
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+ ),
+ cast_to=PermissionDeleteResponse,
+ )
+
+
+class PermissionsWithRawResponse:
+ def __init__(self, permissions: Permissions) -> None:
+ self._permissions = permissions
+
+ self.create = _legacy_response.to_raw_response_wrapper(
+ permissions.create,
+ )
+ self.retrieve = _legacy_response.to_raw_response_wrapper(
+ permissions.retrieve,
+ )
+ self.delete = _legacy_response.to_raw_response_wrapper(
+ permissions.delete,
+ )
+
+
+class AsyncPermissionsWithRawResponse:
+ def __init__(self, permissions: AsyncPermissions) -> None:
+ self._permissions = permissions
+
+ self.create = _legacy_response.async_to_raw_response_wrapper(
+ permissions.create,
+ )
+ self.retrieve = _legacy_response.async_to_raw_response_wrapper(
+ permissions.retrieve,
+ )
+ self.delete = _legacy_response.async_to_raw_response_wrapper(
+ permissions.delete,
+ )
+
+
+class PermissionsWithStreamingResponse:
+ def __init__(self, permissions: Permissions) -> None:
+ self._permissions = permissions
+
+ self.create = to_streamed_response_wrapper(
+ permissions.create,
+ )
+ self.retrieve = to_streamed_response_wrapper(
+ permissions.retrieve,
+ )
+ self.delete = to_streamed_response_wrapper(
+ permissions.delete,
+ )
+
+
+class AsyncPermissionsWithStreamingResponse:
+ def __init__(self, permissions: AsyncPermissions) -> None:
+ self._permissions = permissions
+
+ self.create = async_to_streamed_response_wrapper(
+ permissions.create,
+ )
+ self.retrieve = async_to_streamed_response_wrapper(
+ permissions.retrieve,
+ )
+ self.delete = async_to_streamed_response_wrapper(
+ permissions.delete,
+ )
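A usage sketch for the permissions endpoints above. Per the docstrings, they require an admin API key; the checkpoint and permission IDs below are hypothetical placeholders:

```python
from openai import OpenAI

client = OpenAI()  # assumed to be configured with an admin API key

checkpoint = "ft-checkpoint-id"  # placeholder fine-tuned model checkpoint

# Grant two (hypothetical) projects access; the result is an iterable page.
for permission in client.fine_tuning.checkpoints.permissions.create(
    checkpoint,
    project_ids=["proj_abc", "proj_def"],
):
    print(permission.id)

# Inspect existing permissions, optionally filtered by project.
perms = client.fine_tuning.checkpoints.permissions.retrieve(
    checkpoint,
    project_id="proj_abc",
    order="descending",
)

# Revoke a single permission by ID (placeholder).
client.fine_tuning.checkpoints.permissions.delete(
    "cp_perm_123",
    fine_tuned_model_checkpoint=checkpoint,
)
```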
diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py
index eebde07d81..1388c8230c 100644
--- a/src/openai/resources/fine_tuning/fine_tuning.py
+++ b/src/openai/resources/fine_tuning/fine_tuning.py
@@ -12,6 +12,14 @@
AsyncJobsWithStreamingResponse,
)
from ..._resource import SyncAPIResource, AsyncAPIResource
+from .checkpoints.checkpoints import (
+ Checkpoints,
+ AsyncCheckpoints,
+ CheckpointsWithRawResponse,
+ AsyncCheckpointsWithRawResponse,
+ CheckpointsWithStreamingResponse,
+ AsyncCheckpointsWithStreamingResponse,
+)
__all__ = ["FineTuning", "AsyncFineTuning"]
@@ -21,6 +29,10 @@ class FineTuning(SyncAPIResource):
def jobs(self) -> Jobs:
return Jobs(self._client)
+ @cached_property
+ def checkpoints(self) -> Checkpoints:
+ return Checkpoints(self._client)
+
@cached_property
def with_raw_response(self) -> FineTuningWithRawResponse:
"""
@@ -46,6 +58,10 @@ class AsyncFineTuning(AsyncAPIResource):
def jobs(self) -> AsyncJobs:
return AsyncJobs(self._client)
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpoints:
+ return AsyncCheckpoints(self._client)
+
@cached_property
def with_raw_response(self) -> AsyncFineTuningWithRawResponse:
"""
@@ -74,6 +90,10 @@ def __init__(self, fine_tuning: FineTuning) -> None:
def jobs(self) -> JobsWithRawResponse:
return JobsWithRawResponse(self._fine_tuning.jobs)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithRawResponse:
+ return CheckpointsWithRawResponse(self._fine_tuning.checkpoints)
+
class AsyncFineTuningWithRawResponse:
def __init__(self, fine_tuning: AsyncFineTuning) -> None:
@@ -83,6 +103,10 @@ def __init__(self, fine_tuning: AsyncFineTuning) -> None:
def jobs(self) -> AsyncJobsWithRawResponse:
return AsyncJobsWithRawResponse(self._fine_tuning.jobs)
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithRawResponse:
+ return AsyncCheckpointsWithRawResponse(self._fine_tuning.checkpoints)
+
class FineTuningWithStreamingResponse:
def __init__(self, fine_tuning: FineTuning) -> None:
@@ -92,6 +116,10 @@ def __init__(self, fine_tuning: FineTuning) -> None:
def jobs(self) -> JobsWithStreamingResponse:
return JobsWithStreamingResponse(self._fine_tuning.jobs)
+ @cached_property
+ def checkpoints(self) -> CheckpointsWithStreamingResponse:
+ return CheckpointsWithStreamingResponse(self._fine_tuning.checkpoints)
+
class AsyncFineTuningWithStreamingResponse:
def __init__(self, fine_tuning: AsyncFineTuning) -> None:
@@ -100,3 +128,7 @@ def __init__(self, fine_tuning: AsyncFineTuning) -> None:
@cached_property
def jobs(self) -> AsyncJobsWithStreamingResponse:
return AsyncJobsWithStreamingResponse(self._fine_tuning.jobs)
+
+ @cached_property
+ def checkpoints(self) -> AsyncCheckpointsWithStreamingResponse:
+ return AsyncCheckpointsWithStreamingResponse(self._fine_tuning.checkpoints)
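The wiring above means the new sub-resource is reachable from the existing `fine_tuning` namespace, and the raw/streaming wrappers mirror the same hierarchy. A small sketch:

```python
from openai import OpenAI

client = OpenAI()

# The checkpoints sub-resource hangs off the fine_tuning namespace.
checkpoints = client.fine_tuning.checkpoints

# The wrapper hierarchies mirror it, as wired up in the classes above.
raw_checkpoints = client.fine_tuning.with_raw_response.checkpoints
streaming_checkpoints = client.fine_tuning.with_streaming_response.checkpoints

# Each accessor is a cached_property, so repeated access returns the same object.
assert client.fine_tuning.checkpoints is checkpoints
```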
diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py
index bbeff60bc6..90619c8609 100644
--- a/src/openai/resources/fine_tuning/jobs/jobs.py
+++ b/src/openai/resources/fine_tuning/jobs/jobs.py
@@ -9,10 +9,7 @@
from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from .checkpoints import (
Checkpoints,
diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py
index 30473c14f7..e59d0ce35c 100644
--- a/src/openai/resources/images.py
+++ b/src/openai/resources/images.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import Union, Mapping, Optional, cast
+from typing import List, Union, Mapping, Optional, cast
from typing_extensions import Literal
import httpx
@@ -10,12 +10,7 @@
from .. import _legacy_response
from ..types import image_edit_params, image_generate_params, image_create_variation_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from .._utils import (
- extract_files,
- maybe_transform,
- deepcopy_minimal,
- async_maybe_transform,
-)
+from .._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -62,8 +57,9 @@ def create_variation(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ImagesResponse:
- """
- Creates a variation of a given image.
+ """Creates a variation of a given image.
+
+ This endpoint only supports `dall-e-2`.
Args:
image: The image to use as the basis for the variation(s). Must be a valid PNG file,
@@ -72,8 +68,7 @@ def create_variation(
model: The model to use for image generation. Only `dall-e-2` is supported at this
time.
- n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
- `n=1` is supported.
+ n: The number of images to generate. Must be between 1 and 10.
response_format: The format in which the generated images are returned. Must be one of `url` or
`b64_json`. URLs are only valid for 60 minutes after the image has been
@@ -122,11 +117,12 @@ def create_variation(
def edit(
self,
*,
- image: FileTypes,
+ image: Union[FileTypes, List[FileTypes]],
prompt: str,
mask: FileTypes | NotGiven = NOT_GIVEN,
model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
n: Optional[int] | NotGiven = NOT_GIVEN,
+ quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN,
response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -137,31 +133,43 @@ def edit(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ImagesResponse:
- """
- Creates an edited or extended image given an original image and a prompt.
+ """Creates an edited or extended image given one or more source images and a
+ prompt.
+
+ This endpoint only supports `gpt-image-1` and `dall-e-2`.
Args:
- image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
- is not provided, image must have transparency, which will be used as the mask.
+ image: The image(s) to edit. Must be a supported image file or an array of images. For
+ `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than
+ 25MB. For `dall-e-2`, you can only provide one image, and it should be a square
+ `png` file less than 4MB.
prompt: A text description of the desired image(s). The maximum length is 1000
- characters.
+ characters for `dall-e-2`, and 32000 characters for `gpt-image-1`.
mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
- indicate where `image` should be edited. Must be a valid PNG file, less than
+ indicate where `image` should be edited. If there are multiple images provided,
+ the mask will be applied on the first image. Must be a valid PNG file, less than
4MB, and have the same dimensions as `image`.
- model: The model to use for image generation. Only `dall-e-2` is supported at this
- time.
+ model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are
+ supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1`
+ is used.
n: The number of images to generate. Must be between 1 and 10.
+ quality: The quality of the image that will be generated. `high`, `medium` and `low` are
+ only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality.
+ Defaults to `auto`.
+
response_format: The format in which the generated images are returned. Must be one of `url` or
`b64_json`. URLs are only valid for 60 minutes after the image has been
- generated.
+ generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1`
+ will always return base64-encoded images.
- size: The size of the generated images. Must be one of `256x256`, `512x512`, or
- `1024x1024`.
+ size: The size of the generated images. Must be one of `1024x1024`, `1536x1024`
+ (landscape), `1024x1536` (portrait), or `auto` (default value) for
+ `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`.
user: A unique identifier representing your end-user, which can help OpenAI to monitor
and detect abuse.
@@ -182,12 +190,13 @@ def edit(
"mask": mask,
"model": model,
"n": n,
+ "quality": quality,
"response_format": response_format,
"size": size,
"user": user,
}
)
- files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]])
+ files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["image", ""], ["mask"]])
# It should be noted that the actual Content-Type header that will be
# sent to the server will contain a `boundary` parameter, e.g.
# multipart/form-data; boundary=---abc--
@@ -206,11 +215,18 @@ def generate(
self,
*,
prompt: str,
+ background: Optional[Literal["transparent", "opaque", "auto"]] | NotGiven = NOT_GIVEN,
model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+ moderation: Optional[Literal["low", "auto"]] | NotGiven = NOT_GIVEN,
n: Optional[int] | NotGiven = NOT_GIVEN,
- quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN,
+ output_compression: Optional[int] | NotGiven = NOT_GIVEN,
+ output_format: Optional[Literal["png", "jpeg", "webp"]] | NotGiven = NOT_GIVEN,
+ quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN,
response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
- size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN,
+ size: Optional[
+ Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"]
+ ]
+ | NotGiven = NOT_GIVEN,
style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -222,32 +238,60 @@ def generate(
) -> ImagesResponse:
"""
Creates an image given a prompt.
+ [Learn more](https://platform.openai.com/docs/guides/images).
Args:
- prompt: A text description of the desired image(s). The maximum length is 1000
- characters for `dall-e-2` and 4000 characters for `dall-e-3`.
+ prompt: A text description of the desired image(s). The maximum length is 32000
+ characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters
+ for `dall-e-3`.
+
+ background: Allows you to set transparency for the background of the generated image(s). This
+ parameter is only supported for `gpt-image-1`. Must be one of `transparent`,
+ `opaque` or `auto` (default value). When `auto` is used, the model will
+ automatically determine the best background for the image.
+
+ If `transparent`, the output format needs to support transparency, so it should
+ be set to either `png` (default value) or `webp`.
- model: The model to use for image generation.
+ model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or
+ `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to
+ `gpt-image-1` is used.
+
+ moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must
+ be either `low` for less restrictive filtering or `auto` (default value).
n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
`n=1` is supported.
- quality: The quality of the image that will be generated. `hd` creates images with finer
- details and greater consistency across the image. This param is only supported
- for `dall-e-3`.
+ output_compression: The compression level (0-100%) for the generated images. This parameter is only
+ supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and
+ defaults to 100.
- response_format: The format in which the generated images are returned. Must be one of `url` or
- `b64_json`. URLs are only valid for 60 minutes after the image has been
- generated.
+ output_format: The format in which the generated images are returned. This parameter is only
+ supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.
- size: The size of the generated images. Must be one of `256x256`, `512x512`, or
- `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
- `1024x1792` for `dall-e-3` models.
+ quality: The quality of the image that will be generated.
+
+ - `auto` (default value) will automatically select the best quality for the
+ given model.
+ - `high`, `medium` and `low` are supported for `gpt-image-1`.
+ - `hd` and `standard` are supported for `dall-e-3`.
+ - `standard` is the only option for `dall-e-2`.
+
+ response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are
+ returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes
+ after the image has been generated. This parameter isn't supported for
+ `gpt-image-1` which will always return base64-encoded images.
+
+ size: The size of the generated images. Must be one of `1024x1024`, `1536x1024`
+ (landscape), `1024x1536` (portrait), or `auto` (default value) for
+ `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and
+ one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.
- style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid
- causes the model to lean towards generating hyper-real and dramatic images.
- Natural causes the model to produce more natural, less hyper-real looking
- images. This param is only supported for `dall-e-3`.
+ style: The style of the generated images. This parameter is only supported for
+ `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean
+ towards generating hyper-real and dramatic images. Natural causes the model to
+ produce more natural, less hyper-real looking images.
user: A unique identifier representing your end-user, which can help OpenAI to monitor
and detect abuse.
@@ -266,8 +310,12 @@ def generate(
body=maybe_transform(
{
"prompt": prompt,
+ "background": background,
"model": model,
+ "moderation": moderation,
"n": n,
+ "output_compression": output_compression,
+ "output_format": output_format,
"quality": quality,
"response_format": response_format,
"size": size,
@@ -319,8 +367,9 @@ async def create_variation(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ImagesResponse:
- """
- Creates a variation of a given image.
+ """Creates a variation of a given image.
+
+ This endpoint only supports `dall-e-2`.
Args:
image: The image to use as the basis for the variation(s). Must be a valid PNG file,
@@ -329,8 +378,7 @@ async def create_variation(
model: The model to use for image generation. Only `dall-e-2` is supported at this
time.
- n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
- `n=1` is supported.
+ n: The number of images to generate. Must be between 1 and 10.
response_format: The format in which the generated images are returned. Must be one of `url` or
`b64_json`. URLs are only valid for 60 minutes after the image has been
@@ -379,11 +427,12 @@ async def create_variation(
async def edit(
self,
*,
- image: FileTypes,
+ image: Union[FileTypes, List[FileTypes]],
prompt: str,
mask: FileTypes | NotGiven = NOT_GIVEN,
model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
n: Optional[int] | NotGiven = NOT_GIVEN,
+ quality: Optional[Literal["standard", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN,
response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
@@ -394,31 +443,43 @@ async def edit(
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> ImagesResponse:
- """
- Creates an edited or extended image given an original image and a prompt.
+ """Creates an edited or extended image given one or more source images and a
+ prompt.
+
+ This endpoint only supports `gpt-image-1` and `dall-e-2`.
Args:
- image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
- is not provided, image must have transparency, which will be used as the mask.
+ image: The image(s) to edit. Must be a supported image file or an array of images. For
+ `gpt-image-1`, each image should be a `png`, `webp`, or `jpg` file less than
+ 25MB. For `dall-e-2`, you can only provide one image, and it should be a square
+ `png` file less than 4MB.
prompt: A text description of the desired image(s). The maximum length is 1000
- characters.
+ characters for `dall-e-2`, and 32000 characters for `gpt-image-1`.
mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
- indicate where `image` should be edited. Must be a valid PNG file, less than
+ indicate where `image` should be edited. If there are multiple images provided,
+ the mask will be applied on the first image. Must be a valid PNG file, less than
4MB, and have the same dimensions as `image`.
- model: The model to use for image generation. Only `dall-e-2` is supported at this
- time.
+ model: The model to use for image generation. Only `dall-e-2` and `gpt-image-1` are
+ supported. Defaults to `dall-e-2` unless a parameter specific to `gpt-image-1`
+ is used.
n: The number of images to generate. Must be between 1 and 10.
+ quality: The quality of the image that will be generated. `high`, `medium` and `low` are
+ only supported for `gpt-image-1`. `dall-e-2` only supports `standard` quality.
+ Defaults to `auto`.
+
response_format: The format in which the generated images are returned. Must be one of `url` or
`b64_json`. URLs are only valid for 60 minutes after the image has been
- generated.
+ generated. This parameter is only supported for `dall-e-2`, as `gpt-image-1`
+ will always return base64-encoded images.
- size: The size of the generated images. Must be one of `256x256`, `512x512`, or
- `1024x1024`.
+ size: The size of the generated images. Must be one of `1024x1024`, `1536x1024`
+ (landscape), `1024x1536` (portrait), or `auto` (default value) for
+ `gpt-image-1`, and one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`.
user: A unique identifier representing your end-user, which can help OpenAI to monitor
and detect abuse.
@@ -439,12 +500,13 @@ async def edit(
"mask": mask,
"model": model,
"n": n,
+ "quality": quality,
"response_format": response_format,
"size": size,
"user": user,
}
)
- files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]])
+ files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["image", ""], ["mask"]])
# It should be noted that the actual Content-Type header that will be
# sent to the server will contain a `boundary` parameter, e.g.
# multipart/form-data; boundary=---abc--
@@ -463,11 +525,18 @@ async def generate(
self,
*,
prompt: str,
+ background: Optional[Literal["transparent", "opaque", "auto"]] | NotGiven = NOT_GIVEN,
model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN,
+ moderation: Optional[Literal["low", "auto"]] | NotGiven = NOT_GIVEN,
n: Optional[int] | NotGiven = NOT_GIVEN,
- quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN,
+ output_compression: Optional[int] | NotGiven = NOT_GIVEN,
+ output_format: Optional[Literal["png", "jpeg", "webp"]] | NotGiven = NOT_GIVEN,
+ quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]] | NotGiven = NOT_GIVEN,
response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
- size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN,
+ size: Optional[
+ Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"]
+ ]
+ | NotGiven = NOT_GIVEN,
style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN,
user: str | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -479,32 +548,60 @@ async def generate(
) -> ImagesResponse:
"""
Creates an image given a prompt.
+ [Learn more](https://platform.openai.com/docs/guides/images).
Args:
- prompt: A text description of the desired image(s). The maximum length is 1000
- characters for `dall-e-2` and 4000 characters for `dall-e-3`.
+ prompt: A text description of the desired image(s). The maximum length is 32000
+ characters for `gpt-image-1`, 1000 characters for `dall-e-2` and 4000 characters
+ for `dall-e-3`.
+
+ background: Allows you to set transparency for the background of the generated image(s). This
+ parameter is only supported for `gpt-image-1`. Must be one of `transparent`,
+ `opaque` or `auto` (default value). When `auto` is used, the model will
+ automatically determine the best background for the image.
+
+ If `transparent`, the output format needs to support transparency, so it should
+ be set to either `png` (default value) or `webp`.
- model: The model to use for image generation.
+ model: The model to use for image generation. One of `dall-e-2`, `dall-e-3`, or
+ `gpt-image-1`. Defaults to `dall-e-2` unless a parameter specific to
+ `gpt-image-1` is used.
+
+ moderation: Control the content-moderation level for images generated by `gpt-image-1`. Must
+ be either `low` for less restrictive filtering or `auto` (default value).
n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
`n=1` is supported.
- quality: The quality of the image that will be generated. `hd` creates images with finer
- details and greater consistency across the image. This param is only supported
- for `dall-e-3`.
+ output_compression: The compression level (0-100%) for the generated images. This parameter is only
+ supported for `gpt-image-1` with the `webp` or `jpeg` output formats, and
+ defaults to 100.
- response_format: The format in which the generated images are returned. Must be one of `url` or
- `b64_json`. URLs are only valid for 60 minutes after the image has been
- generated.
+ output_format: The format in which the generated images are returned. This parameter is only
+ supported for `gpt-image-1`. Must be one of `png`, `jpeg`, or `webp`.
- size: The size of the generated images. Must be one of `256x256`, `512x512`, or
- `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
- `1024x1792` for `dall-e-3` models.
+ quality: The quality of the image that will be generated.
+
+ - `auto` (default value) will automatically select the best quality for the
+ given model.
+ - `high`, `medium` and `low` are supported for `gpt-image-1`.
+ - `hd` and `standard` are supported for `dall-e-3`.
+ - `standard` is the only option for `dall-e-2`.
+
+ response_format: The format in which generated images with `dall-e-2` and `dall-e-3` are
+ returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes
+ after the image has been generated. This parameter isn't supported for
+ `gpt-image-1` which will always return base64-encoded images.
+
+ size: The size of the generated images. Must be one of `1024x1024`, `1536x1024`
+ (landscape), `1024x1536` (portrait), or `auto` (default value) for
+ `gpt-image-1`, one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`, and
+ one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3`.
- style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid
- causes the model to lean towards generating hyper-real and dramatic images.
- Natural causes the model to produce more natural, less hyper-real looking
- images. This param is only supported for `dall-e-3`.
+ style: The style of the generated images. This parameter is only supported for
+ `dall-e-3`. Must be one of `vivid` or `natural`. Vivid causes the model to lean
+ towards generating hyper-real and dramatic images. Natural causes the model to
+ produce more natural, less hyper-real looking images.
user: A unique identifier representing your end-user, which can help OpenAI to monitor
and detect abuse.
@@ -523,8 +620,12 @@ async def generate(
body=await async_maybe_transform(
{
"prompt": prompt,
+ "background": background,
"model": model,
+ "moderation": moderation,
"n": n,
+ "output_compression": output_compression,
+ "output_format": output_format,
"quality": quality,
"response_format": response_format,
"size": size,
diff --git a/src/openai/resources/moderations.py b/src/openai/resources/moderations.py
index a8f03142bc..f7a8b52c23 100644
--- a/src/openai/resources/moderations.py
+++ b/src/openai/resources/moderations.py
@@ -9,10 +9,7 @@
from .. import _legacy_response
from ..types import moderation_create_params
from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from .._utils import maybe_transform, async_maybe_transform
from .._compat import cached_property
from .._resource import SyncAPIResource, AsyncAPIResource
from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py
index 29ed3de42a..4a0687f9f3 100644
--- a/src/openai/resources/responses/responses.py
+++ b/src/openai/resources/responses/responses.py
@@ -10,12 +10,7 @@
from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven
-from ..._utils import (
- is_given,
- required_args,
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import is_given, required_args, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
@@ -89,6 +84,7 @@ def create(
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -130,7 +126,7 @@ def create(
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -174,6 +170,24 @@ def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
stream: If set to true, the model response data will be streamed to the client as it is
@@ -255,6 +269,7 @@ def create(
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -295,7 +310,7 @@ def create(
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -346,6 +361,24 @@ def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -420,6 +453,7 @@ def create(
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -460,7 +494,7 @@ def create(
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -511,6 +545,24 @@ def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -584,6 +636,7 @@ def create(
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -613,6 +666,7 @@ def create(
"parallel_tool_calls": parallel_tool_calls,
"previous_response_id": previous_response_id,
"reasoning": reasoning,
+ "service_tier": service_tier,
"store": store,
"stream": stream,
"temperature": temperature,
@@ -623,7 +677,9 @@ def create(
"truncation": truncation,
"user": user,
},
- response_create_params.ResponseCreateParams,
+ response_create_params.ResponseCreateParamsStreaming
+ if stream
+ else response_create_params.ResponseCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
@@ -901,6 +957,7 @@ async def create(
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -942,7 +999,7 @@ async def create(
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -986,6 +1043,24 @@ async def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
stream: If set to true, the model response data will be streamed to the client as it is
@@ -1067,6 +1142,7 @@ async def create(
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -1107,7 +1183,7 @@ async def create(
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1158,6 +1234,24 @@ async def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -1232,6 +1326,7 @@ async def create(
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
text: ResponseTextConfigParam | NotGiven = NOT_GIVEN,
@@ -1272,7 +1367,7 @@ async def create(
- [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
- [Function calling](https://platform.openai.com/docs/guides/function-calling)
- model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a
+ model: Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
wide range of models with different capabilities, performance characteristics,
and price points. Refer to the
[model guide](https://platform.openai.com/docs/models) to browse and compare
@@ -1323,6 +1418,24 @@ async def create(
Configuration options for
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
+ service_tier: Specifies the latency tier to use for processing the request. This parameter is
+ relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+
store: Whether to store the generated model response for later retrieval via API.
temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
@@ -1396,6 +1509,7 @@ async def create(
parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN,
previous_response_id: Optional[str] | NotGiven = NOT_GIVEN,
reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN,
+ service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
store: Optional[bool] | NotGiven = NOT_GIVEN,
stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
temperature: Optional[float] | NotGiven = NOT_GIVEN,
@@ -1425,6 +1539,7 @@ async def create(
"parallel_tool_calls": parallel_tool_calls,
"previous_response_id": previous_response_id,
"reasoning": reasoning,
+ "service_tier": service_tier,
"store": store,
"stream": stream,
"temperature": temperature,
@@ -1435,7 +1550,9 @@ async def create(
"truncation": truncation,
"user": user,
},
- response_create_params.ResponseCreateParams,
+ response_create_params.ResponseCreateParamsStreaming
+ if stream
+ else response_create_params.ResponseCreateParamsNonStreaming,
),
options=make_request_options(
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
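A brief sketch of the new `service_tier` parameter on `responses.create`; the model name is illustrative, tier availability depends on the account, and it assumes the `Response` model echoes the tier back, as the docstring above describes.

```python
from openai import OpenAI

client = OpenAI()

response = client.responses.create(
    model="o3",
    input="Summarize the trade-offs of Flex Processing in two sentences.",
    service_tier="flex",
)

# When service_tier is set, the response body reports the tier actually used.
print(response.service_tier)
print(response.output_text)
```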
diff --git a/src/openai/resources/uploads/parts.py b/src/openai/resources/uploads/parts.py
index 777469ac8e..a32f4eb1d2 100644
--- a/src/openai/resources/uploads/parts.py
+++ b/src/openai/resources/uploads/parts.py
@@ -8,12 +8,7 @@
from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
- extract_files,
- maybe_transform,
- deepcopy_minimal,
- async_maybe_transform,
-)
+from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/uploads/uploads.py b/src/openai/resources/uploads/uploads.py
index 9297dbc2c3..ecfcee4800 100644
--- a/src/openai/resources/uploads/uploads.py
+++ b/src/openai/resources/uploads/uploads.py
@@ -23,10 +23,7 @@
)
from ...types import FilePurpose, upload_create_params, upload_complete_params
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/vector_stores/file_batches.py b/src/openai/resources/vector_stores/file_batches.py
index 9b4b64d35e..4dd4430b71 100644
--- a/src/openai/resources/vector_stores/file_batches.py
+++ b/src/openai/resources/vector_stores/file_batches.py
@@ -13,11 +13,7 @@
from ... import _legacy_response
from ...types import FileChunkingStrategyParam
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
- is_given,
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import is_given, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/vector_stores/files.py b/src/openai/resources/vector_stores/files.py
index 7d93798adf..f860384629 100644
--- a/src/openai/resources/vector_stores/files.py
+++ b/src/openai/resources/vector_stores/files.py
@@ -10,11 +10,7 @@
from ... import _legacy_response
from ...types import FileChunkingStrategyParam
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
- is_given,
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import is_given, maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/resources/vector_stores/vector_stores.py b/src/openai/resources/vector_stores/vector_stores.py
index aaa6ed2757..9fc17b183b 100644
--- a/src/openai/resources/vector_stores/vector_stores.py
+++ b/src/openai/resources/vector_stores/vector_stores.py
@@ -24,10 +24,7 @@
vector_store_update_params,
)
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
- maybe_transform,
- async_maybe_transform,
-)
+from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py
index 11761534c9..57c91811b9 100644
--- a/src/openai/types/__init__.py
+++ b/src/openai/types/__init__.py
@@ -38,22 +38,32 @@
from .embedding_model import EmbeddingModel as EmbeddingModel
from .images_response import ImagesResponse as ImagesResponse
from .completion_usage import CompletionUsage as CompletionUsage
+from .eval_list_params import EvalListParams as EvalListParams
from .file_list_params import FileListParams as FileListParams
from .moderation_model import ModerationModel as ModerationModel
from .batch_list_params import BatchListParams as BatchListParams
from .completion_choice import CompletionChoice as CompletionChoice
from .image_edit_params import ImageEditParams as ImageEditParams
+from .eval_create_params import EvalCreateParams as EvalCreateParams
+from .eval_list_response import EvalListResponse as EvalListResponse
+from .eval_update_params import EvalUpdateParams as EvalUpdateParams
from .file_create_params import FileCreateParams as FileCreateParams
from .batch_create_params import BatchCreateParams as BatchCreateParams
from .batch_request_counts import BatchRequestCounts as BatchRequestCounts
+from .eval_create_response import EvalCreateResponse as EvalCreateResponse
+from .eval_delete_response import EvalDeleteResponse as EvalDeleteResponse
+from .eval_update_response import EvalUpdateResponse as EvalUpdateResponse
from .upload_create_params import UploadCreateParams as UploadCreateParams
from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted
from .audio_response_format import AudioResponseFormat as AudioResponseFormat
from .image_generate_params import ImageGenerateParams as ImageGenerateParams
+from .eval_retrieve_response import EvalRetrieveResponse as EvalRetrieveResponse
from .file_chunking_strategy import FileChunkingStrategy as FileChunkingStrategy
from .upload_complete_params import UploadCompleteParams as UploadCompleteParams
from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
+from .eval_label_model_grader import EvalLabelModelGrader as EvalLabelModelGrader
from .completion_create_params import CompletionCreateParams as CompletionCreateParams
+from .eval_string_check_grader import EvalStringCheckGrader as EvalStringCheckGrader
from .moderation_create_params import ModerationCreateParams as ModerationCreateParams
from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams
from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse
@@ -61,18 +71,25 @@
from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams
from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams
from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams
+from .eval_text_similarity_grader import EvalTextSimilarityGrader as EvalTextSimilarityGrader
from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam
from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam
from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse
from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions
from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams
from .static_file_chunking_strategy import StaticFileChunkingStrategy as StaticFileChunkingStrategy
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig as EvalCustomDataSourceConfig
+from .eval_string_check_grader_param import EvalStringCheckGraderParam as EvalStringCheckGraderParam
from .moderation_image_url_input_param import ModerationImageURLInputParam as ModerationImageURLInputParam
from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam as AutoFileChunkingStrategyParam
+from .eval_text_similarity_grader_param import EvalTextSimilarityGraderParam as EvalTextSimilarityGraderParam
from .moderation_multi_modal_input_param import ModerationMultiModalInputParam as ModerationMultiModalInputParam
from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject as OtherFileChunkingStrategyObject
from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam as StaticFileChunkingStrategyParam
from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject as StaticFileChunkingStrategyObject
+from .eval_stored_completions_data_source_config import (
+ EvalStoredCompletionsDataSourceConfig as EvalStoredCompletionsDataSourceConfig,
+)
from .static_file_chunking_strategy_object_param import (
StaticFileChunkingStrategyObjectParam as StaticFileChunkingStrategyObjectParam,
)
diff --git a/src/openai/types/audio/transcription_word.py b/src/openai/types/audio/transcription_word.py
index 969da32509..2ce682f957 100644
--- a/src/openai/types/audio/transcription_word.py
+++ b/src/openai/types/audio/transcription_word.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from ..._models import BaseModel
__all__ = ["TranscriptionWord"]
diff --git a/src/openai/types/audio/translation.py b/src/openai/types/audio/translation.py
index 7c0e905189..efc56f7f9b 100644
--- a/src/openai/types/audio/translation.py
+++ b/src/openai/types/audio/translation.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from ..._models import BaseModel
__all__ = ["Translation"]
diff --git a/src/openai/types/batch_request_counts.py b/src/openai/types/batch_request_counts.py
index 7e1d49fb88..068b071af1 100644
--- a/src/openai/types/batch_request_counts.py
+++ b/src/openai/types/batch_request_counts.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from .._models import BaseModel
__all__ = ["BatchRequestCounts"]
diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py
index 0c896d8087..87f38310ca 100644
--- a/src/openai/types/beta/assistant_tool_choice_function.py
+++ b/src/openai/types/beta/assistant_tool_choice_function.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from ..._models import BaseModel
__all__ = ["AssistantToolChoiceFunction"]
diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py
index d3ec7614fd..b28094a6a5 100644
--- a/src/openai/types/beta/assistant_update_params.py
+++ b/src/openai/types/beta/assistant_update_params.py
@@ -36,6 +36,12 @@ class AssistantUpdateParams(TypedDict, total=False):
model: Union[
str,
Literal[
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
"o3-mini",
"o3-mini-2025-01-31",
"o1",
diff --git a/src/openai/types/beta/realtime/realtime_client_event.py b/src/openai/types/beta/realtime/realtime_client_event.py
index f962a505cd..5f4858d688 100644
--- a/src/openai/types/beta/realtime/realtime_client_event.py
+++ b/src/openai/types/beta/realtime/realtime_client_event.py
@@ -1,9 +1,10 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Union
-from typing_extensions import Annotated, TypeAlias
+from typing import Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
from ...._utils import PropertyInfo
+from ...._models import BaseModel
from .session_update_event import SessionUpdateEvent
from .response_cancel_event import ResponseCancelEvent
from .response_create_event import ResponseCreateEvent
@@ -16,7 +17,16 @@
from .conversation_item_retrieve_event import ConversationItemRetrieveEvent
from .conversation_item_truncate_event import ConversationItemTruncateEvent
-__all__ = ["RealtimeClientEvent"]
+__all__ = ["RealtimeClientEvent", "OutputAudioBufferClear"]
+
+
+class OutputAudioBufferClear(BaseModel):
+ type: Literal["output_audio_buffer.clear"]
+ """The event type, must be `output_audio_buffer.clear`."""
+
+ event_id: Optional[str] = None
+ """The unique ID of the client event used for error handling."""
+
RealtimeClientEvent: TypeAlias = Annotated[
Union[
@@ -26,6 +36,7 @@
ConversationItemTruncateEvent,
InputAudioBufferAppendEvent,
InputAudioBufferClearEvent,
+ OutputAudioBufferClear,
InputAudioBufferCommitEvent,
ResponseCancelEvent,
ResponseCreateEvent,
diff --git a/src/openai/types/beta/realtime/realtime_client_event_param.py b/src/openai/types/beta/realtime/realtime_client_event_param.py
index 6fdba4b87c..e7dfba241e 100644
--- a/src/openai/types/beta/realtime/realtime_client_event_param.py
+++ b/src/openai/types/beta/realtime/realtime_client_event_param.py
@@ -3,7 +3,7 @@
from __future__ import annotations
from typing import Union
-from typing_extensions import TypeAlias
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
from .session_update_event_param import SessionUpdateEventParam
from .response_cancel_event_param import ResponseCancelEventParam
@@ -17,7 +17,16 @@
from .conversation_item_retrieve_event_param import ConversationItemRetrieveEventParam
from .conversation_item_truncate_event_param import ConversationItemTruncateEventParam
-__all__ = ["RealtimeClientEventParam"]
+__all__ = ["RealtimeClientEventParam", "OutputAudioBufferClear"]
+
+
+class OutputAudioBufferClear(TypedDict, total=False):
+ type: Required[Literal["output_audio_buffer.clear"]]
+ """The event type, must be `output_audio_buffer.clear`."""
+
+ event_id: str
+ """The unique ID of the client event used for error handling."""
+
RealtimeClientEventParam: TypeAlias = Union[
ConversationItemCreateEventParam,
@@ -26,6 +35,7 @@
ConversationItemTruncateEventParam,
InputAudioBufferAppendEventParam,
InputAudioBufferClearEventParam,
+ OutputAudioBufferClear,
InputAudioBufferCommitEventParam,
ResponseCancelEventParam,
ResponseCreateEventParam,
diff --git a/src/openai/types/beta/realtime/realtime_server_event.py b/src/openai/types/beta/realtime/realtime_server_event.py
index ba1d324445..c12f5df977 100644
--- a/src/openai/types/beta/realtime/realtime_server_event.py
+++ b/src/openai/types/beta/realtime/realtime_server_event.py
@@ -39,7 +39,13 @@
ConversationItemInputAudioTranscriptionCompletedEvent,
)
-__all__ = ["RealtimeServerEvent", "ConversationItemRetrieved"]
+__all__ = [
+ "RealtimeServerEvent",
+ "ConversationItemRetrieved",
+ "OutputAudioBufferStarted",
+ "OutputAudioBufferStopped",
+ "OutputAudioBufferCleared",
+]
class ConversationItemRetrieved(BaseModel):
@@ -53,6 +59,39 @@ class ConversationItemRetrieved(BaseModel):
"""The event type, must be `conversation.item.retrieved`."""
+class OutputAudioBufferStarted(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response_id: str
+ """The unique ID of the response that produced the audio."""
+
+ type: Literal["output_audio_buffer.started"]
+ """The event type, must be `output_audio_buffer.started`."""
+
+
+class OutputAudioBufferStopped(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response_id: str
+ """The unique ID of the response that produced the audio."""
+
+ type: Literal["output_audio_buffer.stopped"]
+ """The event type, must be `output_audio_buffer.stopped`."""
+
+
+class OutputAudioBufferCleared(BaseModel):
+ event_id: str
+ """The unique ID of the server event."""
+
+ response_id: str
+ """The unique ID of the response that produced the audio."""
+
+ type: Literal["output_audio_buffer.cleared"]
+ """The event type, must be `output_audio_buffer.cleared`."""
+
+
RealtimeServerEvent: TypeAlias = Annotated[
Union[
ConversationCreatedEvent,
@@ -86,6 +125,9 @@ class ConversationItemRetrieved(BaseModel):
SessionCreatedEvent,
SessionUpdatedEvent,
TranscriptionSessionUpdatedEvent,
+ OutputAudioBufferStarted,
+ OutputAudioBufferStopped,
+ OutputAudioBufferCleared,
],
PropertyInfo(discriminator="type"),
]
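A sketch of consuming the new `output_audio_buffer.*` server events through the realtime connection iterator. These events are emitted for WebRTC sessions, so the snippet only illustrates how the widened discriminated union is handled; the model name is illustrative.

```python
from openai import OpenAI

client = OpenAI()

with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    for event in connection:
        # The discriminator is the `type` field, as declared in the union above.
        if event.type == "output_audio_buffer.started":
            print(f"audio started for response {event.response_id}")
        elif event.type == "output_audio_buffer.stopped":
            print(f"audio stopped for response {event.response_id}")
        elif event.type == "output_audio_buffer.cleared":
            print(f"audio buffer cleared for response {event.response_id}")
            break
```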
diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py
index 065c390f4e..d813710579 100644
--- a/src/openai/types/beta/thread_create_and_run_params.py
+++ b/src/openai/types/beta/thread_create_and_run_params.py
@@ -6,8 +6,7 @@
from typing_extensions import Literal, Required, TypeAlias, TypedDict
from ..shared.chat_model import ChatModel
-from .function_tool_param import FunctionToolParam
-from .file_search_tool_param import FileSearchToolParam
+from .assistant_tool_param import AssistantToolParam
from ..shared_params.metadata import Metadata
from .code_interpreter_tool_param import CodeInterpreterToolParam
from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam
@@ -32,7 +31,6 @@
"ToolResources",
"ToolResourcesCodeInterpreter",
"ToolResourcesFileSearch",
- "Tool",
"TruncationStrategy",
"ThreadCreateAndRunParamsNonStreaming",
"ThreadCreateAndRunParamsStreaming",
@@ -153,7 +151,7 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False):
tool requires a list of vector store IDs.
"""
- tools: Optional[Iterable[Tool]]
+ tools: Optional[Iterable[AssistantToolParam]]
"""Override the tools the assistant can use for this run.
This is useful for modifying the behavior on a per-run basis.
@@ -360,9 +358,6 @@ class ToolResources(TypedDict, total=False):
file_search: ToolResourcesFileSearch
-Tool: TypeAlias = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam]
-
-
class TruncationStrategy(TypedDict, total=False):
type: Required[Literal["auto", "last_messages"]]
"""The truncation strategy to use for the thread.
diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py
index cb812a2702..3a235f89a5 100644
--- a/src/openai/types/chat/chat_completion.py
+++ b/src/openai/types/chat/chat_completion.py
@@ -59,8 +59,26 @@ class ChatCompletion(BaseModel):
object: Literal["chat.completion"]
"""The object type, which is always `chat.completion`."""
- service_tier: Optional[Literal["scale", "default"]] = None
- """The service tier used for processing the request."""
+ service_tier: Optional[Literal["auto", "default", "flex"]] = None
+ """Specifies the latency tier to use for processing the request.
+
+ This parameter is relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+ """
system_fingerprint: Optional[str] = None
"""This fingerprint represents the backend configuration that the model runs with.
diff --git a/src/openai/types/chat/chat_completion_audio.py b/src/openai/types/chat/chat_completion_audio.py
index dd15508ebb..232d60563d 100644
--- a/src/openai/types/chat/chat_completion_audio.py
+++ b/src/openai/types/chat/chat_completion_audio.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from ..._models import BaseModel
__all__ = ["ChatCompletionAudio"]
diff --git a/src/openai/types/chat/chat_completion_audio_param.py b/src/openai/types/chat/chat_completion_audio_param.py
index b902f2667f..25caada177 100644
--- a/src/openai/types/chat/chat_completion_audio_param.py
+++ b/src/openai/types/chat/chat_completion_audio_param.py
@@ -9,7 +9,7 @@
class ChatCompletionAudioParam(TypedDict, total=False):
- format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]]
+ format: Required[Literal["wav", "aac", "mp3", "flac", "opus", "pcm16"]]
"""Specifies the output audio format.
Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.
@@ -22,6 +22,6 @@ class ChatCompletionAudioParam(TypedDict, total=False):
]
"""The voice the model uses to respond.
- Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and
- `shimmer`.
+ Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `nova`,
+ `onyx`, `sage`, and `shimmer`.
"""
diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py
index 31b9cb5456..6fe996dd95 100644
--- a/src/openai/types/chat/chat_completion_chunk.py
+++ b/src/openai/types/chat/chat_completion_chunk.py
@@ -128,8 +128,26 @@ class ChatCompletionChunk(BaseModel):
object: Literal["chat.completion.chunk"]
"""The object type, which is always `chat.completion.chunk`."""
- service_tier: Optional[Literal["scale", "default"]] = None
- """The service tier used for processing the request."""
+ service_tier: Optional[Literal["auto", "default", "flex"]] = None
+ """Specifies the latency tier to use for processing the request.
+
+ This parameter is relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+ """
system_fingerprint: Optional[str] = None
"""
diff --git a/src/openai/types/chat/chat_completion_reasoning_effort.py b/src/openai/types/chat/chat_completion_reasoning_effort.py
index e4785c90bf..42a980c5b8 100644
--- a/src/openai/types/chat/chat_completion_reasoning_effort.py
+++ b/src/openai/types/chat/chat_completion_reasoning_effort.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from ..shared.reasoning_effort import ReasoningEffort
__all__ = ["ChatCompletionReasoningEffort"]
diff --git a/src/openai/types/chat/chat_completion_store_message.py b/src/openai/types/chat/chat_completion_store_message.py
index 95adc08af8..8dc093f7b8 100644
--- a/src/openai/types/chat/chat_completion_store_message.py
+++ b/src/openai/types/chat/chat_completion_store_message.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from .chat_completion_message import ChatCompletionMessage
__all__ = ["ChatCompletionStoreMessage"]
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index 05103fba91..60d5f53cdd 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -45,7 +45,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
model: Required[Union[str, ChatModel]]
- """Model ID used to generate the response, like `gpt-4o` or `o1`.
+ """Model ID used to generate the response, like `gpt-4o` or `o3`.
OpenAI offers a wide range of models with different capabilities, performance
characteristics, and price points. Refer to the
@@ -123,7 +123,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
This value is now deprecated in favor of `max_completion_tokens`, and is not
compatible with
- [o1 series models](https://platform.openai.com/docs/guides/reasoning).
+ [o-series models](https://platform.openai.com/docs/guides/reasoning).
"""
metadata: Optional[Metadata]
@@ -208,7 +208,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
in the backend.
"""
- service_tier: Optional[Literal["auto", "default"]]
+ service_tier: Optional[Literal["auto", "default", "flex"]]
"""Specifies the latency tier to use for processing the request.
This parameter is relevant for customers subscribed to the scale tier service:
@@ -220,6 +220,9 @@ class CompletionCreateParamsBase(TypedDict, total=False):
latency guarentee.
- If set to 'default', the request will be processed using the default service
tier with a lower uptime SLA and no latency guarentee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
@@ -227,9 +230,10 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
stop: Union[Optional[str], List[str], None]
- """Up to 4 sequences where the API will stop generating further tokens.
+ """Not supported with latest reasoning models `o3` and `o4-mini`.
- The returned text will not contain the stop sequence.
+ Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
"""
store: Optional[bool]
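
On the request side, the widened literal means `service_tier="flex"` passes straight through, and the tier that actually served the request is echoed on the response; the reworded `stop` docstring above is documentation-only. A sketch, with the model choice purely illustrative and Flex Processing availability depending on the account:

from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="o3",  # illustrative; Flex Processing availability depends on the account
    service_tier="flex",
    messages=[{"role": "user", "content": "Summarize the benefits of request batching."}],
)
# The response echoes the tier that actually served the request.
print(completion.service_tier)
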
diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py
index 9304d195d6..f3b0e310cc 100644
--- a/src/openai/types/chat_model.py
+++ b/src/openai/types/chat_model.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from .shared import chat_model
__all__ = ["ChatModel"]
diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py
index fdb1680d26..6ae20cff83 100644
--- a/src/openai/types/completion_create_params.py
+++ b/src/openai/types/completion_create_params.py
@@ -120,9 +120,10 @@ class CompletionCreateParamsBase(TypedDict, total=False):
"""
stop: Union[Optional[str], List[str], None]
- """Up to 4 sequences where the API will stop generating further tokens.
+ """Not supported with latest reasoning models `o3` and `o4-mini`.
- The returned text will not contain the stop sequence.
+ Up to 4 sequences where the API will stop generating further tokens. The
+ returned text will not contain the stop sequence.
"""
stream_options: Optional[ChatCompletionStreamOptionsParam]
diff --git a/src/openai/types/eval_create_params.py b/src/openai/types/eval_create_params.py
new file mode 100644
index 0000000000..03f44f2c8c
--- /dev/null
+++ b/src/openai/types/eval_create_params.py
@@ -0,0 +1,215 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from .shared_params.metadata import Metadata
+from .eval_string_check_grader_param import EvalStringCheckGraderParam
+from .eval_text_similarity_grader_param import EvalTextSimilarityGraderParam
+from .responses.response_input_text_param import ResponseInputTextParam
+
+__all__ = [
+ "EvalCreateParams",
+ "DataSourceConfig",
+ "DataSourceConfigCustom",
+ "DataSourceConfigLogs",
+ "TestingCriterion",
+ "TestingCriterionLabelModel",
+ "TestingCriterionLabelModelInput",
+ "TestingCriterionLabelModelInputSimpleInputMessage",
+ "TestingCriterionLabelModelInputEvalItem",
+ "TestingCriterionLabelModelInputEvalItemContent",
+ "TestingCriterionLabelModelInputEvalItemContentOutputText",
+ "TestingCriterionPython",
+ "TestingCriterionScoreModel",
+ "TestingCriterionScoreModelInput",
+ "TestingCriterionScoreModelInputContent",
+ "TestingCriterionScoreModelInputContentOutputText",
+]
+
+
+class EvalCreateParams(TypedDict, total=False):
+ data_source_config: Required[DataSourceConfig]
+ """The configuration for the data source used for the evaluation runs."""
+
+ testing_criteria: Required[Iterable[TestingCriterion]]
+ """A list of graders for all eval runs in this group."""
+
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+
+class DataSourceConfigCustom(TypedDict, total=False):
+ item_schema: Required[Dict[str, object]]
+ """The json schema for each row in the data source."""
+
+ type: Required[Literal["custom"]]
+ """The type of data source. Always `custom`."""
+
+ include_sample_schema: bool
+ """
+ Whether the eval should expect you to populate the sample namespace (ie, by
+ generating responses off of your data source)
+ """
+
+
+class DataSourceConfigLogs(TypedDict, total=False):
+ type: Required[Literal["logs"]]
+ """The type of data source. Always `logs`."""
+
+ metadata: Dict[str, object]
+ """Metadata filters for the logs data source."""
+
+
+DataSourceConfig: TypeAlias = Union[DataSourceConfigCustom, DataSourceConfigLogs]
+
+
+class TestingCriterionLabelModelInputSimpleInputMessage(TypedDict, total=False):
+ content: Required[str]
+ """The content of the message."""
+
+ role: Required[str]
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class TestingCriterionLabelModelInputEvalItemContentOutputText(TypedDict, total=False):
+ text: Required[str]
+ """The text output from the model."""
+
+ type: Required[Literal["output_text"]]
+ """The type of the output text. Always `output_text`."""
+
+
+TestingCriterionLabelModelInputEvalItemContent: TypeAlias = Union[
+ str, ResponseInputTextParam, TestingCriterionLabelModelInputEvalItemContentOutputText
+]
+
+
+class TestingCriterionLabelModelInputEvalItem(TypedDict, total=False):
+ content: Required[TestingCriterionLabelModelInputEvalItemContent]
+ """Text inputs to the model - can contain template strings."""
+
+ role: Required[Literal["user", "assistant", "system", "developer"]]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Literal["message"]
+ """The type of the message input. Always `message`."""
+
+
+TestingCriterionLabelModelInput: TypeAlias = Union[
+ TestingCriterionLabelModelInputSimpleInputMessage, TestingCriterionLabelModelInputEvalItem
+]
+
+
+class TestingCriterionLabelModel(TypedDict, total=False):
+ input: Required[Iterable[TestingCriterionLabelModelInput]]
+ """A list of chat messages forming the prompt or context.
+
+ May include variable references to the "item" namespace, ie {{item.name}}.
+ """
+
+ labels: Required[List[str]]
+ """The labels to classify to each item in the evaluation."""
+
+ model: Required[str]
+ """The model to use for the evaluation. Must support structured outputs."""
+
+ name: Required[str]
+ """The name of the grader."""
+
+ passing_labels: Required[List[str]]
+ """The labels that indicate a passing result. Must be a subset of labels."""
+
+ type: Required[Literal["label_model"]]
+ """The object type, which is always `label_model`."""
+
+
+class TestingCriterionPython(TypedDict, total=False):
+ name: Required[str]
+ """The name of the grader."""
+
+ source: Required[str]
+ """The source code of the python script."""
+
+ type: Required[Literal["python"]]
+ """The object type, which is always `python`."""
+
+ image_tag: str
+ """The image tag to use for the python script."""
+
+ pass_threshold: float
+ """The threshold for the score."""
+
+
+class TestingCriterionScoreModelInputContentOutputText(TypedDict, total=False):
+ text: Required[str]
+ """The text output from the model."""
+
+ type: Required[Literal["output_text"]]
+ """The type of the output text. Always `output_text`."""
+
+
+TestingCriterionScoreModelInputContent: TypeAlias = Union[
+ str, ResponseInputTextParam, TestingCriterionScoreModelInputContentOutputText
+]
+
+
+class TestingCriterionScoreModelInput(TypedDict, total=False):
+ content: Required[TestingCriterionScoreModelInputContent]
+ """Text inputs to the model - can contain template strings."""
+
+ role: Required[Literal["user", "assistant", "system", "developer"]]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Literal["message"]
+ """The type of the message input. Always `message`."""
+
+
+class TestingCriterionScoreModel(TypedDict, total=False):
+ input: Required[Iterable[TestingCriterionScoreModelInput]]
+ """The input text. This may include template strings."""
+
+ model: Required[str]
+ """The model to use for the evaluation."""
+
+ name: Required[str]
+ """The name of the grader."""
+
+ type: Required[Literal["score_model"]]
+ """The object type, which is always `score_model`."""
+
+ pass_threshold: float
+ """The threshold for the score."""
+
+ range: Iterable[float]
+ """The range of the score. Defaults to `[0, 1]`."""
+
+ sampling_params: object
+ """The sampling parameters for the model."""
+
+
+TestingCriterion: TypeAlias = Union[
+ TestingCriterionLabelModel,
+ EvalStringCheckGraderParam,
+ EvalTextSimilarityGraderParam,
+ TestingCriterionPython,
+ TestingCriterionScoreModel,
+]
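
These params back the eval-create endpoint that this change wires up. A sketch of a custom data source plus a single label-model grader, assuming the new `client.evals.create()` method and using placeholder schema, model, and label values:

from openai import OpenAI

client = OpenAI()

eval_obj = client.evals.create(
    name="ticket-triage",
    data_source_config={
        "type": "custom",
        "item_schema": {
            "type": "object",
            "properties": {"ticket": {"type": "string"}},
            "required": ["ticket"],
        },
        "include_sample_schema": True,
    },
    testing_criteria=[
        {
            "type": "label_model",
            "name": "urgency-grader",
            "model": "gpt-4o-mini",  # placeholder grading model
            "input": [
                {"role": "system", "content": "Label the ticket as urgent or routine."},
                {"role": "user", "content": "{{item.ticket}}"},
            ],
            "labels": ["urgent", "routine"],
            "passing_labels": ["urgent"],
        }
    ],
)
print(eval_obj.id)
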
diff --git a/src/openai/types/eval_create_response.py b/src/openai/types/eval_create_response.py
new file mode 100644
index 0000000000..6d77a81870
--- /dev/null
+++ b/src/openai/types/eval_create_response.py
@@ -0,0 +1,142 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .shared.metadata import Metadata
+from .eval_label_model_grader import EvalLabelModelGrader
+from .eval_string_check_grader import EvalStringCheckGrader
+from .eval_text_similarity_grader import EvalTextSimilarityGrader
+from .responses.response_input_text import ResponseInputText
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig
+from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
+
+__all__ = [
+ "EvalCreateResponse",
+ "DataSourceConfig",
+ "TestingCriterion",
+ "TestingCriterionPython",
+ "TestingCriterionScoreModel",
+ "TestingCriterionScoreModelInput",
+ "TestingCriterionScoreModelInputContent",
+ "TestingCriterionScoreModelInputContentOutputText",
+]
+
+DataSourceConfig: TypeAlias = Annotated[
+ Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+]
+
+
+class TestingCriterionPython(BaseModel):
+ __test__ = False
+ name: str
+ """The name of the grader."""
+
+ source: str
+ """The source code of the python script."""
+
+ type: Literal["python"]
+ """The object type, which is always `python`."""
+
+ image_tag: Optional[str] = None
+ """The image tag to use for the python script."""
+
+ pass_threshold: Optional[float] = None
+ """The threshold for the score."""
+
+
+class TestingCriterionScoreModelInputContentOutputText(BaseModel):
+ __test__ = False
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+TestingCriterionScoreModelInputContent: TypeAlias = Union[
+ str, ResponseInputText, TestingCriterionScoreModelInputContentOutputText
+]
+
+
+class TestingCriterionScoreModelInput(BaseModel):
+ __test__ = False
+ content: TestingCriterionScoreModelInputContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+class TestingCriterionScoreModel(BaseModel):
+ __test__ = False
+ input: List[TestingCriterionScoreModelInput]
+ """The input text. This may include template strings."""
+
+ model: str
+ """The model to use for the evaluation."""
+
+ name: str
+ """The name of the grader."""
+
+ type: Literal["score_model"]
+ """The object type, which is always `score_model`."""
+
+ pass_threshold: Optional[float] = None
+ """The threshold for the score."""
+
+ range: Optional[List[float]] = None
+ """The range of the score. Defaults to `[0, 1]`."""
+
+ sampling_params: Optional[object] = None
+ """The sampling parameters for the model."""
+
+
+TestingCriterion: TypeAlias = Annotated[
+ Union[
+ EvalLabelModelGrader,
+ EvalStringCheckGrader,
+ EvalTextSimilarityGrader,
+ TestingCriterionPython,
+ TestingCriterionScoreModel,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class EvalCreateResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the eval was created."""
+
+ data_source_config: DataSourceConfig
+ """Configuration of data sources used in runs of the evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ object: Literal["eval"]
+ """The object type."""
+
+ testing_criteria: List[TestingCriterion]
+ """A list of testing criteria."""
diff --git a/src/openai/types/eval_custom_data_source_config.py b/src/openai/types/eval_custom_data_source_config.py
new file mode 100644
index 0000000000..d99701cc71
--- /dev/null
+++ b/src/openai/types/eval_custom_data_source_config.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+
+__all__ = ["EvalCustomDataSourceConfig"]
+
+
+class EvalCustomDataSourceConfig(BaseModel):
+ schema_: Dict[str, object] = FieldInfo(alias="schema")
+ """
+ The json schema for the run data source items. Learn how to build JSON schemas
+ [here](https://json-schema.org/).
+ """
+
+ type: Literal["custom"]
+ """The type of data source. Always `custom`."""
diff --git a/src/openai/types/eval_delete_response.py b/src/openai/types/eval_delete_response.py
new file mode 100644
index 0000000000..a27261e242
--- /dev/null
+++ b/src/openai/types/eval_delete_response.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from .._models import BaseModel
+
+__all__ = ["EvalDeleteResponse"]
+
+
+class EvalDeleteResponse(BaseModel):
+ deleted: bool
+
+ eval_id: str
+
+ object: str
diff --git a/src/openai/types/eval_label_model_grader.py b/src/openai/types/eval_label_model_grader.py
new file mode 100644
index 0000000000..40e6bda140
--- /dev/null
+++ b/src/openai/types/eval_label_model_grader.py
@@ -0,0 +1,53 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, TypeAlias
+
+from .._models import BaseModel
+from .responses.response_input_text import ResponseInputText
+
+__all__ = ["EvalLabelModelGrader", "Input", "InputContent", "InputContentOutputText"]
+
+
+class InputContentOutputText(BaseModel):
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+InputContent: TypeAlias = Union[str, ResponseInputText, InputContentOutputText]
+
+
+class Input(BaseModel):
+ content: InputContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+class EvalLabelModelGrader(BaseModel):
+ input: List[Input]
+
+ labels: List[str]
+ """The labels to assign to each item in the evaluation."""
+
+ model: str
+ """The model to use for the evaluation. Must support structured outputs."""
+
+ name: str
+ """The name of the grader."""
+
+ passing_labels: List[str]
+ """The labels that indicate a passing result. Must be a subset of labels."""
+
+ type: Literal["label_model"]
+ """The object type, which is always `label_model`."""
diff --git a/src/openai/types/eval_list_params.py b/src/openai/types/eval_list_params.py
new file mode 100644
index 0000000000..d9a12d0ddf
--- /dev/null
+++ b/src/openai/types/eval_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["EvalListParams"]
+
+
+class EvalListParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last eval from the previous pagination request."""
+
+ limit: int
+ """Number of evals to retrieve."""
+
+ order: Literal["asc", "desc"]
+ """Sort order for evals by timestamp.
+
+ Use `asc` for ascending order or `desc` for descending order.
+ """
+
+ order_by: Literal["created_at", "updated_at"]
+ """Evals can be ordered by creation time or last updated time.
+
+ Use `created_at` for creation time or `updated_at` for last updated time.
+ """
diff --git a/src/openai/types/eval_list_response.py b/src/openai/types/eval_list_response.py
new file mode 100644
index 0000000000..8c7e9c5588
--- /dev/null
+++ b/src/openai/types/eval_list_response.py
@@ -0,0 +1,142 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .shared.metadata import Metadata
+from .eval_label_model_grader import EvalLabelModelGrader
+from .eval_string_check_grader import EvalStringCheckGrader
+from .eval_text_similarity_grader import EvalTextSimilarityGrader
+from .responses.response_input_text import ResponseInputText
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig
+from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
+
+__all__ = [
+ "EvalListResponse",
+ "DataSourceConfig",
+ "TestingCriterion",
+ "TestingCriterionPython",
+ "TestingCriterionScoreModel",
+ "TestingCriterionScoreModelInput",
+ "TestingCriterionScoreModelInputContent",
+ "TestingCriterionScoreModelInputContentOutputText",
+]
+
+DataSourceConfig: TypeAlias = Annotated[
+ Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+]
+
+
+class TestingCriterionPython(BaseModel):
+ __test__ = False
+ name: str
+ """The name of the grader."""
+
+ source: str
+ """The source code of the python script."""
+
+ type: Literal["python"]
+ """The object type, which is always `python`."""
+
+ image_tag: Optional[str] = None
+ """The image tag to use for the python script."""
+
+ pass_threshold: Optional[float] = None
+ """The threshold for the score."""
+
+
+class TestingCriterionScoreModelInputContentOutputText(BaseModel):
+ __test__ = False
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+TestingCriterionScoreModelInputContent: TypeAlias = Union[
+ str, ResponseInputText, TestingCriterionScoreModelInputContentOutputText
+]
+
+
+class TestingCriterionScoreModelInput(BaseModel):
+ __test__ = False
+ content: TestingCriterionScoreModelInputContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+class TestingCriterionScoreModel(BaseModel):
+ __test__ = False
+ input: List[TestingCriterionScoreModelInput]
+ """The input text. This may include template strings."""
+
+ model: str
+ """The model to use for the evaluation."""
+
+ name: str
+ """The name of the grader."""
+
+ type: Literal["score_model"]
+ """The object type, which is always `score_model`."""
+
+ pass_threshold: Optional[float] = None
+ """The threshold for the score."""
+
+ range: Optional[List[float]] = None
+ """The range of the score. Defaults to `[0, 1]`."""
+
+ sampling_params: Optional[object] = None
+ """The sampling parameters for the model."""
+
+
+TestingCriterion: TypeAlias = Annotated[
+ Union[
+ EvalLabelModelGrader,
+ EvalStringCheckGrader,
+ EvalTextSimilarityGrader,
+ TestingCriterionPython,
+ TestingCriterionScoreModel,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class EvalListResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the eval was created."""
+
+ data_source_config: DataSourceConfig
+ """Configuration of data sources used in runs of the evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ object: Literal["eval"]
+ """The object type."""
+
+ testing_criteria: List[TestingCriterion]
+ """A list of testing criteria."""
diff --git a/src/openai/types/eval_retrieve_response.py b/src/openai/types/eval_retrieve_response.py
new file mode 100644
index 0000000000..625bae80f4
--- /dev/null
+++ b/src/openai/types/eval_retrieve_response.py
@@ -0,0 +1,142 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .shared.metadata import Metadata
+from .eval_label_model_grader import EvalLabelModelGrader
+from .eval_string_check_grader import EvalStringCheckGrader
+from .eval_text_similarity_grader import EvalTextSimilarityGrader
+from .responses.response_input_text import ResponseInputText
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig
+from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
+
+__all__ = [
+ "EvalRetrieveResponse",
+ "DataSourceConfig",
+ "TestingCriterion",
+ "TestingCriterionPython",
+ "TestingCriterionScoreModel",
+ "TestingCriterionScoreModelInput",
+ "TestingCriterionScoreModelInputContent",
+ "TestingCriterionScoreModelInputContentOutputText",
+]
+
+DataSourceConfig: TypeAlias = Annotated[
+ Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+]
+
+
+class TestingCriterionPython(BaseModel):
+ __test__ = False
+ name: str
+ """The name of the grader."""
+
+ source: str
+ """The source code of the python script."""
+
+ type: Literal["python"]
+ """The object type, which is always `python`."""
+
+ image_tag: Optional[str] = None
+ """The image tag to use for the python script."""
+
+ pass_threshold: Optional[float] = None
+ """The threshold for the score."""
+
+
+class TestingCriterionScoreModelInputContentOutputText(BaseModel):
+ __test__ = False
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+TestingCriterionScoreModelInputContent: TypeAlias = Union[
+ str, ResponseInputText, TestingCriterionScoreModelInputContentOutputText
+]
+
+
+class TestingCriterionScoreModelInput(BaseModel):
+ __test__ = False
+ content: TestingCriterionScoreModelInputContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+class TestingCriterionScoreModel(BaseModel):
+ __test__ = False
+ input: List[TestingCriterionScoreModelInput]
+ """The input text. This may include template strings."""
+
+ model: str
+ """The model to use for the evaluation."""
+
+ name: str
+ """The name of the grader."""
+
+ type: Literal["score_model"]
+ """The object type, which is always `score_model`."""
+
+ pass_threshold: Optional[float] = None
+ """The threshold for the score."""
+
+ range: Optional[List[float]] = None
+ """The range of the score. Defaults to `[0, 1]`."""
+
+ sampling_params: Optional[object] = None
+ """The sampling parameters for the model."""
+
+
+TestingCriterion: TypeAlias = Annotated[
+ Union[
+ EvalLabelModelGrader,
+ EvalStringCheckGrader,
+ EvalTextSimilarityGrader,
+ TestingCriterionPython,
+ TestingCriterionScoreModel,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class EvalRetrieveResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the eval was created."""
+
+ data_source_config: DataSourceConfig
+ """Configuration of data sources used in runs of the evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ object: Literal["eval"]
+ """The object type."""
+
+ testing_criteria: List[TestingCriterion]
+ """A list of testing criteria."""
diff --git a/src/openai/types/eval_stored_completions_data_source_config.py b/src/openai/types/eval_stored_completions_data_source_config.py
new file mode 100644
index 0000000000..98f86a4719
--- /dev/null
+++ b/src/openai/types/eval_stored_completions_data_source_config.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, Optional
+from typing_extensions import Literal
+
+from pydantic import Field as FieldInfo
+
+from .._models import BaseModel
+from .shared.metadata import Metadata
+
+__all__ = ["EvalStoredCompletionsDataSourceConfig"]
+
+
+class EvalStoredCompletionsDataSourceConfig(BaseModel):
+ schema_: Dict[str, object] = FieldInfo(alias="schema")
+ """
+ The json schema for the run data source items. Learn how to build JSON schemas
+ [here](https://json-schema.org/).
+ """
+
+ type: Literal["stored_completions"]
+ """The type of data source. Always `stored_completions`."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
diff --git a/src/openai/types/eval_string_check_grader.py b/src/openai/types/eval_string_check_grader.py
new file mode 100644
index 0000000000..4dfc8035f9
--- /dev/null
+++ b/src/openai/types/eval_string_check_grader.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EvalStringCheckGrader"]
+
+
+class EvalStringCheckGrader(BaseModel):
+ input: str
+ """The input text. This may include template strings."""
+
+ name: str
+ """The name of the grader."""
+
+ operation: Literal["eq", "ne", "like", "ilike"]
+ """The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`."""
+
+ reference: str
+ """The reference text. This may include template strings."""
+
+ type: Literal["string_check"]
+ """The object type, which is always `string_check`."""
diff --git a/src/openai/types/eval_string_check_grader_param.py b/src/openai/types/eval_string_check_grader_param.py
new file mode 100644
index 0000000000..3511329f8b
--- /dev/null
+++ b/src/openai/types/eval_string_check_grader_param.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["EvalStringCheckGraderParam"]
+
+
+class EvalStringCheckGraderParam(TypedDict, total=False):
+ input: Required[str]
+ """The input text. This may include template strings."""
+
+ name: Required[str]
+ """The name of the grader."""
+
+ operation: Required[Literal["eq", "ne", "like", "ilike"]]
+ """The string check operation to perform. One of `eq`, `ne`, `like`, or `ilike`."""
+
+ reference: Required[str]
+ """The reference text. This may include template strings."""
+
+ type: Required[Literal["string_check"]]
+ """The object type, which is always `string_check`."""
diff --git a/src/openai/types/eval_text_similarity_grader.py b/src/openai/types/eval_text_similarity_grader.py
new file mode 100644
index 0000000000..853c6d4fbf
--- /dev/null
+++ b/src/openai/types/eval_text_similarity_grader.py
@@ -0,0 +1,34 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EvalTextSimilarityGrader"]
+
+
+class EvalTextSimilarityGrader(BaseModel):
+ evaluation_metric: Literal[
+ "fuzzy_match", "bleu", "gleu", "meteor", "rouge_1", "rouge_2", "rouge_3", "rouge_4", "rouge_5", "rouge_l"
+ ]
+ """The evaluation metric to use.
+
+ One of `fuzzy_match`, `bleu`, `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`,
+ `rouge_4`, `rouge_5`, or `rouge_l`.
+ """
+
+ input: str
+ """The text being graded."""
+
+ pass_threshold: float
+ """A float score where a value greater than or equal indicates a passing grade."""
+
+ reference: str
+ """The text being graded against."""
+
+ type: Literal["text_similarity"]
+ """The type of grader."""
+
+ name: Optional[str] = None
+ """The name of the grader."""
diff --git a/src/openai/types/eval_text_similarity_grader_param.py b/src/openai/types/eval_text_similarity_grader_param.py
new file mode 100644
index 0000000000..f07cc29178
--- /dev/null
+++ b/src/openai/types/eval_text_similarity_grader_param.py
@@ -0,0 +1,35 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["EvalTextSimilarityGraderParam"]
+
+
+class EvalTextSimilarityGraderParam(TypedDict, total=False):
+ evaluation_metric: Required[
+ Literal[
+ "fuzzy_match", "bleu", "gleu", "meteor", "rouge_1", "rouge_2", "rouge_3", "rouge_4", "rouge_5", "rouge_l"
+ ]
+ ]
+ """The evaluation metric to use.
+
+ One of `fuzzy_match`, `bleu`, `gleu`, `meteor`, `rouge_1`, `rouge_2`, `rouge_3`,
+ `rouge_4`, `rouge_5`, or `rouge_l`.
+ """
+
+ input: Required[str]
+ """The text being graded."""
+
+ pass_threshold: Required[float]
+ """A float score where a value greater than or equal indicates a passing grade."""
+
+ reference: Required[str]
+ """The text being graded against."""
+
+ type: Required[Literal["text_similarity"]]
+ """The type of grader."""
+
+ name: str
+ """The name of the grader."""
diff --git a/src/openai/types/eval_update_params.py b/src/openai/types/eval_update_params.py
new file mode 100644
index 0000000000..042db29af5
--- /dev/null
+++ b/src/openai/types/eval_update_params.py
@@ -0,0 +1,25 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import TypedDict
+
+from .shared_params.metadata import Metadata
+
+__all__ = ["EvalUpdateParams"]
+
+
+class EvalUpdateParams(TypedDict, total=False):
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """Rename the evaluation."""
diff --git a/src/openai/types/eval_update_response.py b/src/openai/types/eval_update_response.py
new file mode 100644
index 0000000000..2c280977a1
--- /dev/null
+++ b/src/openai/types/eval_update_response.py
@@ -0,0 +1,142 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from .._utils import PropertyInfo
+from .._models import BaseModel
+from .shared.metadata import Metadata
+from .eval_label_model_grader import EvalLabelModelGrader
+from .eval_string_check_grader import EvalStringCheckGrader
+from .eval_text_similarity_grader import EvalTextSimilarityGrader
+from .responses.response_input_text import ResponseInputText
+from .eval_custom_data_source_config import EvalCustomDataSourceConfig
+from .eval_stored_completions_data_source_config import EvalStoredCompletionsDataSourceConfig
+
+__all__ = [
+ "EvalUpdateResponse",
+ "DataSourceConfig",
+ "TestingCriterion",
+ "TestingCriterionPython",
+ "TestingCriterionScoreModel",
+ "TestingCriterionScoreModelInput",
+ "TestingCriterionScoreModelInputContent",
+ "TestingCriterionScoreModelInputContentOutputText",
+]
+
+DataSourceConfig: TypeAlias = Annotated[
+ Union[EvalCustomDataSourceConfig, EvalStoredCompletionsDataSourceConfig], PropertyInfo(discriminator="type")
+]
+
+
+class TestingCriterionPython(BaseModel):
+ __test__ = False
+ name: str
+ """The name of the grader."""
+
+ source: str
+ """The source code of the python script."""
+
+ type: Literal["python"]
+ """The object type, which is always `python`."""
+
+ image_tag: Optional[str] = None
+ """The image tag to use for the python script."""
+
+ pass_threshold: Optional[float] = None
+ """The threshold for the score."""
+
+
+class TestingCriterionScoreModelInputContentOutputText(BaseModel):
+ __test__ = False
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+TestingCriterionScoreModelInputContent: TypeAlias = Union[
+ str, ResponseInputText, TestingCriterionScoreModelInputContentOutputText
+]
+
+
+class TestingCriterionScoreModelInput(BaseModel):
+ __test__ = False
+ content: TestingCriterionScoreModelInputContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+class TestingCriterionScoreModel(BaseModel):
+ __test__ = False
+ input: List[TestingCriterionScoreModelInput]
+ """The input text. This may include template strings."""
+
+ model: str
+ """The model to use for the evaluation."""
+
+ name: str
+ """The name of the grader."""
+
+ type: Literal["score_model"]
+ """The object type, which is always `score_model`."""
+
+ pass_threshold: Optional[float] = None
+ """The threshold for the score."""
+
+ range: Optional[List[float]] = None
+ """The range of the score. Defaults to `[0, 1]`."""
+
+ sampling_params: Optional[object] = None
+ """The sampling parameters for the model."""
+
+
+TestingCriterion: TypeAlias = Annotated[
+ Union[
+ EvalLabelModelGrader,
+ EvalStringCheckGrader,
+ EvalTextSimilarityGrader,
+ TestingCriterionPython,
+ TestingCriterionScoreModel,
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class EvalUpdateResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the eval was created."""
+
+ data_source_config: DataSourceConfig
+ """Configuration of data sources used in runs of the evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the evaluation."""
+
+ object: Literal["eval"]
+ """The object type."""
+
+ testing_criteria: List[TestingCriterion]
+ """A list of testing criteria."""
diff --git a/src/openai/types/evals/__init__.py b/src/openai/types/evals/__init__.py
new file mode 100644
index 0000000000..ebf84c6b8d
--- /dev/null
+++ b/src/openai/types/evals/__init__.py
@@ -0,0 +1,22 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .eval_api_error import EvalAPIError as EvalAPIError
+from .run_list_params import RunListParams as RunListParams
+from .run_create_params import RunCreateParams as RunCreateParams
+from .run_list_response import RunListResponse as RunListResponse
+from .run_cancel_response import RunCancelResponse as RunCancelResponse
+from .run_create_response import RunCreateResponse as RunCreateResponse
+from .run_delete_response import RunDeleteResponse as RunDeleteResponse
+from .run_retrieve_response import RunRetrieveResponse as RunRetrieveResponse
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource as CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import (
+ CreateEvalCompletionsRunDataSource as CreateEvalCompletionsRunDataSource,
+)
+from .create_eval_jsonl_run_data_source_param import (
+ CreateEvalJSONLRunDataSourceParam as CreateEvalJSONLRunDataSourceParam,
+)
+from .create_eval_completions_run_data_source_param import (
+ CreateEvalCompletionsRunDataSourceParam as CreateEvalCompletionsRunDataSourceParam,
+)
diff --git a/src/openai/types/evals/create_eval_completions_run_data_source.py b/src/openai/types/evals/create_eval_completions_run_data_source.py
new file mode 100644
index 0000000000..29c687b542
--- /dev/null
+++ b/src/openai/types/evals/create_eval_completions_run_data_source.py
@@ -0,0 +1,166 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from ..shared.metadata import Metadata
+from ..responses.easy_input_message import EasyInputMessage
+from ..responses.response_input_text import ResponseInputText
+
+__all__ = [
+ "CreateEvalCompletionsRunDataSource",
+ "Source",
+ "SourceFileContent",
+ "SourceFileContentContent",
+ "SourceFileID",
+ "SourceStoredCompletions",
+ "InputMessages",
+ "InputMessagesTemplate",
+ "InputMessagesTemplateTemplate",
+ "InputMessagesTemplateTemplateMessage",
+ "InputMessagesTemplateTemplateMessageContent",
+ "InputMessagesTemplateTemplateMessageContentOutputText",
+ "InputMessagesItemReference",
+ "SamplingParams",
+]
+
+
+class SourceFileContentContent(BaseModel):
+ item: Dict[str, object]
+
+ sample: Optional[Dict[str, object]] = None
+
+
+class SourceFileContent(BaseModel):
+ content: List[SourceFileContentContent]
+ """The content of the jsonl file."""
+
+ type: Literal["file_content"]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(BaseModel):
+ id: str
+ """The identifier of the file."""
+
+ type: Literal["file_id"]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class SourceStoredCompletions(BaseModel):
+ type: Literal["stored_completions"]
+ """The type of source. Always `stored_completions`."""
+
+ created_after: Optional[int] = None
+ """An optional Unix timestamp to filter items created after this time."""
+
+ created_before: Optional[int] = None
+ """An optional Unix timestamp to filter items created before this time."""
+
+ limit: Optional[int] = None
+ """An optional maximum number of items to return."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: Optional[str] = None
+ """An optional model to filter by (e.g., 'gpt-4o')."""
+
+
+Source: TypeAlias = Annotated[
+ Union[SourceFileContent, SourceFileID, SourceStoredCompletions], PropertyInfo(discriminator="type")
+]
+
+
+class InputMessagesTemplateTemplateMessageContentOutputText(BaseModel):
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+InputMessagesTemplateTemplateMessageContent: TypeAlias = Union[
+ str, ResponseInputText, InputMessagesTemplateTemplateMessageContentOutputText
+]
+
+
+class InputMessagesTemplateTemplateMessage(BaseModel):
+ content: InputMessagesTemplateTemplateMessageContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+InputMessagesTemplateTemplate: TypeAlias = Annotated[
+ Union[EasyInputMessage, InputMessagesTemplateTemplateMessage], PropertyInfo(discriminator="type")
+]
+
+
+class InputMessagesTemplate(BaseModel):
+ template: List[InputMessagesTemplateTemplate]
+ """A list of chat messages forming the prompt or context.
+
+ May include variable references to the "item" namespace, ie {{item.name}}.
+ """
+
+ type: Literal["template"]
+ """The type of input messages. Always `template`."""
+
+
+class InputMessagesItemReference(BaseModel):
+ item_reference: str
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Literal["item_reference"]
+ """The type of input messages. Always `item_reference`."""
+
+
+InputMessages: TypeAlias = Annotated[
+ Union[InputMessagesTemplate, InputMessagesItemReference], PropertyInfo(discriminator="type")
+]
+
+
+class SamplingParams(BaseModel):
+ max_completion_tokens: Optional[int] = None
+ """The maximum number of tokens in the generated output."""
+
+ seed: Optional[int] = None
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: Optional[float] = None
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: Optional[float] = None
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class CreateEvalCompletionsRunDataSource(BaseModel):
+ source: Source
+ """A StoredCompletionsRunDataSource configuration describing a set of filters"""
+
+ type: Literal["completions"]
+ """The type of run data source. Always `completions`."""
+
+ input_messages: Optional[InputMessages] = None
+
+ model: Optional[str] = None
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ sampling_params: Optional[SamplingParams] = None
diff --git a/src/openai/types/evals/create_eval_completions_run_data_source_param.py b/src/openai/types/evals/create_eval_completions_run_data_source_param.py
new file mode 100644
index 0000000000..c53064ee27
--- /dev/null
+++ b/src/openai/types/evals/create_eval_completions_run_data_source_param.py
@@ -0,0 +1,160 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..shared_params.metadata import Metadata
+from ..responses.easy_input_message_param import EasyInputMessageParam
+from ..responses.response_input_text_param import ResponseInputTextParam
+
+__all__ = [
+ "CreateEvalCompletionsRunDataSourceParam",
+ "Source",
+ "SourceFileContent",
+ "SourceFileContentContent",
+ "SourceFileID",
+ "SourceStoredCompletions",
+ "InputMessages",
+ "InputMessagesTemplate",
+ "InputMessagesTemplateTemplate",
+ "InputMessagesTemplateTemplateMessage",
+ "InputMessagesTemplateTemplateMessageContent",
+ "InputMessagesTemplateTemplateMessageContentOutputText",
+ "InputMessagesItemReference",
+ "SamplingParams",
+]
+
+
+class SourceFileContentContent(TypedDict, total=False):
+ item: Required[Dict[str, object]]
+
+ sample: Dict[str, object]
+
+
+class SourceFileContent(TypedDict, total=False):
+ content: Required[Iterable[SourceFileContentContent]]
+ """The content of the jsonl file."""
+
+ type: Required[Literal["file_content"]]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(TypedDict, total=False):
+ id: Required[str]
+ """The identifier of the file."""
+
+ type: Required[Literal["file_id"]]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class SourceStoredCompletions(TypedDict, total=False):
+ type: Required[Literal["stored_completions"]]
+ """The type of source. Always `stored_completions`."""
+
+ created_after: Optional[int]
+ """An optional Unix timestamp to filter items created after this time."""
+
+ created_before: Optional[int]
+ """An optional Unix timestamp to filter items created before this time."""
+
+ limit: Optional[int]
+ """An optional maximum number of items to return."""
+
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: Optional[str]
+ """An optional model to filter by (e.g., 'gpt-4o')."""
+
+
+Source: TypeAlias = Union[SourceFileContent, SourceFileID, SourceStoredCompletions]
+
+
+class InputMessagesTemplateTemplateMessageContentOutputText(TypedDict, total=False):
+ text: Required[str]
+ """The text output from the model."""
+
+ type: Required[Literal["output_text"]]
+ """The type of the output text. Always `output_text`."""
+
+
+InputMessagesTemplateTemplateMessageContent: TypeAlias = Union[
+ str, ResponseInputTextParam, InputMessagesTemplateTemplateMessageContentOutputText
+]
+
+
+class InputMessagesTemplateTemplateMessage(TypedDict, total=False):
+ content: Required[InputMessagesTemplateTemplateMessageContent]
+ """Text inputs to the model - can contain template strings."""
+
+ role: Required[Literal["user", "assistant", "system", "developer"]]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Literal["message"]
+ """The type of the message input. Always `message`."""
+
+
+InputMessagesTemplateTemplate: TypeAlias = Union[EasyInputMessageParam, InputMessagesTemplateTemplateMessage]
+
+
+class InputMessagesTemplate(TypedDict, total=False):
+ template: Required[Iterable[InputMessagesTemplateTemplate]]
+ """A list of chat messages forming the prompt or context.
+
+ May include variable references to the "item" namespace, ie {{item.name}}.
+ """
+
+ type: Required[Literal["template"]]
+ """The type of input messages. Always `template`."""
+
+
+class InputMessagesItemReference(TypedDict, total=False):
+ item_reference: Required[str]
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Required[Literal["item_reference"]]
+ """The type of input messages. Always `item_reference`."""
+
+
+InputMessages: TypeAlias = Union[InputMessagesTemplate, InputMessagesItemReference]
+
+
+class SamplingParams(TypedDict, total=False):
+ max_completion_tokens: int
+ """The maximum number of tokens in the generated output."""
+
+ seed: int
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: float
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: float
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class CreateEvalCompletionsRunDataSourceParam(TypedDict, total=False):
+ source: Required[Source]
+ """A StoredCompletionsRunDataSource configuration describing a set of filters"""
+
+ type: Required[Literal["completions"]]
+ """The type of run data source. Always `completions`."""
+
+ input_messages: InputMessages
+
+ model: str
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ sampling_params: SamplingParams
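Since the definitions above are TypedDicts, callers build them as plain dicts. A minimal sketch of a `completions` run data source (the model name, filter values, and template text are illustrative placeholders, not taken from this patch):

completions_data_source = {
    "type": "completions",
    "model": "gpt-4o-mini",  # placeholder model name
    "source": {
        "type": "stored_completions",
        "model": "gpt-4o-mini",  # filter stored completions by model
        "limit": 100,
    },
    "input_messages": {
        "type": "template",
        "template": [
            {"role": "developer", "content": "Grade the answer to {{item.question}}."},
            {"role": "user", "content": "{{item.question}}"},
        ],
    },
    "sampling_params": {"temperature": 0.2, "max_completion_tokens": 256},
}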
diff --git a/src/openai/types/evals/create_eval_jsonl_run_data_source.py b/src/openai/types/evals/create_eval_jsonl_run_data_source.py
new file mode 100644
index 0000000000..d2be56243b
--- /dev/null
+++ b/src/openai/types/evals/create_eval_jsonl_run_data_source.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+
+__all__ = ["CreateEvalJSONLRunDataSource", "Source", "SourceFileContent", "SourceFileContentContent", "SourceFileID"]
+
+
+class SourceFileContentContent(BaseModel):
+ item: Dict[str, object]
+
+ sample: Optional[Dict[str, object]] = None
+
+
+class SourceFileContent(BaseModel):
+ content: List[SourceFileContentContent]
+ """The content of the jsonl file."""
+
+ type: Literal["file_content"]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(BaseModel):
+ id: str
+ """The identifier of the file."""
+
+ type: Literal["file_id"]
+ """The type of jsonl source. Always `file_id`."""
+
+
+Source: TypeAlias = Annotated[Union[SourceFileContent, SourceFileID], PropertyInfo(discriminator="type")]
+
+
+class CreateEvalJSONLRunDataSource(BaseModel):
+ source: Source
+
+ type: Literal["jsonl"]
+ """The type of data source. Always `jsonl`."""
diff --git a/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py b/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py
new file mode 100644
index 0000000000..b8ba48a666
--- /dev/null
+++ b/src/openai/types/evals/create_eval_jsonl_run_data_source_param.py
@@ -0,0 +1,46 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union, Iterable
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+__all__ = [
+ "CreateEvalJSONLRunDataSourceParam",
+ "Source",
+ "SourceFileContent",
+ "SourceFileContentContent",
+ "SourceFileID",
+]
+
+
+class SourceFileContentContent(TypedDict, total=False):
+ item: Required[Dict[str, object]]
+
+ sample: Dict[str, object]
+
+
+class SourceFileContent(TypedDict, total=False):
+ content: Required[Iterable[SourceFileContentContent]]
+ """The content of the jsonl file."""
+
+ type: Required[Literal["file_content"]]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class SourceFileID(TypedDict, total=False):
+ id: Required[str]
+ """The identifier of the file."""
+
+ type: Required[Literal["file_id"]]
+ """The type of jsonl source. Always `file_id`."""
+
+
+Source: TypeAlias = Union[SourceFileContent, SourceFileID]
+
+
+class CreateEvalJSONLRunDataSourceParam(TypedDict, total=False):
+ source: Required[Source]
+
+ type: Required[Literal["jsonl"]]
+ """The type of data source. Always `jsonl`."""
diff --git a/src/openai/types/evals/eval_api_error.py b/src/openai/types/evals/eval_api_error.py
new file mode 100644
index 0000000000..fe76871024
--- /dev/null
+++ b/src/openai/types/evals/eval_api_error.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from ..._models import BaseModel
+
+__all__ = ["EvalAPIError"]
+
+
+class EvalAPIError(BaseModel):
+ code: str
+ """The error code."""
+
+ message: str
+ """The error message."""
diff --git a/src/openai/types/evals/run_cancel_response.py b/src/openai/types/evals/run_cancel_response.py
new file mode 100644
index 0000000000..eb6d689fc3
--- /dev/null
+++ b/src/openai/types/evals/run_cancel_response.py
@@ -0,0 +1,327 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .eval_api_error import EvalAPIError
+from ..shared.metadata import Metadata
+from ..shared.reasoning_effort import ReasoningEffort
+from ..responses.response_input_text import ResponseInputText
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
+
+__all__ = [
+ "RunCancelResponse",
+ "DataSource",
+ "DataSourceCompletions",
+ "DataSourceCompletionsSource",
+ "DataSourceCompletionsSourceFileContent",
+ "DataSourceCompletionsSourceFileContentContent",
+ "DataSourceCompletionsSourceFileID",
+ "DataSourceCompletionsSourceResponses",
+ "DataSourceCompletionsInputMessages",
+ "DataSourceCompletionsInputMessagesTemplate",
+ "DataSourceCompletionsInputMessagesTemplateTemplate",
+ "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText",
+ "DataSourceCompletionsInputMessagesItemReference",
+ "DataSourceCompletionsSamplingParams",
+ "PerModelUsage",
+ "PerTestingCriteriaResult",
+ "ResultCounts",
+]
+
+
+class DataSourceCompletionsSourceFileContentContent(BaseModel):
+ item: Dict[str, object]
+
+ sample: Optional[Dict[str, object]] = None
+
+
+class DataSourceCompletionsSourceFileContent(BaseModel):
+ content: List[DataSourceCompletionsSourceFileContentContent]
+ """The content of the jsonl file."""
+
+ type: Literal["file_content"]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class DataSourceCompletionsSourceFileID(BaseModel):
+ id: str
+ """The identifier of the file."""
+
+ type: Literal["file_id"]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class DataSourceCompletionsSourceResponses(BaseModel):
+ type: Literal["responses"]
+ """The type of run data source. Always `responses`."""
+
+ allow_parallel_tool_calls: Optional[bool] = None
+ """Whether to allow parallel tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ created_after: Optional[int] = None
+ """Only include items created after this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ created_before: Optional[int] = None
+ """Only include items created before this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ has_tool_calls: Optional[bool] = None
+ """Whether the response has tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ instructions_search: Optional[str] = None
+ """Optional search string for instructions.
+
+ This is a query parameter used to select responses.
+ """
+
+ metadata: Optional[object] = None
+ """Metadata filter for the responses.
+
+ This is a query parameter used to select responses.
+ """
+
+ model: Optional[str] = None
+ """The name of the model to find responses for.
+
+ This is a query parameter used to select responses.
+ """
+
+ reasoning_effort: Optional[ReasoningEffort] = None
+ """Optional reasoning effort parameter.
+
+ This is a query parameter used to select responses.
+ """
+
+ temperature: Optional[float] = None
+ """Sampling temperature. This is a query parameter used to select responses."""
+
+ top_p: Optional[float] = None
+ """Nucleus sampling parameter. This is a query parameter used to select responses."""
+
+ users: Optional[List[str]] = None
+ """List of user identifiers. This is a query parameter used to select responses."""
+
+
+DataSourceCompletionsSource: TypeAlias = Annotated[
+ Union[
+ DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
+ str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText
+]
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel):
+ content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[
+ DataSourceCompletionsInputMessagesTemplateTemplateChatMessage,
+ DataSourceCompletionsInputMessagesTemplateTemplateEvalItem,
+]
+
+
+class DataSourceCompletionsInputMessagesTemplate(BaseModel):
+ template: List[DataSourceCompletionsInputMessagesTemplateTemplate]
+ """A list of chat messages forming the prompt or context.
+
+ May include variable references to the "item" namespace, e.g. {{item.name}}.
+ """
+
+ type: Literal["template"]
+ """The type of input messages. Always `template`."""
+
+
+class DataSourceCompletionsInputMessagesItemReference(BaseModel):
+ item_reference: str
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Literal["item_reference"]
+ """The type of input messages. Always `item_reference`."""
+
+
+DataSourceCompletionsInputMessages: TypeAlias = Annotated[
+ Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DataSourceCompletionsSamplingParams(BaseModel):
+ max_completion_tokens: Optional[int] = None
+ """The maximum number of tokens in the generated output."""
+
+ seed: Optional[int] = None
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: Optional[float] = None
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: Optional[float] = None
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class DataSourceCompletions(BaseModel):
+ source: DataSourceCompletionsSource
+ """A EvalResponsesSource object describing a run data source configuration."""
+
+ type: Literal["completions"]
+ """The type of run data source. Always `completions`."""
+
+ input_messages: Optional[DataSourceCompletionsInputMessages] = None
+
+ model: Optional[str] = None
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ sampling_params: Optional[DataSourceCompletionsSamplingParams] = None
+
+
+DataSource: TypeAlias = Annotated[
+ Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class PerModelUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ invocation_count: int
+ """The number of invocations."""
+
+ run_model_name: str = FieldInfo(alias="model_name")
+ """The name of the model."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class PerTestingCriteriaResult(BaseModel):
+ failed: int
+ """Number of tests failed for this criteria."""
+
+ passed: int
+ """Number of tests passed for this criteria."""
+
+ testing_criteria: str
+ """A description of the testing criteria."""
+
+
+class ResultCounts(BaseModel):
+ errored: int
+ """Number of output items that resulted in an error."""
+
+ failed: int
+ """Number of output items that failed to pass the evaluation."""
+
+ passed: int
+ """Number of output items that passed the evaluation."""
+
+ total: int
+ """Total number of executed output items."""
+
+
+class RunCancelResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ data_source: DataSource
+ """Information about the run's data source."""
+
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ eval_id: str
+ """The identifier of the associated evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: str
+ """The model that is evaluated, if applicable."""
+
+ name: str
+ """The name of the evaluation run."""
+
+ object: Literal["eval.run"]
+ """The type of the object. Always "eval.run"."""
+
+ per_model_usage: List[PerModelUsage]
+ """Usage statistics for each model during the evaluation run."""
+
+ per_testing_criteria_results: List[PerTestingCriteriaResult]
+ """Results per testing criteria applied during the evaluation run."""
+
+ report_url: str
+ """The URL to the rendered evaluation run report on the UI dashboard."""
+
+ result_counts: ResultCounts
+ """Counters summarizing the outcomes of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/run_create_params.py b/src/openai/types/evals/run_create_params.py
new file mode 100644
index 0000000000..0c9720ea7a
--- /dev/null
+++ b/src/openai/types/evals/run_create_params.py
@@ -0,0 +1,247 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+from typing_extensions import Literal, Required, TypeAlias, TypedDict
+
+from ..shared_params.metadata import Metadata
+from ..shared.reasoning_effort import ReasoningEffort
+from ..responses.response_input_text_param import ResponseInputTextParam
+from .create_eval_jsonl_run_data_source_param import CreateEvalJSONLRunDataSourceParam
+from .create_eval_completions_run_data_source_param import CreateEvalCompletionsRunDataSourceParam
+
+__all__ = [
+ "RunCreateParams",
+ "DataSource",
+ "DataSourceCreateEvalResponsesRunDataSource",
+ "DataSourceCreateEvalResponsesRunDataSourceSource",
+ "DataSourceCreateEvalResponsesRunDataSourceSourceFileContent",
+ "DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent",
+ "DataSourceCreateEvalResponsesRunDataSourceSourceFileID",
+ "DataSourceCreateEvalResponsesRunDataSourceSourceResponses",
+ "DataSourceCreateEvalResponsesRunDataSourceInputMessages",
+ "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate",
+ "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate",
+ "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage",
+ "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem",
+ "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent",
+ "DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText",
+ "DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference",
+ "DataSourceCreateEvalResponsesRunDataSourceSamplingParams",
+]
+
+
+class RunCreateParams(TypedDict, total=False):
+ data_source: Required[DataSource]
+ """Details about the run's data source."""
+
+ metadata: Optional[Metadata]
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ name: str
+ """The name of the run."""
+
+
+class DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent(TypedDict, total=False):
+ item: Required[Dict[str, object]]
+
+ sample: Dict[str, object]
+
+
+class DataSourceCreateEvalResponsesRunDataSourceSourceFileContent(TypedDict, total=False):
+ content: Required[Iterable[DataSourceCreateEvalResponsesRunDataSourceSourceFileContentContent]]
+ """The content of the jsonl file."""
+
+ type: Required[Literal["file_content"]]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class DataSourceCreateEvalResponsesRunDataSourceSourceFileID(TypedDict, total=False):
+ id: Required[str]
+ """The identifier of the file."""
+
+ type: Required[Literal["file_id"]]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class DataSourceCreateEvalResponsesRunDataSourceSourceResponses(TypedDict, total=False):
+ type: Required[Literal["responses"]]
+ """The type of run data source. Always `responses`."""
+
+ allow_parallel_tool_calls: Optional[bool]
+ """Whether to allow parallel tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ created_after: Optional[int]
+ """Only include items created after this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ created_before: Optional[int]
+ """Only include items created before this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ has_tool_calls: Optional[bool]
+ """Whether the response has tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ instructions_search: Optional[str]
+ """Optional search string for instructions.
+
+ This is a query parameter used to select responses.
+ """
+
+ metadata: Optional[object]
+ """Metadata filter for the responses.
+
+ This is a query parameter used to select responses.
+ """
+
+ model: Optional[str]
+ """The name of the model to find responses for.
+
+ This is a query parameter used to select responses.
+ """
+
+ reasoning_effort: Optional[ReasoningEffort]
+ """Optional reasoning effort parameter.
+
+ This is a query parameter used to select responses.
+ """
+
+ temperature: Optional[float]
+ """Sampling temperature. This is a query parameter used to select responses."""
+
+ top_p: Optional[float]
+ """Nucleus sampling parameter. This is a query parameter used to select responses."""
+
+ users: Optional[List[str]]
+ """List of user identifiers. This is a query parameter used to select responses."""
+
+
+DataSourceCreateEvalResponsesRunDataSourceSource: TypeAlias = Union[
+ DataSourceCreateEvalResponsesRunDataSourceSourceFileContent,
+ DataSourceCreateEvalResponsesRunDataSourceSourceFileID,
+ DataSourceCreateEvalResponsesRunDataSourceSourceResponses,
+]
+
+
+class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage(TypedDict, total=False):
+ content: Required[str]
+ """The content of the message."""
+
+ role: Required[str]
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText(
+ TypedDict, total=False
+):
+ text: Required[str]
+ """The text output from the model."""
+
+ type: Required[Literal["output_text"]]
+ """The type of the output text. Always `output_text`."""
+
+
+DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
+ str,
+ ResponseInputTextParam,
+ DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContentOutputText,
+]
+
+
+class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem(TypedDict, total=False):
+ content: Required[DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItemContent]
+ """Text inputs to the model - can contain template strings."""
+
+ role: Required[Literal["user", "assistant", "system", "developer"]]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Literal["message"]
+ """The type of the message input. Always `message`."""
+
+
+DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate: TypeAlias = Union[
+ DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateChatMessage,
+ DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplateEvalItem,
+]
+
+
+class DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate(TypedDict, total=False):
+ template: Required[Iterable[DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplateTemplate]]
+ """A list of chat messages forming the prompt or context.
+
+ May include variable references to the "item" namespace, e.g. {{item.name}}.
+ """
+
+ type: Required[Literal["template"]]
+ """The type of input messages. Always `template`."""
+
+
+class DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference(TypedDict, total=False):
+ item_reference: Required[str]
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Required[Literal["item_reference"]]
+ """The type of input messages. Always `item_reference`."""
+
+
+DataSourceCreateEvalResponsesRunDataSourceInputMessages: TypeAlias = Union[
+ DataSourceCreateEvalResponsesRunDataSourceInputMessagesTemplate,
+ DataSourceCreateEvalResponsesRunDataSourceInputMessagesItemReference,
+]
+
+
+class DataSourceCreateEvalResponsesRunDataSourceSamplingParams(TypedDict, total=False):
+ max_completion_tokens: int
+ """The maximum number of tokens in the generated output."""
+
+ seed: int
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: float
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: float
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class DataSourceCreateEvalResponsesRunDataSource(TypedDict, total=False):
+ source: Required[DataSourceCreateEvalResponsesRunDataSourceSource]
+ """A EvalResponsesSource object describing a run data source configuration."""
+
+ type: Required[Literal["completions"]]
+ """The type of run data source. Always `completions`."""
+
+ input_messages: DataSourceCreateEvalResponsesRunDataSourceInputMessages
+
+ model: str
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ sampling_params: DataSourceCreateEvalResponsesRunDataSourceSamplingParams
+
+
+DataSource: TypeAlias = Union[
+ CreateEvalJSONLRunDataSourceParam,
+ CreateEvalCompletionsRunDataSourceParam,
+ DataSourceCreateEvalResponsesRunDataSource,
+]
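A sketch of how these params come together, assuming the `client.evals.runs.create(...)` method added elsewhere in this diff accepts them (the ids are placeholders):

from openai import OpenAI

client = OpenAI()

run = client.evals.runs.create(
    eval_id="eval_abc123",      # placeholder eval id
    name="nightly-regression",  # optional, see RunCreateParams above
    data_source={
        "type": "jsonl",
        "source": {"type": "file_id", "id": "file-abc123"},  # placeholder file id
    },
)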
diff --git a/src/openai/types/evals/run_create_response.py b/src/openai/types/evals/run_create_response.py
new file mode 100644
index 0000000000..459399511c
--- /dev/null
+++ b/src/openai/types/evals/run_create_response.py
@@ -0,0 +1,327 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .eval_api_error import EvalAPIError
+from ..shared.metadata import Metadata
+from ..shared.reasoning_effort import ReasoningEffort
+from ..responses.response_input_text import ResponseInputText
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
+
+__all__ = [
+ "RunCreateResponse",
+ "DataSource",
+ "DataSourceCompletions",
+ "DataSourceCompletionsSource",
+ "DataSourceCompletionsSourceFileContent",
+ "DataSourceCompletionsSourceFileContentContent",
+ "DataSourceCompletionsSourceFileID",
+ "DataSourceCompletionsSourceResponses",
+ "DataSourceCompletionsInputMessages",
+ "DataSourceCompletionsInputMessagesTemplate",
+ "DataSourceCompletionsInputMessagesTemplateTemplate",
+ "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText",
+ "DataSourceCompletionsInputMessagesItemReference",
+ "DataSourceCompletionsSamplingParams",
+ "PerModelUsage",
+ "PerTestingCriteriaResult",
+ "ResultCounts",
+]
+
+
+class DataSourceCompletionsSourceFileContentContent(BaseModel):
+ item: Dict[str, object]
+
+ sample: Optional[Dict[str, object]] = None
+
+
+class DataSourceCompletionsSourceFileContent(BaseModel):
+ content: List[DataSourceCompletionsSourceFileContentContent]
+ """The content of the jsonl file."""
+
+ type: Literal["file_content"]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class DataSourceCompletionsSourceFileID(BaseModel):
+ id: str
+ """The identifier of the file."""
+
+ type: Literal["file_id"]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class DataSourceCompletionsSourceResponses(BaseModel):
+ type: Literal["responses"]
+ """The type of run data source. Always `responses`."""
+
+ allow_parallel_tool_calls: Optional[bool] = None
+ """Whether to allow parallel tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ created_after: Optional[int] = None
+ """Only include items created after this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ created_before: Optional[int] = None
+ """Only include items created before this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ has_tool_calls: Optional[bool] = None
+ """Whether the response has tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ instructions_search: Optional[str] = None
+ """Optional search string for instructions.
+
+ This is a query parameter used to select responses.
+ """
+
+ metadata: Optional[object] = None
+ """Metadata filter for the responses.
+
+ This is a query parameter used to select responses.
+ """
+
+ model: Optional[str] = None
+ """The name of the model to find responses for.
+
+ This is a query parameter used to select responses.
+ """
+
+ reasoning_effort: Optional[ReasoningEffort] = None
+ """Optional reasoning effort parameter.
+
+ This is a query parameter used to select responses.
+ """
+
+ temperature: Optional[float] = None
+ """Sampling temperature. This is a query parameter used to select responses."""
+
+ top_p: Optional[float] = None
+ """Nucleus sampling parameter. This is a query parameter used to select responses."""
+
+ users: Optional[List[str]] = None
+ """List of user identifiers. This is a query parameter used to select responses."""
+
+
+DataSourceCompletionsSource: TypeAlias = Annotated[
+ Union[
+ DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
+ str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText
+]
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel):
+ content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[
+ DataSourceCompletionsInputMessagesTemplateTemplateChatMessage,
+ DataSourceCompletionsInputMessagesTemplateTemplateEvalItem,
+]
+
+
+class DataSourceCompletionsInputMessagesTemplate(BaseModel):
+ template: List[DataSourceCompletionsInputMessagesTemplateTemplate]
+ """A list of chat messages forming the prompt or context.
+
+ May include variable references to the "item" namespace, e.g. {{item.name}}.
+ """
+
+ type: Literal["template"]
+ """The type of input messages. Always `template`."""
+
+
+class DataSourceCompletionsInputMessagesItemReference(BaseModel):
+ item_reference: str
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Literal["item_reference"]
+ """The type of input messages. Always `item_reference`."""
+
+
+DataSourceCompletionsInputMessages: TypeAlias = Annotated[
+ Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DataSourceCompletionsSamplingParams(BaseModel):
+ max_completion_tokens: Optional[int] = None
+ """The maximum number of tokens in the generated output."""
+
+ seed: Optional[int] = None
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: Optional[float] = None
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: Optional[float] = None
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class DataSourceCompletions(BaseModel):
+ source: DataSourceCompletionsSource
+ """A EvalResponsesSource object describing a run data source configuration."""
+
+ type: Literal["completions"]
+ """The type of run data source. Always `completions`."""
+
+ input_messages: Optional[DataSourceCompletionsInputMessages] = None
+
+ model: Optional[str] = None
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ sampling_params: Optional[DataSourceCompletionsSamplingParams] = None
+
+
+DataSource: TypeAlias = Annotated[
+ Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class PerModelUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ invocation_count: int
+ """The number of invocations."""
+
+ run_model_name: str = FieldInfo(alias="model_name")
+ """The name of the model."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class PerTestingCriteriaResult(BaseModel):
+ failed: int
+ """Number of tests failed for this criteria."""
+
+ passed: int
+ """Number of tests passed for this criteria."""
+
+ testing_criteria: str
+ """A description of the testing criteria."""
+
+
+class ResultCounts(BaseModel):
+ errored: int
+ """Number of output items that resulted in an error."""
+
+ failed: int
+ """Number of output items that failed to pass the evaluation."""
+
+ passed: int
+ """Number of output items that passed the evaluation."""
+
+ total: int
+ """Total number of executed output items."""
+
+
+class RunCreateResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ data_source: DataSource
+ """Information about the run's data source."""
+
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ eval_id: str
+ """The identifier of the associated evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: str
+ """The model that is evaluated, if applicable."""
+
+ name: str
+ """The name of the evaluation run."""
+
+ object: Literal["eval.run"]
+ """The type of the object. Always "eval.run"."""
+
+ per_model_usage: List[PerModelUsage]
+ """Usage statistics for each model during the evaluation run."""
+
+ per_testing_criteria_results: List[PerTestingCriteriaResult]
+ """Results per testing criteria applied during the evaluation run."""
+
+ report_url: str
+ """The URL to the rendered evaluation run report on the UI dashboard."""
+
+ result_counts: ResultCounts
+ """Counters summarizing the outcomes of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/run_delete_response.py b/src/openai/types/evals/run_delete_response.py
new file mode 100644
index 0000000000..d48d01f86c
--- /dev/null
+++ b/src/openai/types/evals/run_delete_response.py
@@ -0,0 +1,15 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Optional
+
+from ..._models import BaseModel
+
+__all__ = ["RunDeleteResponse"]
+
+
+class RunDeleteResponse(BaseModel):
+ deleted: Optional[bool] = None
+
+ object: Optional[str] = None
+
+ run_id: Optional[str] = None
diff --git a/src/openai/types/evals/run_list_params.py b/src/openai/types/evals/run_list_params.py
new file mode 100644
index 0000000000..383b89d85c
--- /dev/null
+++ b/src/openai/types/evals/run_list_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["RunListParams"]
+
+
+class RunListParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last run from the previous pagination request."""
+
+ limit: int
+ """Number of runs to retrieve."""
+
+ order: Literal["asc", "desc"]
+ """Sort order for runs by timestamp.
+
+ Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`.
+ """
+
+ status: Literal["queued", "in_progress", "completed", "canceled", "failed"]
+ """Filter runs by status.
+
+ One of `queued` | `in_progress` | `failed` | `completed` | `canceled`.
+ """
diff --git a/src/openai/types/evals/run_list_response.py b/src/openai/types/evals/run_list_response.py
new file mode 100644
index 0000000000..278ceeabed
--- /dev/null
+++ b/src/openai/types/evals/run_list_response.py
@@ -0,0 +1,327 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .eval_api_error import EvalAPIError
+from ..shared.metadata import Metadata
+from ..shared.reasoning_effort import ReasoningEffort
+from ..responses.response_input_text import ResponseInputText
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
+
+__all__ = [
+ "RunListResponse",
+ "DataSource",
+ "DataSourceCompletions",
+ "DataSourceCompletionsSource",
+ "DataSourceCompletionsSourceFileContent",
+ "DataSourceCompletionsSourceFileContentContent",
+ "DataSourceCompletionsSourceFileID",
+ "DataSourceCompletionsSourceResponses",
+ "DataSourceCompletionsInputMessages",
+ "DataSourceCompletionsInputMessagesTemplate",
+ "DataSourceCompletionsInputMessagesTemplateTemplate",
+ "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText",
+ "DataSourceCompletionsInputMessagesItemReference",
+ "DataSourceCompletionsSamplingParams",
+ "PerModelUsage",
+ "PerTestingCriteriaResult",
+ "ResultCounts",
+]
+
+
+class DataSourceCompletionsSourceFileContentContent(BaseModel):
+ item: Dict[str, object]
+
+ sample: Optional[Dict[str, object]] = None
+
+
+class DataSourceCompletionsSourceFileContent(BaseModel):
+ content: List[DataSourceCompletionsSourceFileContentContent]
+ """The content of the jsonl file."""
+
+ type: Literal["file_content"]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class DataSourceCompletionsSourceFileID(BaseModel):
+ id: str
+ """The identifier of the file."""
+
+ type: Literal["file_id"]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class DataSourceCompletionsSourceResponses(BaseModel):
+ type: Literal["responses"]
+ """The type of run data source. Always `responses`."""
+
+ allow_parallel_tool_calls: Optional[bool] = None
+ """Whether to allow parallel tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ created_after: Optional[int] = None
+ """Only include items created after this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ created_before: Optional[int] = None
+ """Only include items created before this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ has_tool_calls: Optional[bool] = None
+ """Whether the response has tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ instructions_search: Optional[str] = None
+ """Optional search string for instructions.
+
+ This is a query parameter used to select responses.
+ """
+
+ metadata: Optional[object] = None
+ """Metadata filter for the responses.
+
+ This is a query parameter used to select responses.
+ """
+
+ model: Optional[str] = None
+ """The name of the model to find responses for.
+
+ This is a query parameter used to select responses.
+ """
+
+ reasoning_effort: Optional[ReasoningEffort] = None
+ """Optional reasoning effort parameter.
+
+ This is a query parameter used to select responses.
+ """
+
+ temperature: Optional[float] = None
+ """Sampling temperature. This is a query parameter used to select responses."""
+
+ top_p: Optional[float] = None
+ """Nucleus sampling parameter. This is a query parameter used to select responses."""
+
+ users: Optional[List[str]] = None
+ """List of user identifiers. This is a query parameter used to select responses."""
+
+
+DataSourceCompletionsSource: TypeAlias = Annotated[
+ Union[
+ DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
+ str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText
+]
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel):
+ content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[
+ DataSourceCompletionsInputMessagesTemplateTemplateChatMessage,
+ DataSourceCompletionsInputMessagesTemplateTemplateEvalItem,
+]
+
+
+class DataSourceCompletionsInputMessagesTemplate(BaseModel):
+ template: List[DataSourceCompletionsInputMessagesTemplateTemplate]
+ """A list of chat messages forming the prompt or context.
+
+ May include variable references to the "item" namespace, e.g. {{item.name}}.
+ """
+
+ type: Literal["template"]
+ """The type of input messages. Always `template`."""
+
+
+class DataSourceCompletionsInputMessagesItemReference(BaseModel):
+ item_reference: str
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Literal["item_reference"]
+ """The type of input messages. Always `item_reference`."""
+
+
+DataSourceCompletionsInputMessages: TypeAlias = Annotated[
+ Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DataSourceCompletionsSamplingParams(BaseModel):
+ max_completion_tokens: Optional[int] = None
+ """The maximum number of tokens in the generated output."""
+
+ seed: Optional[int] = None
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: Optional[float] = None
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: Optional[float] = None
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class DataSourceCompletions(BaseModel):
+ source: DataSourceCompletionsSource
+ """A EvalResponsesSource object describing a run data source configuration."""
+
+ type: Literal["completions"]
+ """The type of run data source. Always `completions`."""
+
+ input_messages: Optional[DataSourceCompletionsInputMessages] = None
+
+ model: Optional[str] = None
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ sampling_params: Optional[DataSourceCompletionsSamplingParams] = None
+
+
+DataSource: TypeAlias = Annotated[
+ Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class PerModelUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ invocation_count: int
+ """The number of invocations."""
+
+ run_model_name: str = FieldInfo(alias="model_name")
+ """The name of the model."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class PerTestingCriteriaResult(BaseModel):
+ failed: int
+ """Number of tests failed for this criteria."""
+
+ passed: int
+ """Number of tests passed for this criteria."""
+
+ testing_criteria: str
+ """A description of the testing criteria."""
+
+
+class ResultCounts(BaseModel):
+ errored: int
+ """Number of output items that resulted in an error."""
+
+ failed: int
+ """Number of output items that failed to pass the evaluation."""
+
+ passed: int
+ """Number of output items that passed the evaluation."""
+
+ total: int
+ """Total number of executed output items."""
+
+
+class RunListResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ data_source: DataSource
+ """Information about the run's data source."""
+
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ eval_id: str
+ """The identifier of the associated evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: str
+ """The model that is evaluated, if applicable."""
+
+ name: str
+ """The name of the evaluation run."""
+
+ object: Literal["eval.run"]
+ """The type of the object. Always "eval.run"."""
+
+ per_model_usage: List[PerModelUsage]
+ """Usage statistics for each model during the evaluation run."""
+
+ per_testing_criteria_results: List[PerTestingCriteriaResult]
+ """Results per testing criteria applied during the evaluation run."""
+
+ report_url: str
+ """The URL to the rendered evaluation run report on the UI dashboard."""
+
+ result_counts: ResultCounts
+ """Counters summarizing the outcomes of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/run_retrieve_response.py b/src/openai/types/evals/run_retrieve_response.py
new file mode 100644
index 0000000000..e142f31b14
--- /dev/null
+++ b/src/openai/types/evals/run_retrieve_response.py
@@ -0,0 +1,327 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Dict, List, Union, Optional
+from typing_extensions import Literal, Annotated, TypeAlias
+
+from pydantic import Field as FieldInfo
+
+from ..._utils import PropertyInfo
+from ..._models import BaseModel
+from .eval_api_error import EvalAPIError
+from ..shared.metadata import Metadata
+from ..shared.reasoning_effort import ReasoningEffort
+from ..responses.response_input_text import ResponseInputText
+from .create_eval_jsonl_run_data_source import CreateEvalJSONLRunDataSource
+from .create_eval_completions_run_data_source import CreateEvalCompletionsRunDataSource
+
+__all__ = [
+ "RunRetrieveResponse",
+ "DataSource",
+ "DataSourceCompletions",
+ "DataSourceCompletionsSource",
+ "DataSourceCompletionsSourceFileContent",
+ "DataSourceCompletionsSourceFileContentContent",
+ "DataSourceCompletionsSourceFileID",
+ "DataSourceCompletionsSourceResponses",
+ "DataSourceCompletionsInputMessages",
+ "DataSourceCompletionsInputMessagesTemplate",
+ "DataSourceCompletionsInputMessagesTemplateTemplate",
+ "DataSourceCompletionsInputMessagesTemplateTemplateChatMessage",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItem",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent",
+ "DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText",
+ "DataSourceCompletionsInputMessagesItemReference",
+ "DataSourceCompletionsSamplingParams",
+ "PerModelUsage",
+ "PerTestingCriteriaResult",
+ "ResultCounts",
+]
+
+
+class DataSourceCompletionsSourceFileContentContent(BaseModel):
+ item: Dict[str, object]
+
+ sample: Optional[Dict[str, object]] = None
+
+
+class DataSourceCompletionsSourceFileContent(BaseModel):
+ content: List[DataSourceCompletionsSourceFileContentContent]
+ """The content of the jsonl file."""
+
+ type: Literal["file_content"]
+ """The type of jsonl source. Always `file_content`."""
+
+
+class DataSourceCompletionsSourceFileID(BaseModel):
+ id: str
+ """The identifier of the file."""
+
+ type: Literal["file_id"]
+ """The type of jsonl source. Always `file_id`."""
+
+
+class DataSourceCompletionsSourceResponses(BaseModel):
+ type: Literal["responses"]
+ """The type of run data source. Always `responses`."""
+
+ allow_parallel_tool_calls: Optional[bool] = None
+ """Whether to allow parallel tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ created_after: Optional[int] = None
+ """Only include items created after this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ created_before: Optional[int] = None
+ """Only include items created before this timestamp (inclusive).
+
+ This is a query parameter used to select responses.
+ """
+
+ has_tool_calls: Optional[bool] = None
+ """Whether the response has tool calls.
+
+ This is a query parameter used to select responses.
+ """
+
+ instructions_search: Optional[str] = None
+ """Optional search string for instructions.
+
+ This is a query parameter used to select responses.
+ """
+
+ metadata: Optional[object] = None
+ """Metadata filter for the responses.
+
+ This is a query parameter used to select responses.
+ """
+
+ model: Optional[str] = None
+ """The name of the model to find responses for.
+
+ This is a query parameter used to select responses.
+ """
+
+ reasoning_effort: Optional[ReasoningEffort] = None
+ """Optional reasoning effort parameter.
+
+ This is a query parameter used to select responses.
+ """
+
+ temperature: Optional[float] = None
+ """Sampling temperature. This is a query parameter used to select responses."""
+
+ top_p: Optional[float] = None
+ """Nucleus sampling parameter. This is a query parameter used to select responses."""
+
+ users: Optional[List[str]] = None
+ """List of user identifiers. This is a query parameter used to select responses."""
+
+
+DataSourceCompletionsSource: TypeAlias = Annotated[
+ Union[
+ DataSourceCompletionsSourceFileContent, DataSourceCompletionsSourceFileID, DataSourceCompletionsSourceResponses
+ ],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateChatMessage(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText(BaseModel):
+ text: str
+ """The text output from the model."""
+
+ type: Literal["output_text"]
+ """The type of the output text. Always `output_text`."""
+
+
+DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent: TypeAlias = Union[
+ str, ResponseInputText, DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContentOutputText
+]
+
+
+class DataSourceCompletionsInputMessagesTemplateTemplateEvalItem(BaseModel):
+ content: DataSourceCompletionsInputMessagesTemplateTemplateEvalItemContent
+ """Text inputs to the model - can contain template strings."""
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
+
+
+DataSourceCompletionsInputMessagesTemplateTemplate: TypeAlias = Union[
+ DataSourceCompletionsInputMessagesTemplateTemplateChatMessage,
+ DataSourceCompletionsInputMessagesTemplateTemplateEvalItem,
+]
+
+
+class DataSourceCompletionsInputMessagesTemplate(BaseModel):
+ template: List[DataSourceCompletionsInputMessagesTemplateTemplate]
+ """A list of chat messages forming the prompt or context.
+
+ May include variable references to the "item" namespace, e.g. {{item.name}}.
+ """
+
+ type: Literal["template"]
+ """The type of input messages. Always `template`."""
+
+
+class DataSourceCompletionsInputMessagesItemReference(BaseModel):
+ item_reference: str
+ """A reference to a variable in the "item" namespace. Ie, "item.name" """
+
+ type: Literal["item_reference"]
+ """The type of input messages. Always `item_reference`."""
+
+
+DataSourceCompletionsInputMessages: TypeAlias = Annotated[
+ Union[DataSourceCompletionsInputMessagesTemplate, DataSourceCompletionsInputMessagesItemReference],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class DataSourceCompletionsSamplingParams(BaseModel):
+ max_completion_tokens: Optional[int] = None
+ """The maximum number of tokens in the generated output."""
+
+ seed: Optional[int] = None
+ """A seed value to initialize the randomness, during sampling."""
+
+ temperature: Optional[float] = None
+ """A higher temperature increases randomness in the outputs."""
+
+ top_p: Optional[float] = None
+ """An alternative to temperature for nucleus sampling; 1.0 includes all tokens."""
+
+
+class DataSourceCompletions(BaseModel):
+ source: DataSourceCompletionsSource
+ """A EvalResponsesSource object describing a run data source configuration."""
+
+ type: Literal["completions"]
+ """The type of run data source. Always `completions`."""
+
+ input_messages: Optional[DataSourceCompletionsInputMessages] = None
+
+ model: Optional[str] = None
+ """The name of the model to use for generating completions (e.g. "o3-mini")."""
+
+ sampling_params: Optional[DataSourceCompletionsSamplingParams] = None
+
+
+DataSource: TypeAlias = Annotated[
+ Union[CreateEvalJSONLRunDataSource, CreateEvalCompletionsRunDataSource, DataSourceCompletions],
+ PropertyInfo(discriminator="type"),
+]
+
+
+class PerModelUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ invocation_count: int
+ """The number of invocations."""
+
+ run_model_name: str = FieldInfo(alias="model_name")
+ """The name of the model."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class PerTestingCriteriaResult(BaseModel):
+ failed: int
+ """Number of tests failed for this criteria."""
+
+ passed: int
+ """Number of tests passed for this criteria."""
+
+ testing_criteria: str
+ """A description of the testing criteria."""
+
+
+class ResultCounts(BaseModel):
+ errored: int
+ """Number of output items that resulted in an error."""
+
+ failed: int
+ """Number of output items that failed to pass the evaluation."""
+
+ passed: int
+ """Number of output items that passed the evaluation."""
+
+ total: int
+ """Total number of executed output items."""
+
+
+class RunRetrieveResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ data_source: DataSource
+ """Information about the run's data source."""
+
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ eval_id: str
+ """The identifier of the associated evaluation."""
+
+ metadata: Optional[Metadata] = None
+ """Set of 16 key-value pairs that can be attached to an object.
+
+ This can be useful for storing additional information about the object in a
+ structured format, and querying for objects via API or the dashboard.
+
+ Keys are strings with a maximum length of 64 characters. Values are strings with
+ a maximum length of 512 characters.
+ """
+
+ model: str
+ """The model that is evaluated, if applicable."""
+
+ name: str
+ """The name of the evaluation run."""
+
+ object: Literal["eval.run"]
+ """The type of the object. Always "eval.run"."""
+
+ per_model_usage: List[PerModelUsage]
+ """Usage statistics for each model during the evaluation run."""
+
+ per_testing_criteria_results: List[PerTestingCriteriaResult]
+ """Results per testing criteria applied during the evaluation run."""
+
+ report_url: str
+ """The URL to the rendered evaluation run report on the UI dashboard."""
+
+ result_counts: ResultCounts
+ """Counters summarizing the outcomes of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/runs/__init__.py b/src/openai/types/evals/runs/__init__.py
new file mode 100644
index 0000000000..b77cbb6acd
--- /dev/null
+++ b/src/openai/types/evals/runs/__init__.py
@@ -0,0 +1,7 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .output_item_list_params import OutputItemListParams as OutputItemListParams
+from .output_item_list_response import OutputItemListResponse as OutputItemListResponse
+from .output_item_retrieve_response import OutputItemRetrieveResponse as OutputItemRetrieveResponse
diff --git a/src/openai/types/evals/runs/output_item_list_params.py b/src/openai/types/evals/runs/output_item_list_params.py
new file mode 100644
index 0000000000..073bfc69a7
--- /dev/null
+++ b/src/openai/types/evals/runs/output_item_list_params.py
@@ -0,0 +1,30 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["OutputItemListParams"]
+
+
+class OutputItemListParams(TypedDict, total=False):
+ eval_id: Required[str]
+
+ after: str
+ """Identifier for the last output item from the previous pagination request."""
+
+ limit: int
+ """Number of output items to retrieve."""
+
+ order: Literal["asc", "desc"]
+ """Sort order for output items by timestamp.
+
+ Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`.
+ """
+
+ status: Literal["fail", "pass"]
+ """Filter output items by status.
+
+ Use `fail` to filter by failed output items or `pass` to filter by passed
+ output items.
+ """
diff --git a/src/openai/types/evals/runs/output_item_list_response.py b/src/openai/types/evals/runs/output_item_list_response.py
new file mode 100644
index 0000000000..72b1049f7b
--- /dev/null
+++ b/src/openai/types/evals/runs/output_item_list_response.py
@@ -0,0 +1,104 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import builtins
+from typing import Dict, List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from ..eval_api_error import EvalAPIError
+
+__all__ = ["OutputItemListResponse", "Sample", "SampleInput", "SampleOutput", "SampleUsage"]
+
+
+class SampleInput(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message sender (e.g., system, user, developer)."""
+
+
+class SampleOutput(BaseModel):
+ content: Optional[str] = None
+ """The content of the message."""
+
+ role: Optional[str] = None
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class SampleUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class Sample(BaseModel):
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ finish_reason: str
+ """The reason why the sample generation was finished."""
+
+ input: List[SampleInput]
+ """An array of input messages."""
+
+ max_completion_tokens: int
+ """The maximum number of tokens allowed for completion."""
+
+ model: str
+ """The model used for generating the sample."""
+
+ output: List[SampleOutput]
+ """An array of output messages."""
+
+ seed: int
+ """The seed used for generating the sample."""
+
+ temperature: float
+ """The sampling temperature used."""
+
+ top_p: float
+ """The top_p value used for sampling."""
+
+ usage: SampleUsage
+ """Token usage details for the sample."""
+
+
+class OutputItemListResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run output item."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ datasource_item: Dict[str, object]
+ """Details of the input data source item."""
+
+ datasource_item_id: int
+ """The identifier for the data source item."""
+
+ eval_id: str
+ """The identifier of the evaluation group."""
+
+ object: Literal["eval.run.output_item"]
+ """The type of the object. Always "eval.run.output_item"."""
+
+ results: List[Dict[str, builtins.object]]
+ """A list of results from the evaluation run."""
+
+ run_id: str
+ """The identifier of the evaluation run associated with this output item."""
+
+ sample: Sample
+ """A sample containing the input and output of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/evals/runs/output_item_retrieve_response.py b/src/openai/types/evals/runs/output_item_retrieve_response.py
new file mode 100644
index 0000000000..63aab5565f
--- /dev/null
+++ b/src/openai/types/evals/runs/output_item_retrieve_response.py
@@ -0,0 +1,104 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+import builtins
+from typing import Dict, List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+from ..eval_api_error import EvalAPIError
+
+__all__ = ["OutputItemRetrieveResponse", "Sample", "SampleInput", "SampleOutput", "SampleUsage"]
+
+
+class SampleInput(BaseModel):
+ content: str
+ """The content of the message."""
+
+ role: str
+ """The role of the message sender (e.g., system, user, developer)."""
+
+
+class SampleOutput(BaseModel):
+ content: Optional[str] = None
+ """The content of the message."""
+
+ role: Optional[str] = None
+ """The role of the message (e.g. "system", "assistant", "user")."""
+
+
+class SampleUsage(BaseModel):
+ cached_tokens: int
+ """The number of tokens retrieved from cache."""
+
+ completion_tokens: int
+ """The number of completion tokens generated."""
+
+ prompt_tokens: int
+ """The number of prompt tokens used."""
+
+ total_tokens: int
+ """The total number of tokens used."""
+
+
+class Sample(BaseModel):
+ error: EvalAPIError
+ """An object representing an error response from the Eval API."""
+
+ finish_reason: str
+ """The reason why the sample generation was finished."""
+
+ input: List[SampleInput]
+ """An array of input messages."""
+
+ max_completion_tokens: int
+ """The maximum number of tokens allowed for completion."""
+
+ model: str
+ """The model used for generating the sample."""
+
+ output: List[SampleOutput]
+ """An array of output messages."""
+
+ seed: int
+ """The seed used for generating the sample."""
+
+ temperature: float
+ """The sampling temperature used."""
+
+ top_p: float
+ """The top_p value used for sampling."""
+
+ usage: SampleUsage
+ """Token usage details for the sample."""
+
+
+class OutputItemRetrieveResponse(BaseModel):
+ id: str
+ """Unique identifier for the evaluation run output item."""
+
+ created_at: int
+ """Unix timestamp (in seconds) when the evaluation run was created."""
+
+ datasource_item: Dict[str, object]
+ """Details of the input data source item."""
+
+ datasource_item_id: int
+ """The identifier for the data source item."""
+
+ eval_id: str
+ """The identifier of the evaluation group."""
+
+ object: Literal["eval.run.output_item"]
+ """The type of the object. Always "eval.run.output_item"."""
+
+ results: List[Dict[str, builtins.object]]
+ """A list of results from the evaluation run."""
+
+ run_id: str
+ """The identifier of the evaluation run associated with this output item."""
+
+ sample: Sample
+ """A sample containing the input and output of the evaluation run."""
+
+ status: str
+ """The status of the evaluation run."""
diff --git a/src/openai/types/fine_tuning/checkpoints/__init__.py b/src/openai/types/fine_tuning/checkpoints/__init__.py
new file mode 100644
index 0000000000..2947b33145
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/__init__.py
@@ -0,0 +1,9 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from .permission_create_params import PermissionCreateParams as PermissionCreateParams
+from .permission_create_response import PermissionCreateResponse as PermissionCreateResponse
+from .permission_delete_response import PermissionDeleteResponse as PermissionDeleteResponse
+from .permission_retrieve_params import PermissionRetrieveParams as PermissionRetrieveParams
+from .permission_retrieve_response import PermissionRetrieveResponse as PermissionRetrieveResponse
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_create_params.py b/src/openai/types/fine_tuning/checkpoints/permission_create_params.py
new file mode 100644
index 0000000000..92f98f21b9
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_create_params.py
@@ -0,0 +1,13 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import List
+from typing_extensions import Required, TypedDict
+
+__all__ = ["PermissionCreateParams"]
+
+
+class PermissionCreateParams(TypedDict, total=False):
+ project_ids: Required[List[str]]
+ """The project identifiers to grant access to."""
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_create_response.py b/src/openai/types/fine_tuning/checkpoints/permission_create_response.py
new file mode 100644
index 0000000000..9bc14c00cc
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_create_response.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["PermissionCreateResponse"]
+
+
+class PermissionCreateResponse(BaseModel):
+ id: str
+ """The permission identifier, which can be referenced in the API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the permission was created."""
+
+ object: Literal["checkpoint.permission"]
+ """The object type, which is always "checkpoint.permission"."""
+
+ project_id: str
+ """The project identifier that the permission is for."""
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py b/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py
new file mode 100644
index 0000000000..1a92d912fa
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_delete_response.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["PermissionDeleteResponse"]
+
+
+class PermissionDeleteResponse(BaseModel):
+ id: str
+ """The ID of the fine-tuned model checkpoint permission that was deleted."""
+
+ deleted: bool
+ """Whether the fine-tuned model checkpoint permission was successfully deleted."""
+
+ object: Literal["checkpoint.permission"]
+ """The object type, which is always "checkpoint.permission"."""
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py
new file mode 100644
index 0000000000..6e66a867ca
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_params.py
@@ -0,0 +1,21 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["PermissionRetrieveParams"]
+
+
+class PermissionRetrieveParams(TypedDict, total=False):
+ after: str
+ """Identifier for the last permission ID from the previous pagination request."""
+
+ limit: int
+ """Number of permissions to retrieve."""
+
+ order: Literal["ascending", "descending"]
+ """The order in which to retrieve permissions."""
+
+ project_id: str
+ """The ID of the project to get permissions for."""
diff --git a/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py
new file mode 100644
index 0000000000..14c73b55d0
--- /dev/null
+++ b/src/openai/types/fine_tuning/checkpoints/permission_retrieve_response.py
@@ -0,0 +1,34 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from ...._models import BaseModel
+
+__all__ = ["PermissionRetrieveResponse", "Data"]
+
+
+class Data(BaseModel):
+ id: str
+ """The permission identifier, which can be referenced in the API endpoints."""
+
+ created_at: int
+ """The Unix timestamp (in seconds) for when the permission was created."""
+
+ object: Literal["checkpoint.permission"]
+ """The object type, which is always "checkpoint.permission"."""
+
+ project_id: str
+ """The project identifier that the permission is for."""
+
+
+class PermissionRetrieveResponse(BaseModel):
+ data: List[Data]
+
+ has_more: bool
+
+ object: Literal["list"]
+
+ first_id: Optional[str] = None
+
+ last_id: Optional[str] = None
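A rough sketch of granting checkpoint access with these params; it assumes the `fine_tuning.checkpoints.permissions` resource added elsewhere in this changeset and uses a placeholder checkpoint ID:

```python
from openai import OpenAI

client = OpenAI()

# Grant a (placeholder) project access to a fine-tuned model checkpoint.
created = client.fine_tuning.checkpoints.permissions.create(
    fine_tuned_model_checkpoint="ft:gpt-4o-mini:org::ckpt-abc123",  # placeholder ID
    project_ids=["proj_abc"],
)
print(created)
```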
diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
index 9a66aa4f17..2af73fbffb 100644
--- a/src/openai/types/fine_tuning/fine_tuning_job_integration.py
+++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject
FineTuningJobIntegration = FineTuningJobWandbIntegrationObject
diff --git a/src/openai/types/image.py b/src/openai/types/image.py
index f48aa2c702..ecaef3fd58 100644
--- a/src/openai/types/image.py
+++ b/src/openai/types/image.py
@@ -9,16 +9,18 @@
class Image(BaseModel):
b64_json: Optional[str] = None
- """
- The base64-encoded JSON of the generated image, if `response_format` is
- `b64_json`.
+ """The base64-encoded JSON of the generated image.
+
+ Default value for `gpt-image-1`, and only present if `response_format` is set to
+ `b64_json` for `dall-e-2` and `dall-e-3`.
"""
revised_prompt: Optional[str] = None
- """
- The prompt that was used to generate the image, if there was any revision to the
- prompt.
- """
+ """For `dall-e-3` only, the revised prompt that was used to generate the image."""
url: Optional[str] = None
- """The URL of the generated image, if `response_format` is `url` (default)."""
+ """
+ When using `dall-e-2` or `dall-e-3`, the URL of the generated image if
+ `response_format` is set to `url` (default value). Unsupported for
+ `gpt-image-1`.
+ """
diff --git a/src/openai/types/image_create_variation_params.py b/src/openai/types/image_create_variation_params.py
index d20f672912..d10b74b2c2 100644
--- a/src/openai/types/image_create_variation_params.py
+++ b/src/openai/types/image_create_variation_params.py
@@ -25,10 +25,7 @@ class ImageCreateVariationParams(TypedDict, total=False):
"""
n: Optional[int]
- """The number of images to generate.
-
- Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
- """
+ """The number of images to generate. Must be between 1 and 10."""
response_format: Optional[Literal["url", "b64_json"]]
"""The format in which the generated images are returned.
diff --git a/src/openai/types/image_edit_params.py b/src/openai/types/image_edit_params.py
index 1cb10611f3..f01a12c1b0 100644
--- a/src/openai/types/image_edit_params.py
+++ b/src/openai/types/image_edit_params.py
@@ -2,7 +2,7 @@
from __future__ import annotations
-from typing import Union, Optional
+from typing import List, Union, Optional
from typing_extensions import Literal, Required, TypedDict
from .._types import FileTypes
@@ -12,46 +12,61 @@
class ImageEditParams(TypedDict, total=False):
- image: Required[FileTypes]
- """The image to edit.
+ image: Required[Union[FileTypes, List[FileTypes]]]
+ """The image(s) to edit.
- Must be a valid PNG file, less than 4MB, and square. If mask is not provided,
- image must have transparency, which will be used as the mask.
+ Must be a supported image file or an array of images. For `gpt-image-1`, each
+ image should be a `png`, `webp`, or `jpg` file less than 25MB. For `dall-e-2`,
+ you can only provide one image, and it should be a square `png` file less than
+ 4MB.
"""
prompt: Required[str]
"""A text description of the desired image(s).
- The maximum length is 1000 characters.
+ The maximum length is 1000 characters for `dall-e-2`, and 32000 characters for
+ `gpt-image-1`.
"""
mask: FileTypes
"""An additional image whose fully transparent areas (e.g.
- where alpha is zero) indicate where `image` should be edited. Must be a valid
- PNG file, less than 4MB, and have the same dimensions as `image`.
+ where alpha is zero) indicate where `image` should be edited. If there are
+ multiple images provided, the mask will be applied on the first image. Must be a
+ valid PNG file, less than 4MB, and have the same dimensions as `image`.
"""
model: Union[str, ImageModel, None]
"""The model to use for image generation.
- Only `dall-e-2` is supported at this time.
+ Only `dall-e-2` and `gpt-image-1` are supported. Defaults to `dall-e-2` unless a
+ parameter specific to `gpt-image-1` is used.
"""
n: Optional[int]
"""The number of images to generate. Must be between 1 and 10."""
+ quality: Optional[Literal["standard", "low", "medium", "high", "auto"]]
+ """The quality of the image that will be generated.
+
+ `high`, `medium` and `low` are only supported for `gpt-image-1`. `dall-e-2` only
+ supports `standard` quality. Defaults to `auto`.
+ """
+
response_format: Optional[Literal["url", "b64_json"]]
"""The format in which the generated images are returned.
Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the
- image has been generated.
+ image has been generated. This parameter is only supported for `dall-e-2`, as
+ `gpt-image-1` will always return base64-encoded images.
"""
size: Optional[Literal["256x256", "512x512", "1024x1024"]]
"""The size of the generated images.
- Must be one of `256x256`, `512x512`, or `1024x1024`.
+ Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or
+ `auto` (default value) for `gpt-image-1`, and one of `256x256`, `512x512`, or
+ `1024x1024` for `dall-e-2`.
"""
user: str
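A hedged sketch of the widened `image` parameter on `images.edit`; the file names and multi-image list are illustrative only:

```python
from openai import OpenAI

client = OpenAI()

# gpt-image-1 accepts several reference images; dall-e-2 still takes exactly one.
result = client.images.edit(
    model="gpt-image-1",
    image=[open("living_room.png", "rb"), open("sofa.png", "rb")],  # placeholder files
    prompt="Place the sofa from the second image into the living room",
    size="1024x1024",
)
```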
diff --git a/src/openai/types/image_generate_params.py b/src/openai/types/image_generate_params.py
index c88c45f518..8fc10220dc 100644
--- a/src/openai/types/image_generate_params.py
+++ b/src/openai/types/image_generate_params.py
@@ -14,12 +14,33 @@ class ImageGenerateParams(TypedDict, total=False):
prompt: Required[str]
"""A text description of the desired image(s).
- The maximum length is 1000 characters for `dall-e-2` and 4000 characters for
- `dall-e-3`.
+ The maximum length is 32000 characters for `gpt-image-1`, 1000 characters for
+ `dall-e-2` and 4000 characters for `dall-e-3`.
+ """
+
+ background: Optional[Literal["transparent", "opaque", "auto"]]
+ """Allows to set transparency for the background of the generated image(s).
+
+ This parameter is only supported for `gpt-image-1`. Must be one of
+ `transparent`, `opaque` or `auto` (default value). When `auto` is used, the
+ model will automatically determine the best background for the image.
+
+ If `transparent`, the output format needs to support transparency, so it should
+ be set to either `png` (default value) or `webp`.
"""
model: Union[str, ImageModel, None]
- """The model to use for image generation."""
+ """The model to use for image generation.
+
+ One of `dall-e-2`, `dall-e-3`, or `gpt-image-1`. Defaults to `dall-e-2` unless a
+ parameter specific to `gpt-image-1` is used.
+ """
+
+ moderation: Optional[Literal["low", "auto"]]
+ """Control the content-moderation level for images generated by `gpt-image-1`.
+
+ Must be either `low` for less restrictive filtering or `auto` (default value).
+ """
n: Optional[int]
"""The number of images to generate.
@@ -27,34 +48,57 @@ class ImageGenerateParams(TypedDict, total=False):
Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
"""
- quality: Literal["standard", "hd"]
+ output_compression: Optional[int]
+ """The compression level (0-100%) for the generated images.
+
+ This parameter is only supported for `gpt-image-1` with the `webp` or `jpeg`
+ output formats, and defaults to 100.
+ """
+
+ output_format: Optional[Literal["png", "jpeg", "webp"]]
+ """The format in which the generated images are returned.
+
+ This parameter is only supported for `gpt-image-1`. Must be one of `png`,
+ `jpeg`, or `webp`.
+ """
+
+ quality: Optional[Literal["standard", "hd", "low", "medium", "high", "auto"]]
"""The quality of the image that will be generated.
- `hd` creates images with finer details and greater consistency across the image.
- This param is only supported for `dall-e-3`.
+ - `auto` (default value) will automatically select the best quality for the
+ given model.
+ - `high`, `medium` and `low` are supported for `gpt-image-1`.
+ - `hd` and `standard` are supported for `dall-e-3`.
+ - `standard` is the only option for `dall-e-2`.
"""
response_format: Optional[Literal["url", "b64_json"]]
- """The format in which the generated images are returned.
+ """The format in which generated images with `dall-e-2` and `dall-e-3` are
+ returned.
Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the
- image has been generated.
+ image has been generated. This parameter isn't supported for `gpt-image-1`, which
+ will always return base64-encoded images.
"""
- size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]]
+ size: Optional[
+ Literal["auto", "1024x1024", "1536x1024", "1024x1536", "256x256", "512x512", "1792x1024", "1024x1792"]
+ ]
"""The size of the generated images.
- Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one
- of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models.
+ Must be one of `1024x1024`, `1536x1024` (landscape), `1024x1536` (portrait), or
+ `auto` (default value) for `gpt-image-1`, one of `256x256`, `512x512`, or
+ `1024x1024` for `dall-e-2`, and one of `1024x1024`, `1792x1024`, or `1024x1792`
+ for `dall-e-3`.
"""
style: Optional[Literal["vivid", "natural"]]
"""The style of the generated images.
- Must be one of `vivid` or `natural`. Vivid causes the model to lean towards
- generating hyper-real and dramatic images. Natural causes the model to produce
- more natural, less hyper-real looking images. This param is only supported for
- `dall-e-3`.
+ This parameter is only supported for `dall-e-3`. Must be one of `vivid` or
+ `natural`. Vivid causes the model to lean towards generating hyper-real and
+ dramatic images. Natural causes the model to produce more natural, less
+ hyper-real looking images.
"""
user: str
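A short sketch of a `gpt-image-1` generation using the new knobs, assuming the `images.generate` method mirrors these typed params (values chosen for illustration):

```python
from openai import OpenAI

client = OpenAI()

result = client.images.generate(
    model="gpt-image-1",
    prompt="A watercolor map of a fictional archipelago",
    background="transparent",  # requires png or webp output
    output_format="png",
    quality="medium",
    size="1024x1536",          # portrait
)
```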
diff --git a/src/openai/types/image_model.py b/src/openai/types/image_model.py
index 1672369bea..7fed69ed82 100644
--- a/src/openai/types/image_model.py
+++ b/src/openai/types/image_model.py
@@ -4,4 +4,4 @@
__all__ = ["ImageModel"]
-ImageModel: TypeAlias = Literal["dall-e-2", "dall-e-3"]
+ImageModel: TypeAlias = Literal["dall-e-2", "dall-e-3", "gpt-image-1"]
diff --git a/src/openai/types/images_response.py b/src/openai/types/images_response.py
index 7cee813184..df454afa4d 100644
--- a/src/openai/types/images_response.py
+++ b/src/openai/types/images_response.py
@@ -1,14 +1,41 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import List
+from typing import List, Optional
from .image import Image
from .._models import BaseModel
-__all__ = ["ImagesResponse"]
+__all__ = ["ImagesResponse", "Usage", "UsageInputTokensDetails"]
+
+
+class UsageInputTokensDetails(BaseModel):
+ image_tokens: int
+ """The number of image tokens in the input prompt."""
+
+ text_tokens: int
+ """The number of text tokens in the input prompt."""
+
+
+class Usage(BaseModel):
+ input_tokens: int
+ """The number of tokens (images and text) in the input prompt."""
+
+ input_tokens_details: UsageInputTokensDetails
+ """The input tokens detailed information for the image generation."""
+
+ output_tokens: int
+ """The number of image tokens in the output image."""
+
+ total_tokens: int
+ """The total number of tokens (images and text) used for the image generation."""
class ImagesResponse(BaseModel):
created: int
+ """The Unix timestamp (in seconds) of when the image was created."""
+
+ data: Optional[List[Image]] = None
+ """The list of generated images."""
- data: List[Image]
+ usage: Optional[Usage] = None
+ """For `gpt-image-1` only, the token usage information for the image generation."""
diff --git a/src/openai/types/model_deleted.py b/src/openai/types/model_deleted.py
index 7f81e1b380..e7601f74e4 100644
--- a/src/openai/types/model_deleted.py
+++ b/src/openai/types/model_deleted.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from .._models import BaseModel
__all__ = ["ModelDeleted"]
diff --git a/src/openai/types/responses/__init__.py b/src/openai/types/responses/__init__.py
index 4f07a3d097..22fd2a0802 100644
--- a/src/openai/types/responses/__init__.py
+++ b/src/openai/types/responses/__init__.py
@@ -22,6 +22,7 @@
from .web_search_tool import WebSearchTool as WebSearchTool
from .file_search_tool import FileSearchTool as FileSearchTool
from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes
+from .easy_input_message import EasyInputMessage as EasyInputMessage
from .response_item_list import ResponseItemList as ResponseItemList
from .computer_tool_param import ComputerToolParam as ComputerToolParam
from .function_tool_param import FunctionToolParam as FunctionToolParam
@@ -117,6 +118,12 @@
from .response_input_message_content_list_param import (
ResponseInputMessageContentListParam as ResponseInputMessageContentListParam,
)
+from .response_reasoning_summary_part_done_event import (
+ ResponseReasoningSummaryPartDoneEvent as ResponseReasoningSummaryPartDoneEvent,
+)
+from .response_reasoning_summary_text_done_event import (
+ ResponseReasoningSummaryTextDoneEvent as ResponseReasoningSummaryTextDoneEvent,
+)
from .response_web_search_call_in_progress_event import (
ResponseWebSearchCallInProgressEvent as ResponseWebSearchCallInProgressEvent,
)
@@ -126,6 +133,12 @@
from .response_function_call_arguments_done_event import (
ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent,
)
+from .response_reasoning_summary_part_added_event import (
+ ResponseReasoningSummaryPartAddedEvent as ResponseReasoningSummaryPartAddedEvent,
+)
+from .response_reasoning_summary_text_delta_event import (
+ ResponseReasoningSummaryTextDeltaEvent as ResponseReasoningSummaryTextDeltaEvent,
+)
from .response_function_call_arguments_delta_event import (
ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent,
)
diff --git a/src/openai/types/responses/easy_input_message.py b/src/openai/types/responses/easy_input_message.py
new file mode 100644
index 0000000000..4ed0194f9f
--- /dev/null
+++ b/src/openai/types/responses/easy_input_message.py
@@ -0,0 +1,26 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import Union, Optional
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+from .response_input_message_content_list import ResponseInputMessageContentList
+
+__all__ = ["EasyInputMessage"]
+
+
+class EasyInputMessage(BaseModel):
+ content: Union[str, ResponseInputMessageContentList]
+ """
+ Text, image, or audio input to the model, used to generate a response. Can also
+ contain previous assistant responses.
+ """
+
+ role: Literal["user", "assistant", "system", "developer"]
+ """The role of the message input.
+
+ One of `user`, `assistant`, `system`, or `developer`.
+ """
+
+ type: Optional[Literal["message"]] = None
+ """The type of the message input. Always `message`."""
diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py
index 8cd1e01144..254f7e204b 100644
--- a/src/openai/types/responses/response.py
+++ b/src/openai/types/responses/response.py
@@ -62,7 +62,7 @@ class Response(BaseModel):
"""
model: ResponsesModel
- """Model ID used to generate the response, like `gpt-4o` or `o1`.
+ """Model ID used to generate the response, like `gpt-4o` or `o3`.
OpenAI offers a wide range of models with different capabilities, performance
characteristics, and price points. Refer to the
@@ -149,6 +149,27 @@ class Response(BaseModel):
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
"""
+ service_tier: Optional[Literal["auto", "default", "flex"]] = None
+ """Specifies the latency tier to use for processing the request.
+
+ This parameter is relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+ """
+
status: Optional[ResponseStatus] = None
"""The status of the response generation.
diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py
index ed82e678e5..3c0a9d7b8a 100644
--- a/src/openai/types/responses/response_create_params.py
+++ b/src/openai/types/responses/response_create_params.py
@@ -38,7 +38,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
"""
model: Required[ResponsesModel]
- """Model ID used to generate the response, like `gpt-4o` or `o1`.
+ """Model ID used to generate the response, like `gpt-4o` or `o3`.
OpenAI offers a wide range of models with different capabilities, performance
characteristics, and price points. Refer to the
@@ -102,6 +102,27 @@ class ResponseCreateParamsBase(TypedDict, total=False):
[reasoning models](https://platform.openai.com/docs/guides/reasoning).
"""
+ service_tier: Optional[Literal["auto", "default", "flex"]]
+ """Specifies the latency tier to use for processing the request.
+
+ This parameter is relevant for customers subscribed to the scale tier service:
+
+ - If set to 'auto', and the Project is Scale tier enabled, the system will
+ utilize scale tier credits until they are exhausted.
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will
+ be processed using the default service tier with a lower uptime SLA and no
+ latency guarantee.
+ - If set to 'default', the request will be processed using the default service
+ tier with a lower uptime SLA and no latency guarantee.
+ - If set to 'flex', the request will be processed with the Flex Processing
+ service tier.
+ [Learn more](https://platform.openai.com/docs/guides/flex-processing).
+ - When not set, the default behavior is 'auto'.
+
+ When this parameter is set, the response body will include the `service_tier`
+ utilized.
+ """
+
store: Optional[bool]
"""Whether to store the generated model response for later retrieval via API."""
diff --git a/src/openai/types/responses/response_function_tool_call_item.py b/src/openai/types/responses/response_function_tool_call_item.py
index 25984f9451..762015a4b1 100644
--- a/src/openai/types/responses/response_function_tool_call_item.py
+++ b/src/openai/types/responses/response_function_tool_call_item.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from .response_function_tool_call import ResponseFunctionToolCall
__all__ = ["ResponseFunctionToolCallItem"]
diff --git a/src/openai/types/responses/response_reasoning_summary_part_added_event.py b/src/openai/types/responses/response_reasoning_summary_part_added_event.py
new file mode 100644
index 0000000000..fd11520170
--- /dev/null
+++ b/src/openai/types/responses/response_reasoning_summary_part_added_event.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseReasoningSummaryPartAddedEvent", "Part"]
+
+
+class Part(BaseModel):
+ text: str
+ """The text of the summary part."""
+
+ type: Literal["summary_text"]
+ """The type of the summary part. Always `summary_text`."""
+
+
+class ResponseReasoningSummaryPartAddedEvent(BaseModel):
+ item_id: str
+ """The ID of the item this summary part is associated with."""
+
+ output_index: int
+ """The index of the output item this summary part is associated with."""
+
+ part: Part
+ """The summary part that was added."""
+
+ summary_index: int
+ """The index of the summary part within the reasoning summary."""
+
+ type: Literal["response.reasoning_summary_part.added"]
+ """The type of the event. Always `response.reasoning_summary_part.added`."""
diff --git a/src/openai/types/responses/response_reasoning_summary_part_done_event.py b/src/openai/types/responses/response_reasoning_summary_part_done_event.py
new file mode 100644
index 0000000000..7f30189a49
--- /dev/null
+++ b/src/openai/types/responses/response_reasoning_summary_part_done_event.py
@@ -0,0 +1,32 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseReasoningSummaryPartDoneEvent", "Part"]
+
+
+class Part(BaseModel):
+ text: str
+ """The text of the summary part."""
+
+ type: Literal["summary_text"]
+ """The type of the summary part. Always `summary_text`."""
+
+
+class ResponseReasoningSummaryPartDoneEvent(BaseModel):
+ item_id: str
+ """The ID of the item this summary part is associated with."""
+
+ output_index: int
+ """The index of the output item this summary part is associated with."""
+
+ part: Part
+ """The completed summary part."""
+
+ summary_index: int
+ """The index of the summary part within the reasoning summary."""
+
+ type: Literal["response.reasoning_summary_part.done"]
+ """The type of the event. Always `response.reasoning_summary_part.done`."""
diff --git a/src/openai/types/responses/response_reasoning_summary_text_delta_event.py b/src/openai/types/responses/response_reasoning_summary_text_delta_event.py
new file mode 100644
index 0000000000..6d0cbd8265
--- /dev/null
+++ b/src/openai/types/responses/response_reasoning_summary_text_delta_event.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseReasoningSummaryTextDeltaEvent"]
+
+
+class ResponseReasoningSummaryTextDeltaEvent(BaseModel):
+ delta: str
+ """The text delta that was added to the summary."""
+
+ item_id: str
+ """The ID of the item this summary text delta is associated with."""
+
+ output_index: int
+ """The index of the output item this summary text delta is associated with."""
+
+ summary_index: int
+ """The index of the summary part within the reasoning summary."""
+
+ type: Literal["response.reasoning_summary_text.delta"]
+ """The type of the event. Always `response.reasoning_summary_text.delta`."""
diff --git a/src/openai/types/responses/response_reasoning_summary_text_done_event.py b/src/openai/types/responses/response_reasoning_summary_text_done_event.py
new file mode 100644
index 0000000000..15b894c75b
--- /dev/null
+++ b/src/openai/types/responses/response_reasoning_summary_text_done_event.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing_extensions import Literal
+
+from ..._models import BaseModel
+
+__all__ = ["ResponseReasoningSummaryTextDoneEvent"]
+
+
+class ResponseReasoningSummaryTextDoneEvent(BaseModel):
+ item_id: str
+ """The ID of the item this summary text is associated with."""
+
+ output_index: int
+ """The index of the output item this summary text is associated with."""
+
+ summary_index: int
+ """The index of the summary part within the reasoning summary."""
+
+ text: str
+ """The full text of the completed reasoning summary."""
+
+ type: Literal["response.reasoning_summary_text.done"]
+ """The type of the event. Always `response.reasoning_summary_text.done`."""
diff --git a/src/openai/types/responses/response_stream_event.py b/src/openai/types/responses/response_stream_event.py
index 446863b175..07c18bd217 100644
--- a/src/openai/types/responses/response_stream_event.py
+++ b/src/openai/types/responses/response_stream_event.py
@@ -27,9 +27,13 @@
from .response_web_search_call_searching_event import ResponseWebSearchCallSearchingEvent
from .response_file_search_call_completed_event import ResponseFileSearchCallCompletedEvent
from .response_file_search_call_searching_event import ResponseFileSearchCallSearchingEvent
+from .response_reasoning_summary_part_done_event import ResponseReasoningSummaryPartDoneEvent
+from .response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent
from .response_web_search_call_in_progress_event import ResponseWebSearchCallInProgressEvent
from .response_file_search_call_in_progress_event import ResponseFileSearchCallInProgressEvent
from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent
+from .response_reasoning_summary_part_added_event import ResponseReasoningSummaryPartAddedEvent
+from .response_reasoning_summary_text_delta_event import ResponseReasoningSummaryTextDeltaEvent
from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent
from .response_code_interpreter_call_code_done_event import ResponseCodeInterpreterCallCodeDoneEvent
from .response_code_interpreter_call_completed_event import ResponseCodeInterpreterCallCompletedEvent
@@ -65,6 +69,10 @@
ResponseIncompleteEvent,
ResponseOutputItemAddedEvent,
ResponseOutputItemDoneEvent,
+ ResponseReasoningSummaryPartAddedEvent,
+ ResponseReasoningSummaryPartDoneEvent,
+ ResponseReasoningSummaryTextDeltaEvent,
+ ResponseReasoningSummaryTextDoneEvent,
ResponseRefusalDeltaEvent,
ResponseRefusalDoneEvent,
ResponseTextAnnotationDeltaEvent,
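A sketch of consuming the new reasoning-summary events from a streamed response; it assumes a reasoning-capable model and that a summary was requested via the `reasoning` param:

```python
from openai import OpenAI

client = OpenAI()

stream = client.responses.create(
    model="o4-mini",
    input="Why is the sky blue?",
    reasoning={"effort": "medium", "summary": "auto"},
    stream=True,
)

for event in stream:
    if event.type == "response.reasoning_summary_text.delta":
        print(event.delta, end="", flush=True)
    elif event.type == "response.reasoning_summary_text.done":
        print()  # summary finished; the full text is available as event.text
```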
diff --git a/src/openai/types/responses/response_usage.py b/src/openai/types/responses/response_usage.py
index 9ad36bd326..52b93ac578 100644
--- a/src/openai/types/responses/response_usage.py
+++ b/src/openai/types/responses/response_usage.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from ..._models import BaseModel
__all__ = ["ResponseUsage", "InputTokensDetails", "OutputTokensDetails"]
diff --git a/src/openai/types/shared/chat_model.py b/src/openai/types/shared/chat_model.py
index b19375725d..4869cd325c 100644
--- a/src/openai/types/shared/chat_model.py
+++ b/src/openai/types/shared/chat_model.py
@@ -5,6 +5,16 @@
__all__ = ["ChatModel"]
ChatModel: TypeAlias = Literal[
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
"o3-mini",
"o3-mini-2025-01-31",
"o1",
diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py
index 78a396d738..107aab2e4a 100644
--- a/src/openai/types/shared/reasoning.py
+++ b/src/openai/types/shared/reasoning.py
@@ -19,10 +19,17 @@ class Reasoning(BaseModel):
result in faster responses and fewer tokens used on reasoning in a response.
"""
- generate_summary: Optional[Literal["concise", "detailed"]] = None
- """**computer_use_preview only**
+ generate_summary: Optional[Literal["auto", "concise", "detailed"]] = None
+ """**Deprecated:** use `summary` instead.
A summary of the reasoning performed by the model. This can be useful for
- debugging and understanding the model's reasoning process. One of `concise` or
- `detailed`.
+ debugging and understanding the model's reasoning process. One of `auto`,
+ `concise`, or `detailed`.
+ """
+
+ summary: Optional[Literal["auto", "concise", "detailed"]] = None
+ """A summary of the reasoning performed by the model.
+
+ This can be useful for debugging and understanding the model's reasoning
+ process. One of `auto`, `concise`, or `detailed`.
"""
diff --git a/src/openai/types/shared_params/chat_model.py b/src/openai/types/shared_params/chat_model.py
index ff81b07ac3..99e082fc11 100644
--- a/src/openai/types/shared_params/chat_model.py
+++ b/src/openai/types/shared_params/chat_model.py
@@ -7,6 +7,16 @@
__all__ = ["ChatModel"]
ChatModel: TypeAlias = Literal[
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+ "gpt-4.1-2025-04-14",
+ "gpt-4.1-mini-2025-04-14",
+ "gpt-4.1-nano-2025-04-14",
+ "o4-mini",
+ "o4-mini-2025-04-16",
+ "o3",
+ "o3-2025-04-16",
"o3-mini",
"o3-mini-2025-01-31",
"o1",
diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py
index 2953b895c4..73e1a008df 100644
--- a/src/openai/types/shared_params/reasoning.py
+++ b/src/openai/types/shared_params/reasoning.py
@@ -20,10 +20,17 @@ class Reasoning(TypedDict, total=False):
result in faster responses and fewer tokens used on reasoning in a response.
"""
- generate_summary: Optional[Literal["concise", "detailed"]]
- """**computer_use_preview only**
+ generate_summary: Optional[Literal["auto", "concise", "detailed"]]
+ """**Deprecated:** use `summary` instead.
A summary of the reasoning performed by the model. This can be useful for
- debugging and understanding the model's reasoning process. One of `concise` or
- `detailed`.
+ debugging and understanding the model's reasoning process. One of `auto`,
+ `concise`, or `detailed`.
+ """
+
+ summary: Optional[Literal["auto", "concise", "detailed"]]
+ """A summary of the reasoning performed by the model.
+
+ This can be useful for debugging and understanding the model's reasoning
+ process. One of `auto`, `concise`, or `detailed`.
"""
diff --git a/src/openai/types/static_file_chunking_strategy.py b/src/openai/types/static_file_chunking_strategy.py
index 2813bc6630..cb842442c1 100644
--- a/src/openai/types/static_file_chunking_strategy.py
+++ b/src/openai/types/static_file_chunking_strategy.py
@@ -1,6 +1,5 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
from .._models import BaseModel
__all__ = ["StaticFileChunkingStrategy"]
diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py
index ecf5b11102..9916d5bdc6 100644
--- a/tests/api_resources/beta/test_threads.py
+++ b/tests/api_resources/beta/test_threads.py
@@ -220,7 +220,7 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI)
max_completion_tokens=256,
max_prompt_tokens=256,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="string",
parallel_tool_calls=True,
response_format="auto",
stream=False,
@@ -309,7 +309,7 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI)
max_completion_tokens=256,
max_prompt_tokens=256,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="string",
parallel_tool_calls=True,
response_format="auto",
temperature=1,
@@ -584,7 +584,7 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie
max_completion_tokens=256,
max_prompt_tokens=256,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="string",
parallel_tool_calls=True,
response_format="auto",
stream=False,
@@ -673,7 +673,7 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie
max_completion_tokens=256,
max_prompt_tokens=256,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="string",
parallel_tool_calls=True,
response_format="auto",
temperature=1,
diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py
index d05ee96144..4230ccebe4 100644
--- a/tests/api_resources/beta/threads/test_runs.py
+++ b/tests/api_resources/beta/threads/test_runs.py
@@ -54,7 +54,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
max_completion_tokens=256,
max_prompt_tokens=256,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="string",
parallel_tool_calls=True,
reasoning_effort="low",
response_format="auto",
@@ -138,7 +138,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
max_completion_tokens=256,
max_prompt_tokens=256,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="string",
parallel_tool_calls=True,
reasoning_effort="low",
response_format="auto",
@@ -552,7 +552,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
max_completion_tokens=256,
max_prompt_tokens=256,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="string",
parallel_tool_calls=True,
reasoning_effort="low",
response_format="auto",
@@ -636,7 +636,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
max_completion_tokens=256,
max_prompt_tokens=256,
metadata={"foo": "string"},
- model="gpt-4o",
+ model="string",
parallel_tool_calls=True,
reasoning_effort="low",
response_format="auto",
diff --git a/tests/api_resources/evals/__init__.py b/tests/api_resources/evals/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/evals/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/evals/runs/__init__.py b/tests/api_resources/evals/runs/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/evals/runs/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/evals/runs/test_output_items.py b/tests/api_resources/evals/runs/test_output_items.py
new file mode 100644
index 0000000000..f764f0336e
--- /dev/null
+++ b/tests/api_resources/evals/runs/test_output_items.py
@@ -0,0 +1,263 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+from openai.types.evals.runs import OutputItemListResponse, OutputItemRetrieveResponse
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/")
+
+
+class TestOutputItems:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ output_item = client.evals.runs.output_items.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ output_item = response.parse()
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.evals.runs.output_items.with_streaming_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ output_item = response.parse()
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="",
+ run_id="run_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `output_item_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ output_item = client.evals.runs.output_items.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ output_item = client.evals.runs.output_items.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ after="after",
+ limit=0,
+ order="asc",
+ status="fail",
+ )
+ assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.evals.runs.output_items.with_raw_response.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ output_item = response.parse()
+ assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.evals.runs.output_items.with_streaming_response.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ output_item = response.parse()
+ assert_matches_type(SyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.list(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.output_items.with_raw_response.list(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+
+class TestAsyncOutputItems:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ output_item = await async_client.evals.runs.output_items.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ output_item = response.parse()
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.output_items.with_streaming_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="run_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ output_item = await response.parse()
+ assert_matches_type(OutputItemRetrieveResponse, output_item, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="",
+ run_id="run_id",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="output_item_id",
+ eval_id="eval_id",
+ run_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `output_item_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.retrieve(
+ output_item_id="",
+ eval_id="eval_id",
+ run_id="run_id",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ output_item = await async_client.evals.runs.output_items.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ output_item = await async_client.evals.runs.output_items.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ after="after",
+ limit=0,
+ order="asc",
+ status="fail",
+ )
+ assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.output_items.with_raw_response.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ output_item = response.parse()
+ assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.output_items.with_streaming_response.list(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ output_item = await response.parse()
+ assert_matches_type(AsyncCursorPage[OutputItemListResponse], output_item, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.list(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.output_items.with_raw_response.list(
+ run_id="",
+ eval_id="eval_id",
+ )
diff --git a/tests/api_resources/evals/test_runs.py b/tests/api_resources/evals/test_runs.py
new file mode 100644
index 0000000000..cefb1c82ff
--- /dev/null
+++ b/tests/api_resources/evals/test_runs.py
@@ -0,0 +1,589 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+from openai.types.evals import (
+ RunListResponse,
+ RunCancelResponse,
+ RunCreateResponse,
+ RunDeleteResponse,
+ RunRetrieveResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/")
+
+
+class TestRuns:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ run = client.evals.runs.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ run = client.evals.runs.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [
+ {
+ "item": {"foo": "bar"},
+ "sample": {"foo": "bar"},
+ }
+ ],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.create(
+ eval_id="",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ run = client.evals.runs.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.retrieve(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.with_raw_response.retrieve(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ run = client.evals.runs.list(
+ eval_id="eval_id",
+ )
+ assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ run = client.evals.runs.list(
+ eval_id="eval_id",
+ after="after",
+ limit=0,
+ order="asc",
+ status="queued",
+ )
+ assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.list(
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.list(
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(SyncCursorPage[RunListResponse], run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_list(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.list(
+ eval_id="",
+ )
+
+ @parametrize
+ def test_method_delete(self, client: OpenAI) -> None:
+ run = client.evals.runs.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ @parametrize
+ def test_raw_response_delete(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_delete(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.delete(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.with_raw_response.delete(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+ @parametrize
+ def test_method_cancel(self, client: OpenAI) -> None:
+ run = client.evals.runs.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ @parametrize
+ def test_raw_response_cancel(self, client: OpenAI) -> None:
+ response = client.evals.runs.with_raw_response.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ @parametrize
+ def test_streaming_response_cancel(self, client: OpenAI) -> None:
+ with client.evals.runs.with_streaming_response.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = response.parse()
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_cancel(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.runs.with_raw_response.cancel(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ client.evals.runs.with_raw_response.cancel(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+
+class TestAsyncRuns:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [
+ {
+ "item": {"foo": "bar"},
+ "sample": {"foo": "bar"},
+ }
+ ],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.create(
+ eval_id="eval_id",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(RunCreateResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.create(
+ eval_id="",
+ data_source={
+ "source": {
+ "content": [{"item": {"foo": "bar"}}],
+ "type": "file_content",
+ },
+ "type": "jsonl",
+ },
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.retrieve(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(RunRetrieveResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.retrieve(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.retrieve(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.list(
+ eval_id="eval_id",
+ )
+ assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.list(
+ eval_id="eval_id",
+ after="after",
+ limit=0,
+ order="asc",
+ status="queued",
+ )
+ assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.list(
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.list(
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(AsyncCursorPage[RunListResponse], run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_list(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.list(
+ eval_id="",
+ )
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.delete(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(RunDeleteResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.delete(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.delete(
+ run_id="",
+ eval_id="eval_id",
+ )
+
+ @parametrize
+ async def test_method_cancel(self, async_client: AsyncOpenAI) -> None:
+ run = await async_client.evals.runs.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ @parametrize
+ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.runs.with_raw_response.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ run = response.parse()
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.runs.with_streaming_response.cancel(
+ run_id="run_id",
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ run = await response.parse()
+ assert_matches_type(RunCancelResponse, run, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.cancel(
+ run_id="run_id",
+ eval_id="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"):
+ await async_client.evals.runs.with_raw_response.cancel(
+ run_id="",
+ eval_id="eval_id",
+ )
diff --git a/tests/api_resources/fine_tuning/checkpoints/__init__.py b/tests/api_resources/fine_tuning/checkpoints/__init__.py
new file mode 100644
index 0000000000..fd8019a9a1
--- /dev/null
+++ b/tests/api_resources/fine_tuning/checkpoints/__init__.py
@@ -0,0 +1 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
diff --git a/tests/api_resources/fine_tuning/checkpoints/test_permissions.py b/tests/api_resources/fine_tuning/checkpoints/test_permissions.py
new file mode 100644
index 0000000000..6aa0b867d9
--- /dev/null
+++ b/tests/api_resources/fine_tuning/checkpoints/test_permissions.py
@@ -0,0 +1,317 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.pagination import SyncPage, AsyncPage
+from openai.types.fine_tuning.checkpoints import (
+ PermissionCreateResponse,
+ PermissionDeleteResponse,
+ PermissionRetrieveResponse,
+)
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/")
+
+
+class TestPermissions:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ permission = client.fine_tuning.checkpoints.permissions.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ )
+ assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.fine_tuning.checkpoints.permissions.with_raw_response.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.fine_tuning.checkpoints.permissions.with_streaming_response.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = response.parse()
+ assert_matches_type(SyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_create(self, client: OpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ client.fine_tuning.checkpoints.permissions.with_raw_response.create(
+ fine_tuned_model_checkpoint="",
+ project_ids=["string"],
+ )
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ permission = client.fine_tuning.checkpoints.permissions.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ def test_method_retrieve_with_all_params(self, client: OpenAI) -> None:
+ permission = client.fine_tuning.checkpoints.permissions.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="after",
+ limit=0,
+ order="ascending",
+ project_id="project_id",
+ )
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.fine_tuning.checkpoints.permissions.with_streaming_response.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = response.parse()
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve(
+ fine_tuned_model_checkpoint="",
+ )
+
+ @parametrize
+ def test_method_delete(self, client: OpenAI) -> None:
+ permission = client.fine_tuning.checkpoints.permissions.delete(
+ permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ @parametrize
+ def test_raw_response_delete(self, client: OpenAI) -> None:
+ response = client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ @parametrize
+ def test_streaming_response_delete(self, client: OpenAI) -> None:
+ with client.fine_tuning.checkpoints.permissions.with_streaming_response.delete(
+ permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = response.parse()
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: OpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+ fine_tuned_model_checkpoint="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `permission_id` but received ''"):
+ client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ permission_id="",
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+
+
+class TestAsyncPermissions:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ permission = await async_client.fine_tuning.checkpoints.permissions.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ )
+ assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.create(
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ project_ids=["string"],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = await response.parse()
+ assert_matches_type(AsyncPage[PermissionCreateResponse], permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_create(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ await async_client.fine_tuning.checkpoints.permissions.with_raw_response.create(
+ fine_tuned_model_checkpoint="",
+ project_ids=["string"],
+ )
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ permission = await async_client.fine_tuning.checkpoints.permissions.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ permission = await async_client.fine_tuning.checkpoints.permissions.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ after="after",
+ limit=0,
+ order="ascending",
+ project_id="project_id",
+ )
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.retrieve(
+ fine_tuned_model_checkpoint="ft-AF1WoRqd3aJAHsqc9NY7iL8F",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = await response.parse()
+ assert_matches_type(PermissionRetrieveResponse, permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ await async_client.fine_tuning.checkpoints.permissions.with_raw_response.retrieve(
+ fine_tuned_model_checkpoint="",
+ )
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncOpenAI) -> None:
+ permission = await async_client.fine_tuning.checkpoints.permissions.delete(
+ permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ permission = response.parse()
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.fine_tuning.checkpoints.permissions.with_streaming_response.delete(
+ permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ permission = await response.parse()
+ assert_matches_type(PermissionDeleteResponse, permission, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(
+ ValueError, match=r"Expected a non-empty value for `fine_tuned_model_checkpoint` but received ''"
+ ):
+ await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ permission_id="cp_zc4Q7MP6XxulcVzj4MZdwsAB",
+ fine_tuned_model_checkpoint="",
+ )
+
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `permission_id` but received ''"):
+ await async_client.fine_tuning.checkpoints.permissions.with_raw_response.delete(
+ permission_id="",
+ fine_tuned_model_checkpoint="ft:gpt-4o-mini-2024-07-18:org:weather:B7R9VjQd",
+ )
diff --git a/tests/api_resources/test_evals.py b/tests/api_resources/test_evals.py
new file mode 100644
index 0000000000..4ae2c597dd
--- /dev/null
+++ b/tests/api_resources/test_evals.py
@@ -0,0 +1,571 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types import (
+ EvalListResponse,
+ EvalCreateResponse,
+ EvalDeleteResponse,
+ EvalUpdateResponse,
+ EvalRetrieveResponse,
+)
+from openai.pagination import SyncCursorPage, AsyncCursorPage
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/")
+
+
+class TestEvals:
+ parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ def test_method_create(self, client: OpenAI) -> None:
+ eval = client.evals.create(
+ data_source_config={
+ "item_schema": {"foo": "bar"},
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ )
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_method_create_with_all_params(self, client: OpenAI) -> None:
+ eval = client.evals.create(
+ data_source_config={
+ "item_schema": {"foo": "bar"},
+ "type": "custom",
+ "include_sample_schema": True,
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_create(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.create(
+ data_source_config={
+ "item_schema": {"foo": "bar"},
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_create(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.create(
+ data_source_config={
+ "item_schema": {"foo": "bar"},
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_retrieve(self, client: OpenAI) -> None:
+ eval = client.evals.retrieve(
+ "eval_id",
+ )
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_retrieve(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.retrieve(
+ "eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_retrieve(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.retrieve(
+ "eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_retrieve(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ def test_method_update(self, client: OpenAI) -> None:
+ eval = client.evals.update(
+ eval_id="eval_id",
+ )
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_method_update_with_all_params(self, client: OpenAI) -> None:
+ eval = client.evals.update(
+ eval_id="eval_id",
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_update(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.update(
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_update(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.update(
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_update(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.with_raw_response.update(
+ eval_id="",
+ )
+
+ @parametrize
+ def test_method_list(self, client: OpenAI) -> None:
+ eval = client.evals.list()
+ assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ def test_method_list_with_all_params(self, client: OpenAI) -> None:
+ eval = client.evals.list(
+ after="after",
+ limit=0,
+ order="asc",
+ order_by="created_at",
+ )
+ assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_list(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_list(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(SyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_method_delete(self, client: OpenAI) -> None:
+ eval = client.evals.delete(
+ "eval_id",
+ )
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ @parametrize
+ def test_raw_response_delete(self, client: OpenAI) -> None:
+ response = client.evals.with_raw_response.delete(
+ "eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ @parametrize
+ def test_streaming_response_delete(self, client: OpenAI) -> None:
+ with client.evals.with_streaming_response.delete(
+ "eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = response.parse()
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ def test_path_params_delete(self, client: OpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ client.evals.with_raw_response.delete(
+ "",
+ )
+
+
+class TestAsyncEvals:
+ parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+ @parametrize
+ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.create(
+ data_source_config={
+ "item_schema": {"foo": "bar"},
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ )
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.create(
+ data_source_config={
+ "item_schema": {"foo": "bar"},
+ "type": "custom",
+ "include_sample_schema": True,
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.create(
+ data_source_config={
+ "item_schema": {"foo": "bar"},
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.create(
+ data_source_config={
+ "item_schema": {"foo": "bar"},
+ "type": "custom",
+ },
+ testing_criteria=[
+ {
+ "input": [
+ {
+ "content": "content",
+ "role": "role",
+ }
+ ],
+ "labels": ["string"],
+ "model": "model",
+ "name": "name",
+ "passing_labels": ["string"],
+ "type": "label_model",
+ }
+ ],
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvalCreateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.retrieve(
+ "eval_id",
+ )
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.retrieve(
+ "eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.retrieve(
+ "eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvalRetrieveResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.with_raw_response.retrieve(
+ "",
+ )
+
+ @parametrize
+ async def test_method_update(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.update(
+ eval_id="eval_id",
+ )
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.update(
+ eval_id="eval_id",
+ metadata={"foo": "string"},
+ name="name",
+ )
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.update(
+ eval_id="eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.update(
+ eval_id="eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvalUpdateResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_update(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.with_raw_response.update(
+ eval_id="",
+ )
+
+ @parametrize
+ async def test_method_list(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.list()
+ assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.list(
+ after="after",
+ limit=0,
+ order="asc",
+ order_by="created_at",
+ )
+ assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.list()
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.list() as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(AsyncCursorPage[EvalListResponse], eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_method_delete(self, async_client: AsyncOpenAI) -> None:
+ eval = await async_client.evals.delete(
+ "eval_id",
+ )
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None:
+ response = await async_client.evals.with_raw_response.delete(
+ "eval_id",
+ )
+
+ assert response.is_closed is True
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+ eval = response.parse()
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ @parametrize
+ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None:
+ async with async_client.evals.with_streaming_response.delete(
+ "eval_id",
+ ) as response:
+ assert not response.is_closed
+ assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+ eval = await response.parse()
+ assert_matches_type(EvalDeleteResponse, eval, path=["response"])
+
+ assert cast(Any, response.is_closed) is True
+
+ @parametrize
+ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None:
+ with pytest.raises(ValueError, match=r"Expected a non-empty value for `eval_id` but received ''"):
+ await async_client.evals.with_raw_response.delete(
+ "",
+ )
diff --git a/tests/api_resources/test_images.py b/tests/api_resources/test_images.py
index 9bc9719bc5..7997e9f5a1 100644
--- a/tests/api_resources/test_images.py
+++ b/tests/api_resources/test_images.py
@@ -28,10 +28,10 @@ def test_method_create_variation(self, client: OpenAI) -> None:
def test_method_create_variation_with_all_params(self, client: OpenAI) -> None:
image = client.images.create_variation(
image=b"raw file contents",
- model="dall-e-2",
+ model="string",
n=1,
response_format="url",
- size="256x256",
+ size="1024x1024",
user="user-1234",
)
assert_matches_type(ImagesResponse, image, path=["response"])
@@ -74,10 +74,11 @@ def test_method_edit_with_all_params(self, client: OpenAI) -> None:
image=b"raw file contents",
prompt="A cute baby sea otter wearing a beret",
mask=b"raw file contents",
- model="dall-e-2",
+ model="string",
n=1,
+ quality="high",
response_format="url",
- size="256x256",
+ size="1024x1024",
user="user-1234",
)
assert_matches_type(ImagesResponse, image, path=["response"])
@@ -119,11 +120,15 @@ def test_method_generate(self, client: OpenAI) -> None:
def test_method_generate_with_all_params(self, client: OpenAI) -> None:
image = client.images.generate(
prompt="A cute baby sea otter",
- model="dall-e-3",
+ background="transparent",
+ model="string",
+ moderation="low",
n=1,
- quality="standard",
+ output_compression=100,
+ output_format="png",
+ quality="medium",
response_format="url",
- size="256x256",
+ size="1024x1024",
style="vivid",
user="user-1234",
)
@@ -168,10 +173,10 @@ async def test_method_create_variation(self, async_client: AsyncOpenAI) -> None:
async def test_method_create_variation_with_all_params(self, async_client: AsyncOpenAI) -> None:
image = await async_client.images.create_variation(
image=b"raw file contents",
- model="dall-e-2",
+ model="string",
n=1,
response_format="url",
- size="256x256",
+ size="1024x1024",
user="user-1234",
)
assert_matches_type(ImagesResponse, image, path=["response"])
@@ -214,10 +219,11 @@ async def test_method_edit_with_all_params(self, async_client: AsyncOpenAI) -> N
image=b"raw file contents",
prompt="A cute baby sea otter wearing a beret",
mask=b"raw file contents",
- model="dall-e-2",
+ model="string",
n=1,
+ quality="high",
response_format="url",
- size="256x256",
+ size="1024x1024",
user="user-1234",
)
assert_matches_type(ImagesResponse, image, path=["response"])
@@ -259,11 +265,15 @@ async def test_method_generate(self, async_client: AsyncOpenAI) -> None:
async def test_method_generate_with_all_params(self, async_client: AsyncOpenAI) -> None:
image = await async_client.images.generate(
prompt="A cute baby sea otter",
- model="dall-e-3",
+ background="transparent",
+ model="string",
+ moderation="low",
n=1,
- quality="standard",
+ output_compression=100,
+ output_format="png",
+ quality="medium",
response_format="url",
- size="256x256",
+ size="1024x1024",
style="vivid",
user="user-1234",
)
diff --git a/tests/api_resources/test_moderations.py b/tests/api_resources/test_moderations.py
index bbdeb63e49..6df6464110 100644
--- a/tests/api_resources/test_moderations.py
+++ b/tests/api_resources/test_moderations.py
@@ -28,7 +28,7 @@ def test_method_create(self, client: OpenAI) -> None:
def test_method_create_with_all_params(self, client: OpenAI) -> None:
moderation = client.moderations.create(
input="I want to kill them.",
- model="omni-moderation-2024-09-26",
+ model="string",
)
assert_matches_type(ModerationCreateResponse, moderation, path=["response"])
@@ -71,7 +71,7 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None:
async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
moderation = await async_client.moderations.create(
input="I want to kill them.",
- model="omni-moderation-2024-09-26",
+ model="string",
)
assert_matches_type(ModerationCreateResponse, moderation, path=["response"])
diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py
index e45a5becf3..3753af8fdb 100644
--- a/tests/api_resources/test_responses.py
+++ b/tests/api_resources/test_responses.py
@@ -38,8 +38,10 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
previous_response_id="previous_response_id",
reasoning={
"effort": "low",
- "generate_summary": "concise",
+ "generate_summary": "auto",
+ "summary": "auto",
},
+ service_tier="auto",
store=True,
stream=False,
temperature=1,
@@ -116,8 +118,10 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
previous_response_id="previous_response_id",
reasoning={
"effort": "low",
- "generate_summary": "concise",
+ "generate_summary": "auto",
+ "summary": "auto",
},
+ service_tier="auto",
store=True,
temperature=1,
text={"format": {"type": "text"}},
@@ -280,8 +284,10 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
previous_response_id="previous_response_id",
reasoning={
"effort": "low",
- "generate_summary": "concise",
+ "generate_summary": "auto",
+ "summary": "auto",
},
+ service_tier="auto",
store=True,
stream=False,
temperature=1,
@@ -358,8 +364,10 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
previous_response_id="previous_response_id",
reasoning={
"effort": "low",
- "generate_summary": "concise",
+ "generate_summary": "auto",
+ "summary": "auto",
},
+ service_tier="auto",
store=True,
temperature=1,
text={"format": {"type": "text"}},
diff --git a/tests/conftest.py b/tests/conftest.py
index fa82d39d86..8b01753e2f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,7 @@
from openai import OpenAI, AsyncOpenAI
if TYPE_CHECKING:
- from _pytest.fixtures import FixtureRequest
+ from _pytest.fixtures import FixtureRequest # pyright: ignore[reportPrivateImportUsage]
pytest.register_assert_rewrite("tests.utils")
diff --git a/tests/test_client.py b/tests/test_client.py
index 62654afe1e..616255af3c 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -1797,7 +1797,7 @@ def test_get_platform(self) -> None:
import threading
from openai._utils import asyncify
- from openai._base_client import get_platform
+ from openai._base_client import get_platform
async def test_main() -> None:
result = await asyncify(get_platform)()
diff --git a/tests/test_models.py b/tests/test_models.py
index b9be1f3ea3..440e17a08c 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -492,12 +492,15 @@ class Model(BaseModel):
resource_id: Optional[str] = None
m = Model.construct()
+ assert m.resource_id is None
assert "resource_id" not in m.model_fields_set
m = Model.construct(resource_id=None)
+ assert m.resource_id is None
assert "resource_id" in m.model_fields_set
m = Model.construct(resource_id="foo")
+ assert m.resource_id == "foo"
assert "resource_id" in m.model_fields_set
@@ -832,7 +835,7 @@ class B(BaseModel):
@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1")
def test_type_alias_type() -> None:
- Alias = TypeAliasType("Alias", str)
+ Alias = TypeAliasType("Alias", str) # pyright: ignore
class Model(BaseModel):
alias: Alias
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 385fbe2b2c..965f65f74f 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -8,7 +8,7 @@
import pytest
-from openai._types import Base64FileInput
+from openai._types import NOT_GIVEN, Base64FileInput
from openai._utils import (
PropertyInfo,
transform as _transform,
@@ -432,3 +432,22 @@ async def test_base64_file_input(use_async: bool) -> None:
assert await transform({"foo": io.BytesIO(b"Hello, world!")}, TypedDictBase64Input, use_async) == {
"foo": "SGVsbG8sIHdvcmxkIQ=="
} # type: ignore[comparison-overlap]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_transform_skipping(use_async: bool) -> None:
+ # lists of ints are left as-is
+ data = [1, 2, 3]
+ assert await transform(data, List[int], use_async) is data
+
+ # iterables of ints are converted to a list
+ data = iter([1, 2, 3])
+ assert await transform(data, Iterable[int], use_async) == [1, 2, 3]
+
+
+@parametrize
+@pytest.mark.asyncio
+async def test_strips_notgiven(use_async: bool) -> None:
+ assert await transform({"foo_bar": "bar"}, Foo1, use_async) == {"fooBar": "bar"}
+ assert await transform({"foo_bar": NOT_GIVEN}, Foo1, use_async) == {}