diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index dd93962010..ff261bad78 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -3,7 +3,7 @@ FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} USER vscode -RUN curl -sSf https://rye-up.com/get | RYE_VERSION="0.24.0" RYE_INSTALL_OPTION="--yes" bash +RUN curl -sSf https://rye.astral.sh/get | RYE_VERSION="0.44.0" RYE_INSTALL_OPTION="--yes" bash ENV PATH=/home/vscode/.rye/shims:$PATH -RUN echo "[[ -d .venv ]] && source .venv/bin/activate" >> /home/vscode/.bashrc +RUN echo "[[ -d .venv ]] && source .venv/bin/activate || export PATH=\$PATH" >> /home/vscode/.bashrc diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index bbeb30b148..c17fdc169f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -24,6 +24,9 @@ } } } + }, + "features": { + "ghcr.io/devcontainers/features/node:1": {} } // Features to add to the dev container. More info: https://containers.dev/features. diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 3ce5f8d004..d58c8454c5 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1 +1,4 @@ +# This file is used to automatically assign reviewers to PRs +# For more information see: https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners + * @openai/sdks-team diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml deleted file mode 100644 index fa09dbe5b0..0000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ /dev/null @@ -1,64 +0,0 @@ -name: Bug report -description: Report an issue or bug with this library -labels: ['bug'] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this bug report! - - type: checkboxes - id: non_api - attributes: - label: Confirm this is an issue with the Python library and not an underlying OpenAI API - description: Issues with the underlying OpenAI API should be reported on our [Developer Community](https://community.openai.com/c/api/7) - options: - - label: This is an issue with the Python library - required: true - - type: textarea - id: what-happened - attributes: - label: Describe the bug - description: A clear and concise description of what the bug is, and any additional context. - placeholder: Tell us what you see! - validations: - required: true - - type: textarea - id: repro-steps - attributes: - label: To Reproduce - description: Steps to reproduce the behavior. - placeholder: | - 1. Fetch a '...' - 2. Update the '....' - 3. See error - validations: - required: true - - type: textarea - id: code-snippets - attributes: - label: Code snippets - description: If applicable, add code snippets to help explain your problem. 
- render: Python - validations: - required: false - - type: input - id: os - attributes: - label: OS - placeholder: macOS - validations: - required: true - - type: input - id: language-version - attributes: - label: Python version - placeholder: Python v3.11.4 - validations: - required: true - - type: input - id: lib-version - attributes: - label: Library version - placeholder: openai v1.0.1 - validations: - required: true diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 0498cf7f6f..0000000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,7 +0,0 @@ -blank_issues_enabled: false -contact_links: - - name: OpenAI support - url: https://help.openai.com/ - about: | - Please only file issues here that you believe represent actual bugs or feature requests for the OpenAI Python library. - If you're having general trouble with the OpenAI API, please visit our help center to get support. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml deleted file mode 100644 index b529547d08..0000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Feature request -description: Suggest an idea for this library -labels: ['feature-request'] -body: - - type: markdown - attributes: - value: | - Thanks for taking the time to fill out this feature request! - - type: checkboxes - id: non_api - attributes: - label: Confirm this is a feature request for the Python library and not the underlying OpenAI API. - description: Feature requests for the underlying OpenAI API should be reported on our [Developer Community](https://community.openai.com/c/api/7) - options: - - label: This is a feature request for the Python library - required: true - - type: textarea - id: feature - attributes: - label: Describe the feature or improvement you're requesting - description: A clear and concise description of what you want to happen. - validations: - required: true - - type: textarea - id: context - attributes: - label: Additional context - description: Add any other context about the feature request here. 
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index 4416b1e547..0000000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1,10 +0,0 @@ - - - - - -- [ ] I understand that this repository is auto-generated and my pull request may not be merged - -## Changes being requested - -## Additional context & links diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 76655ed7d6..d86fc0ea53 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,22 +6,22 @@ on: pull_request: branches: - main + - next jobs: lint: name: lint runs-on: ubuntu-latest - if: github.repository == 'openai/openai-python' steps: - uses: actions/checkout@v4 - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 + RYE_VERSION: '0.44.0' RYE_INSTALL_OPTION: '--yes' - name: Install dependencies @@ -29,20 +29,20 @@ jobs: - name: Run lints run: ./scripts/lint + test: name: test runs-on: ubuntu-latest - if: github.repository == 'openai/openai-python' steps: - uses: actions/checkout@v4 - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 + RYE_VERSION: '0.44.0' RYE_INSTALL_OPTION: '--yes' - name: Bootstrap @@ -51,3 +51,29 @@ jobs: - name: Run tests run: ./scripts/test + examples: + name: examples + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rye + run: | + curl -sSf https://rye.astral.sh/get | bash + echo "$HOME/.rye/shims" >> $GITHUB_PATH + env: + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' + - name: Install dependencies + run: | + rye sync --all-features + + - env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + rye run python examples/demo.py + - env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + rye run python examples/async_demo.py diff --git a/.github/workflows/create-releases.yml b/.github/workflows/create-releases.yml deleted file mode 100644 index a641be287b..0000000000 --- a/.github/workflows/create-releases.yml +++ /dev/null @@ -1,39 +0,0 @@ -name: Create releases -on: - schedule: - - cron: '0 5 * * *' # every day at 5am UTC - push: - branches: - - main - -jobs: - release: - name: release - if: github.ref == 'refs/heads/main' && github.repository == 'openai/openai-python' - runs-on: ubuntu-latest - environment: publish - - steps: - - uses: actions/checkout@v4 - - - uses: stainless-api/trigger-release-please@v1 - id: release - with: - repo: ${{ github.event.repository.full_name }} - stainless-api-key: ${{ secrets.STAINLESS_API_KEY }} - - - name: Install Rye - if: ${{ steps.release.outputs.releases_created }} - run: | - curl -sSf https://rye-up.com/get | bash - echo "$HOME/.rye/shims" >> $GITHUB_PATH - env: - RYE_VERSION: 0.24.0 - RYE_INSTALL_OPTION: "--yes" - - - name: Publish to PyPI - if: ${{ steps.release.outputs.releases_created }} - run: | - bash ./bin/publish-pypi - env: - PYPI_TOKEN: ${{ secrets.OPENAI_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.github/workflows/publish-pypi.yml b/.github/workflows/publish-pypi.yml index 2f88f86407..403b895b7e 100644 --- a/.github/workflows/publish-pypi.yml +++ b/.github/workflows/publish-pypi.yml @@ -1,9 +1,13 @@ -# workflow for re-running publishing to PyPI in case it fails for some reason -# you can run this workflow by navigating to 
https://www.github.com/openai/openai-python/actions/workflows/publish-pypi.yml +# This workflow is triggered when a GitHub release is created. +# It can also be run manually to re-publish to PyPI in case it failed for some reason. +# You can run this workflow by navigating to https://www.github.com/openai/openai-python/actions/workflows/publish-pypi.yml name: Publish PyPI on: workflow_dispatch: + release: + types: [published] + jobs: publish: name: publish @@ -14,11 +18,11 @@ jobs: - name: Install Rye run: | - curl -sSf https://rye-up.com/get | bash + curl -sSf https://rye.astral.sh/get | bash echo "$HOME/.rye/shims" >> $GITHUB_PATH env: - RYE_VERSION: 0.24.0 - RYE_INSTALL_OPTION: "--yes" + RYE_VERSION: '0.44.0' + RYE_INSTALL_OPTION: '--yes' - name: Publish to PyPI run: | diff --git a/.github/workflows/release-doctor.yml b/.github/workflows/release-doctor.yml index e078964a6f..445f626d93 100644 --- a/.github/workflows/release-doctor.yml +++ b/.github/workflows/release-doctor.yml @@ -19,5 +19,4 @@ jobs: run: | bash ./bin/check-release-environment env: - STAINLESS_API_KEY: ${{ secrets.STAINLESS_API_KEY }} PYPI_TOKEN: ${{ secrets.OPENAI_PYPI_TOKEN || secrets.PYPI_TOKEN }} diff --git a/.gitignore b/.gitignore index 0f9a66a976..8779740800 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.prism.log .vscode _dev diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 1f79fd2d11..dac37ce406 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.30.1" + ".": "1.66.4" } \ No newline at end of file diff --git a/.stats.yml b/.stats.yml index 2e5c705a0d..1e04d7c268 100644 --- a/.stats.yml +++ b/.stats.yml @@ -1,2 +1,2 @@ -configured_endpoints: 64 -openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-363dd904e5d6e65b3a323fc88e6b502fb23a6aa319be219273e3ee47c7530993.yml +configured_endpoints: 81 +openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-f763c1a35c8b9b02f1e31b9b2e09e21f98bfe8413e5079c86cbb07da2dd7779b.yml diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 354d21b2d2..52c2eb213a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,9 +2,13 @@ ### With Rye -We use [Rye](https://rye-up.com/) to manage dependencies so we highly recommend [installing it](https://rye-up.com/guide/installation/) as it will automatically provision a Python environment with the expected Python version. +We use [Rye](https://rye.astral.sh/) to manage dependencies because it will automatically provision a Python environment with the expected Python version. To set it up, run: -After installing Rye, you'll just have to run this command: +```sh +$ ./scripts/bootstrap +``` + +Or [install Rye manually](https://rye.astral.sh/guide/installation/) and run: ```sh $ rye sync --all-features @@ -31,25 +35,25 @@ $ pip install -r requirements-dev.lock ## Modifying/Adding code -Most of the SDK is generated code, and any modified code will be overridden on the next generation. The -`src/openai/lib/` and `examples/` directories are exceptions and will never be overridden. +Most of the SDK is generated code. Modifications to code will be persisted between generations, but may +result in merge conflicts between manual patches and changes from the generator. The generator will never +modify the contents of the `src/openai/lib/` and `examples/` directories. 
## Adding and running examples -All files in the `examples/` directory are not modified by the Stainless generator and can be freely edited or -added to. +All files in the `examples/` directory are not modified by the generator and can be freely edited or added to. -```bash +```py # add an example to examples/.py #!/usr/bin/env -S rye run python … ``` -``` -chmod +x examples/.py +```sh +$ chmod +x examples/.py # run the example against your api -./examples/.py +$ ./examples/.py ``` ## Using the repository from source @@ -58,8 +62,8 @@ If you’d like to use the repository from source, you can either install from g To install via git: -```bash -pip install git+ssh://git@github.com/openai/openai-python.git +```sh +$ pip install git+ssh://git@github.com/openai/openai-python.git ``` Alternatively, you can build from source and install the wheel file: @@ -68,29 +72,29 @@ Building this package will create two files in the `dist/` directory, a `.tar.gz To create a distributable version of the library, all you have to do is run this command: -```bash -rye build +```sh +$ rye build # or -python -m build +$ python -m build ``` Then to install: ```sh -pip install ./path-to-wheel-file.whl +$ pip install ./path-to-wheel-file.whl ``` ## Running tests Most tests require you to [set up a mock server](https://github.com/stoplightio/prism) against the OpenAPI spec to run the tests. -```bash +```sh # you will need npm installed -npx prism mock path/to/your/openapi.yml +$ npx prism mock path/to/your/openapi.yml ``` -```bash -rye run pytest +```sh +$ ./scripts/test ``` ## Linting and formatting @@ -100,14 +104,14 @@ This repository uses [ruff](https://github.com/astral-sh/ruff) and To lint: -```bash -rye run lint +```sh +$ ./scripts/lint ``` To format and fix all ruff issues automatically: -```bash -rye run format +```sh +$ ./scripts/format ``` ## Publishing and releases diff --git a/LICENSE b/LICENSE index 621a6becfb..f011417af6 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright 2024 OpenAI + Copyright 2025 OpenAI Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index e566a2f8d0..b4924be8e6 100644 --- a/README.md +++ b/README.md @@ -2,21 +2,16 @@ [![PyPI version](https://img.shields.io/pypi/v/openai.svg)](https://pypi.org/project/openai/) -The OpenAI Python library provides convenient access to the OpenAI REST API from any Python 3.7+ +The OpenAI Python library provides convenient access to the OpenAI REST API from any Python 3.8+ application. The library includes type definitions for all request params and response fields, and offers both synchronous and asynchronous clients powered by [httpx](https://github.com/encode/httpx). -It is generated from our [OpenAPI specification](https://github.com/openai/openai-openapi) with [Stainless](https://stainlessapi.com/). - ## Documentation -The REST API documentation can be found [on platform.openai.com](https://platform.openai.com/docs). The full API of this library can be found in [api.md](api.md). +The REST API documentation can be found on [platform.openai.com](https://platform.openai.com/docs). The full API of this library can be found in [api.md](api.md). ## Installation -> [!IMPORTANT] -> The SDK was rewritten in v1, which was released November 6th 2023. 
See the [v1 migration guide](https://github.com/openai/openai-python/discussions/742), which includes scripts to automatically update your code. - ```sh # install from PyPI pip install openai @@ -31,8 +26,7 @@ import os from openai import OpenAI client = OpenAI( - # This is the default and can be omitted - api_key=os.environ.get("OPENAI_API_KEY"), + api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted ) chat_completion = client.chat.completions.create( @@ -42,7 +36,7 @@ chat_completion = client.chat.completions.create( "content": "Say this is a test", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) ``` @@ -51,56 +45,6 @@ we recommend using [python-dotenv](https://pypi.org/project/python-dotenv/) to add `OPENAI_API_KEY="My API Key"` to your `.env` file so that your API Key is not stored in source control. -### Polling Helpers - -When interacting with the API some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete. The SDK includes -helper functions which will poll the status until it reaches a terminal state and then return the resulting object. -If an API method results in an action which could benefit from polling there will be a corresponding version of the -method ending in '\_and_poll'. - -For instance to create a Run and poll until it reaches a terminal state you can run: - -```python -run = client.beta.threads.runs.create_and_poll( - thread_id=thread.id, - assistant_id=assistant.id, -) -``` - -More information on the lifecycle of a Run can be found in the [Run Lifecycle Documentation](https://platform.openai.com/docs/assistants/how-it-works/run-lifecycle) - -### Bulk Upload Helpers - -When creating an interacting with vector stores, you can use the polling helpers to monitor the status of operations. -For convenience, we also provide a bulk upload helper to allow you to simultaneously upload several files at once. - -```python -sample_files = [Path("sample-paper.pdf"), ...] - -batch = await client.vector_stores.file_batches.upload_and_poll( - store.id, - files=sample_files, -) -``` - -### Streaming Helpers - -The SDK also includes helpers to process streams and handle the incoming events. - -```python -with client.beta.threads.runs.stream( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please address the user as Jane Doe. 
The user has a premium account.", -) as stream: - for event in stream: - # Print the text from text delta events - if event.type == "thread.message.delta" and event.data.delta.content: - print(event.data.delta.content[0].text) -``` - -More information on streaming helpers can be found in the dedicated documentation: [helpers.md](helpers.md) - ## Async usage Simply import `AsyncOpenAI` instead of `OpenAI` and use `await` with each API call: @@ -111,8 +55,7 @@ import asyncio from openai import AsyncOpenAI client = AsyncOpenAI( - # This is the default and can be omitted - api_key=os.environ.get("OPENAI_API_KEY"), + api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted ) @@ -124,7 +67,7 @@ async def main() -> None: "content": "Say this is a test", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) @@ -143,12 +86,17 @@ from openai import OpenAI client = OpenAI() stream = client.chat.completions.create( - model="gpt-4", - messages=[{"role": "user", "content": "Say this is a test"}], + messages=[ + { + "role": "user", + "content": "Say this is a test", + } + ], + model="gpt-4o", stream=True, ) -for chunk in stream: - print(chunk.choices[0].delta.content or "", end="") +for chat_completion in stream: + print(chat_completion) ``` The async client uses the exact same interface. @@ -158,60 +106,20 @@ from openai import AsyncOpenAI client = AsyncOpenAI() - -async def main(): - stream = await client.chat.completions.create( - model="gpt-4", - messages=[{"role": "user", "content": "Say this is a test"}], - stream=True, - ) - async for chunk in stream: - print(chunk.choices[0].delta.content or "", end="") - - -asyncio.run(main()) -``` - -## Module-level client - -> [!IMPORTANT] -> We highly recommend instantiating client instances instead of relying on the global client. - -We also expose a global client instance that is accessible in a similar fashion to versions prior to v1. - -```py -import openai - -# optional; defaults to `os.environ['OPENAI_API_KEY']` -openai.api_key = '...' - -# all client options can be configured just like the `OpenAI` instantiation counterpart -openai.base_url = "https://.../" -openai.default_headers = {"x-foo": "true"} - -completion = openai.chat.completions.create( - model="gpt-4", +stream = await client.chat.completions.create( messages=[ { "role": "user", - "content": "How do I output all files in a directory using Python?", - }, + "content": "Say this is a test", + } ], + model="gpt-4o", + stream=True, ) -print(completion.choices[0].message.content) +async for chat_completion in stream: + print(chat_completion) ``` -The API is the exact same as the standard client instance based API. - -This is intended to be used within REPLs or notebooks for faster iteration, **not** in application code. - -We recommend that you always instantiate a client (e.g., with `client = OpenAI()`) in application code because: - -- It can be difficult to reason about where client options are configured -- It's not possible to change certain client options without potentially causing race conditions -- It's harder to mock for testing purposes -- It's not possible to control cleanup of network connections - ## Using types Nested request parameters are [TypedDicts](https://docs.python.org/3/library/typing.html#typing.TypedDict). Responses are [Pydantic models](https://docs.pydantic.dev) which also provide helper methods for things like: @@ -228,7 +136,7 @@ List methods in the OpenAI API are paginated. 
This library provides auto-paginating iterators with each list response, so you do not have to request successive pages manually: ```python -import openai +from openai import OpenAI client = OpenAI() @@ -246,7 +154,7 @@ Or, asynchronously: ```python import asyncio -import openai +from openai import AsyncOpenAI client = AsyncOpenAI() @@ -308,7 +216,7 @@ completion = client.chat.completions.create( "content": "Can you generate an example json object describing a fruit?", } ], - model="gpt-3.5-turbo-1106", + model="gpt-4o", response_format={"type": "json_object"}, ) ``` @@ -348,7 +256,7 @@ client = OpenAI() try: client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o", training_file="file-abc123", ) except openai.APIConnectionError as e: @@ -362,7 +270,7 @@ except openai.APIStatusError as e: print(e.response) ``` -Error codes are as followed: +Error codes are as follows: | Status Code | Error Type | | ----------- | -------------------------- | @@ -397,10 +305,10 @@ client.with_options(max_retries=5).chat.completions.create( messages=[ { "role": "user", - "content": "How can I get the name of the current day in Node.js?", + "content": "How can I get the name of the current day in JavaScript?", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) ``` @@ -431,7 +339,7 @@ client.with_options(timeout=5.0).chat.completions.create( "content": "How can I list all files in a directory using Python?", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) ``` @@ -445,12 +353,14 @@ Note that requests that time out are [retried twice by default](#retries). We use the standard library [`logging`](https://docs.python.org/3/library/logging.html) module. -You can enable logging by setting the environment variable `OPENAI_LOG` to `debug`. +You can enable logging by setting the environment variable `OPENAI_LOG` to `info`. ```shell -$ export OPENAI_LOG=debug +$ export OPENAI_LOG=info ``` +Or to `debug` for more verbose logging. + ### How to tell whether `None` means `null` or missing In an API response, a field may be explicitly `null`, or missing entirely; in either case, its value is `None` in this library. You can differentiate the two cases with `.model_fields_set`: @@ -476,7 +386,7 @@ response = client.chat.completions.with_raw_response.create( "role": "user", "content": "Say this is a test", }], - model="gpt-3.5-turbo", + model="gpt-4o", ) print(response.headers.get('X-My-Header')) @@ -484,7 +394,7 @@ completion = response.parse() # get the object that `chat.completions.create()` print(completion) ``` -These methods return an [`LegacyAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_legacy_response.py) object. This is a legacy class as we're changing it slightly in the next major version. +These methods return a [`LegacyAPIResponse`](https://github.com/openai/openai-python/tree/main/src/openai/_legacy_response.py) object. This is a legacy class as we're changing it slightly in the next major version. For the sync client this will mostly be the same with the exception of `content` & `text` will be methods instead of properties. 
In the @@ -509,7 +419,7 @@ with client.chat.completions.with_streaming_response.create( "content": "Say this is a test", } ], - model="gpt-3.5-turbo", + model="gpt-4o", ) as response: print(response.headers.get("X-My-Header")) @@ -528,8 +438,7 @@ If you need to access undocumented endpoints, params, or response properties, th #### Undocumented endpoints To make requests to undocumented endpoints, you can make requests using `client.get`, `client.post`, and other -http verbs. Options on the client will be respected (such as retries) will be respected when making this -request. +http verbs. Options on the client will be respected (such as retries) when making this request. ```py import httpx @@ -558,81 +467,71 @@ can also get all the extra fields on the Pydantic model as a dict with You can directly override the [httpx client](https://www.python-httpx.org/api/#client) to customize it for your use case, including: -- Support for proxies -- Custom transports -- Additional [advanced](https://www.python-httpx.org/advanced/#client-instances) functionality +- Support for [proxies](https://www.python-httpx.org/advanced/proxies/) +- Custom [transports](https://www.python-httpx.org/advanced/transports/) +- Additional [advanced](https://www.python-httpx.org/advanced/clients/) functionality ```python +import httpx from openai import OpenAI, DefaultHttpxClient client = OpenAI( # Or use the `OPENAI_BASE_URL` env var base_url="http://my.test.server.example.com:8083/", http_client=DefaultHttpxClient( - proxies="http://my.test.proxy.example.com/", + proxy="http://my.test.proxy.example.com/", transport=httpx.HTTPTransport(local_address="0.0.0.0"), ), ) ``` -### Managing HTTP resources - -By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. +You can also customize the client on a per-request basis by using `with_options()`: -## Microsoft Azure OpenAI +```python +client.with_options(http_client=DefaultHttpxClient(...)) +``` -To use this library with [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/overview), use the `AzureOpenAI` -class instead of the `OpenAI` class. +### Managing HTTP resources -> [!IMPORTANT] -> The Azure API shape differs from the core API shape which means that the static types for responses / params -> won't always be correct. +By default the library closes underlying HTTP connections whenever the client is [garbage collected](https://docs.python.org/3/reference/datamodel.html#object.__del__). You can manually close the client using the `.close()` method if desired, or with a context manager that closes when exiting. ```py -from openai import AzureOpenAI - -# gets the API Key from environment variable AZURE_OPENAI_API_KEY -client = AzureOpenAI( - # https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning - api_version="2023-07-01-preview", - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource - azure_endpoint="https://example-endpoint.openai.azure.com/", -) - -completion = client.chat.completions.create( - model="deployment-name", # e.g. 
gpt-35-instant - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.to_json()) -``` - -In addition to the options provided in the base `OpenAI` client, the following options are provided: +from openai import OpenAI -- `azure_endpoint` (or the `AZURE_OPENAI_ENDPOINT` environment variable) -- `azure_deployment` -- `api_version` (or the `OPENAI_API_VERSION` environment variable) -- `azure_ad_token` (or the `AZURE_OPENAI_AD_TOKEN` environment variable) -- `azure_ad_token_provider` +with OpenAI() as client: + # make requests here + ... -An example of using the client with Azure Active Directory can be found [here](https://github.com/openai/openai-python/blob/main/examples/azure_ad.py). +# HTTP client is now closed +``` ## Versioning This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions: 1. Changes that only affect static types, without breaking runtime behavior. -2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals)_. +2. Changes to library internals which are technically public but not intended or documented for external use. _(Please open a GitHub issue to let us know if you are relying on such internals.)_ 3. Changes that we do not expect to impact the vast majority of users in practice. We take backwards-compatibility seriously and work hard to ensure you can rely on a smooth upgrade experience. We are keen for your feedback; please open an [issue](https://www.github.com/openai/openai-python/issues) with questions, bugs, or suggestions. +### Determining the installed version + +If you've upgraded to the latest version but aren't seeing any new features you were expecting then your python environment is likely still using an older version. + +You can determine the version that is being used at runtime with: + +```py +import openai +print(openai.__version__) +``` + ## Requirements -Python 3.7 or higher. +Python 3.8 or higher. + +## Contributing + +See [the contributing documentation](./CONTRIBUTING.md). diff --git a/SECURITY.md b/SECURITY.md index c54acaf331..3b3bd8a662 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,9 +2,9 @@ ## Reporting Security Issues -This SDK is generated by [Stainless Software Inc](http://stainlessapi.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. +This SDK is generated by [Stainless Software Inc](http://stainless.com). Stainless takes security seriously, and encourages you to report any security vulnerability promptly so that appropriate action can be taken. -To report a security issue, please contact the Stainless team at security@stainlessapi.com. +To report a security issue, please contact the Stainless team at security@stainless.com. 
## Responsible Disclosure diff --git a/api.md b/api.md index de69f11dca..b148b0a085 100644 --- a/api.md +++ b/api.md @@ -1,7 +1,20 @@ # Shared Types ```python -from openai.types import ErrorObject, FunctionDefinition, FunctionParameters +from openai.types import ( + ChatModel, + ComparisonFilter, + CompoundFilter, + ErrorObject, + FunctionDefinition, + FunctionParameters, + Metadata, + Reasoning, + ReasoningEffort, + ResponseFormatJSONObject, + ResponseFormatJSONSchema, + ResponseFormatText, +) ``` # Completions @@ -32,17 +45,26 @@ Types: from openai.types.chat import ( ChatCompletion, ChatCompletionAssistantMessageParam, + ChatCompletionAudio, + ChatCompletionAudioParam, ChatCompletionChunk, ChatCompletionContentPart, ChatCompletionContentPartImage, + ChatCompletionContentPartInputAudio, + ChatCompletionContentPartRefusal, ChatCompletionContentPartText, + ChatCompletionDeleted, + ChatCompletionDeveloperMessageParam, ChatCompletionFunctionCallOption, ChatCompletionFunctionMessageParam, ChatCompletionMessage, ChatCompletionMessageParam, ChatCompletionMessageToolCall, + ChatCompletionModality, ChatCompletionNamedToolChoice, + ChatCompletionPredictionContent, ChatCompletionRole, + ChatCompletionStoreMessage, ChatCompletionStreamOptions, ChatCompletionSystemMessageParam, ChatCompletionTokenLogprob, @@ -50,19 +72,30 @@ from openai.types.chat import ( ChatCompletionToolChoiceOption, ChatCompletionToolMessageParam, ChatCompletionUserMessageParam, + ChatCompletionReasoningEffort, ) ``` Methods: -- client.chat.completions.create(\*\*params) -> ChatCompletion +- client.chat.completions.create(\*\*params) -> ChatCompletion +- client.chat.completions.retrieve(completion_id) -> ChatCompletion +- client.chat.completions.update(completion_id, \*\*params) -> ChatCompletion +- client.chat.completions.list(\*\*params) -> SyncCursorPage[ChatCompletion] +- client.chat.completions.delete(completion_id) -> ChatCompletionDeleted + +### Messages + +Methods: + +- client.chat.completions.messages.list(completion_id, \*\*params) -> SyncCursorPage[ChatCompletionStoreMessage] # Embeddings Types: ```python -from openai.types import CreateEmbeddingResponse, Embedding +from openai.types import CreateEmbeddingResponse, Embedding, EmbeddingModel ``` Methods: @@ -74,25 +107,24 @@ Methods: Types: ```python -from openai.types import FileContent, FileDeleted, FileObject +from openai.types import FileContent, FileDeleted, FileObject, FilePurpose ``` Methods: - client.files.create(\*\*params) -> FileObject - client.files.retrieve(file_id) -> FileObject -- client.files.list(\*\*params) -> SyncPage[FileObject] +- client.files.list(\*\*params) -> SyncCursorPage[FileObject] - client.files.delete(file_id) -> FileDeleted - client.files.content(file_id) -> HttpxBinaryResponseContent -- client.files.retrieve_content(file_id) -> str -- client.files.wait_for_processing(\*args) -> FileObject +- client.files.retrieve_content(file_id) -> str # Images Types: ```python -from openai.types import Image, ImagesResponse +from openai.types import Image, ImageModel, ImagesResponse ``` Methods: @@ -103,32 +135,50 @@ Methods: # Audio +Types: + +```python +from openai.types import AudioModel, AudioResponseFormat +``` + ## Transcriptions Types: ```python -from openai.types.audio import Transcription +from openai.types.audio import ( + Transcription, + TranscriptionSegment, + TranscriptionVerbose, + TranscriptionWord, + TranscriptionCreateResponse, +) ``` Methods: -- client.audio.transcriptions.create(\*\*params) -> Transcription +- 
client.audio.transcriptions.create(\*\*params) -> TranscriptionCreateResponse ## Translations Types: ```python -from openai.types.audio import Translation +from openai.types.audio import Translation, TranslationVerbose, TranslationCreateResponse ``` Methods: -- client.audio.translations.create(\*\*params) -> Translation +- client.audio.translations.create(\*\*params) -> TranslationCreateResponse ## Speech +Types: + +```python +from openai.types.audio import SpeechModel +``` + Methods: - client.audio.speech.create(\*\*params) -> HttpxBinaryResponseContent @@ -138,7 +188,14 @@ Methods: Types: ```python -from openai.types import Moderation, ModerationCreateResponse +from openai.types import ( + Moderation, + ModerationImageURLInput, + ModerationModel, + ModerationMultiModalInput, + ModerationTextInput, + ModerationCreateResponse, +) ``` Methods: @@ -169,9 +226,9 @@ Types: from openai.types.fine_tuning import ( FineTuningJob, FineTuningJobEvent, - FineTuningJobIntegration, FineTuningJobWandbIntegration, FineTuningJobWandbIntegrationObject, + FineTuningJobIntegration, ) ``` @@ -195,60 +252,133 @@ Methods: - client.fine_tuning.jobs.checkpoints.list(fine_tuning_job_id, \*\*params) -> SyncCursorPage[FineTuningJobCheckpoint] -# Beta +# VectorStores + +Types: + +```python +from openai.types import ( + AutoFileChunkingStrategyParam, + FileChunkingStrategy, + FileChunkingStrategyParam, + OtherFileChunkingStrategyObject, + StaticFileChunkingStrategy, + StaticFileChunkingStrategyObject, + StaticFileChunkingStrategyObjectParam, + VectorStore, + VectorStoreDeleted, + VectorStoreSearchResponse, +) +``` + +Methods: + +- client.vector_stores.create(\*\*params) -> VectorStore +- client.vector_stores.retrieve(vector_store_id) -> VectorStore +- client.vector_stores.update(vector_store_id, \*\*params) -> VectorStore +- client.vector_stores.list(\*\*params) -> SyncCursorPage[VectorStore] +- client.vector_stores.delete(vector_store_id) -> VectorStoreDeleted +- client.vector_stores.search(vector_store_id, \*\*params) -> SyncPage[VectorStoreSearchResponse] -## VectorStores +## Files Types: ```python -from openai.types.beta import VectorStore, VectorStoreDeleted +from openai.types.vector_stores import VectorStoreFile, VectorStoreFileDeleted, FileContentResponse ``` Methods: -- client.beta.vector_stores.create(\*\*params) -> VectorStore -- client.beta.vector_stores.retrieve(vector_store_id) -> VectorStore -- client.beta.vector_stores.update(vector_store_id, \*\*params) -> VectorStore -- client.beta.vector_stores.list(\*\*params) -> SyncCursorPage[VectorStore] -- client.beta.vector_stores.delete(vector_store_id) -> VectorStoreDeleted +- client.vector_stores.files.create(vector_store_id, \*\*params) -> VectorStoreFile +- client.vector_stores.files.retrieve(file_id, \*, vector_store_id) -> VectorStoreFile +- client.vector_stores.files.update(file_id, \*, vector_store_id, \*\*params) -> VectorStoreFile +- client.vector_stores.files.list(vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] +- client.vector_stores.files.delete(file_id, \*, vector_store_id) -> VectorStoreFileDeleted +- client.vector_stores.files.content(file_id, \*, vector_store_id) -> SyncPage[FileContentResponse] -### Files +## FileBatches Types: ```python -from openai.types.beta.vector_stores import VectorStoreFile, VectorStoreFileDeleted +from openai.types.vector_stores import VectorStoreFileBatch ``` Methods: -- client.beta.vector_stores.files.create(vector_store_id, \*\*params) -> VectorStoreFile -- 
client.beta.vector_stores.files.retrieve(file_id, \*, vector_store_id) -> VectorStoreFile -- client.beta.vector_stores.files.list(vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] -- client.beta.vector_stores.files.delete(file_id, \*, vector_store_id) -> VectorStoreFileDeleted -- client.beta.vector_stores.files.create_and_poll(\*args) -> VectorStoreFile -- client.beta.vector_stores.files.poll(\*args) -> VectorStoreFile -- client.beta.vector_stores.files.upload(\*args) -> VectorStoreFile -- client.beta.vector_stores.files.upload_and_poll(\*args) -> VectorStoreFile +- client.vector_stores.file_batches.create(vector_store_id, \*\*params) -> VectorStoreFileBatch +- client.vector_stores.file_batches.retrieve(batch_id, \*, vector_store_id) -> VectorStoreFileBatch +- client.vector_stores.file_batches.cancel(batch_id, \*, vector_store_id) -> VectorStoreFileBatch +- client.vector_stores.file_batches.list_files(batch_id, \*, vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] + +# Beta + +## Realtime + +Types: + +```python +from openai.types.beta.realtime import ( + ConversationCreatedEvent, + ConversationItem, + ConversationItemContent, + ConversationItemCreateEvent, + ConversationItemCreatedEvent, + ConversationItemDeleteEvent, + ConversationItemDeletedEvent, + ConversationItemInputAudioTranscriptionCompletedEvent, + ConversationItemInputAudioTranscriptionFailedEvent, + ConversationItemTruncateEvent, + ConversationItemTruncatedEvent, + ConversationItemWithReference, + ErrorEvent, + InputAudioBufferAppendEvent, + InputAudioBufferClearEvent, + InputAudioBufferClearedEvent, + InputAudioBufferCommitEvent, + InputAudioBufferCommittedEvent, + InputAudioBufferSpeechStartedEvent, + InputAudioBufferSpeechStoppedEvent, + RateLimitsUpdatedEvent, + RealtimeClientEvent, + RealtimeResponse, + RealtimeResponseStatus, + RealtimeResponseUsage, + RealtimeServerEvent, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCancelEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreateEvent, + ResponseCreatedEvent, + ResponseDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + SessionCreatedEvent, + SessionUpdateEvent, + SessionUpdatedEvent, +) +``` -### FileBatches +### Sessions Types: ```python -from openai.types.beta.vector_stores import VectorStoreFileBatch +from openai.types.beta.realtime import Session, SessionCreateResponse ``` Methods: -- client.beta.vector_stores.file_batches.create(vector_store_id, \*\*params) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.retrieve(batch_id, \*, vector_store_id) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.cancel(batch_id, \*, vector_store_id) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.list_files(batch_id, \*, vector_store_id, \*\*params) -> SyncCursorPage[VectorStoreFile] -- client.beta.vector_stores.file_batches.create_and_poll(\*args) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.poll(\*args) -> VectorStoreFileBatch -- client.beta.vector_stores.file_batches.upload_and_poll(\*args) -> VectorStoreFileBatch +- client.beta.realtime.sessions.create(\*\*params) -> SessionCreateResponse ## Assistants @@ -284,7 +414,6 @@ Types: ```python from openai.types.beta import ( - AssistantResponseFormat, 
AssistantResponseFormatOption, AssistantToolChoice, AssistantToolChoiceFunction, @@ -301,8 +430,6 @@ Methods: - client.beta.threads.update(thread_id, \*\*params) -> Thread - client.beta.threads.delete(thread_id) -> ThreadDeleted - client.beta.threads.create_and_run(\*\*params) -> Run -- client.beta.threads.create_and_run_poll(\*args) -> Run -- client.beta.threads.create_and_run_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] ### Runs @@ -320,12 +447,6 @@ Methods: - client.beta.threads.runs.list(thread_id, \*\*params) -> SyncCursorPage[Run] - client.beta.threads.runs.cancel(run_id, \*, thread_id) -> Run - client.beta.threads.runs.submit_tool_outputs(run_id, \*, thread_id, \*\*params) -> Run -- client.beta.threads.runs.create_and_poll(\*args) -> Run -- client.beta.threads.runs.create_and_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] -- client.beta.threads.runs.poll(\*args) -> Run -- client.beta.threads.runs.stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] -- client.beta.threads.runs.submit_tool_outputs_and_poll(\*args) -> Run -- client.beta.threads.runs.submit_tool_outputs_stream(\*args) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT] #### Steps @@ -346,6 +467,7 @@ from openai.types.beta.threads.runs import ( RunStepDelta, RunStepDeltaEvent, RunStepDeltaMessageDelta, + RunStepInclude, ToolCall, ToolCallDelta, ToolCallDeltaObject, @@ -355,7 +477,7 @@ from openai.types.beta.threads.runs import ( Methods: -- client.beta.threads.runs.steps.retrieve(step_id, \*, thread_id, run_id) -> RunStep +- client.beta.threads.runs.steps.retrieve(step_id, \*, thread_id, run_id, \*\*params) -> RunStep - client.beta.threads.runs.steps.list(run_id, \*, thread_id, \*\*params) -> SyncCursorPage[RunStep] ### Messages @@ -385,6 +507,8 @@ from openai.types.beta.threads import ( MessageDeleted, MessageDelta, MessageDeltaEvent, + RefusalContentBlock, + RefusalDeltaBlock, Text, TextContentBlock, TextContentBlockParam, @@ -415,3 +539,126 @@ Methods: - client.batches.retrieve(batch_id) -> Batch - client.batches.list(\*\*params) -> SyncCursorPage[Batch] - client.batches.cancel(batch_id) -> Batch + +# Uploads + +Types: + +```python +from openai.types import Upload +``` + +Methods: + +- client.uploads.create(\*\*params) -> Upload +- client.uploads.cancel(upload_id) -> Upload +- client.uploads.complete(upload_id, \*\*params) -> Upload + +## Parts + +Types: + +```python +from openai.types.uploads import UploadPart +``` + +Methods: + +- client.uploads.parts.create(upload_id, \*\*params) -> UploadPart + +# Responses + +Types: + +```python +from openai.types.responses import ( + ComputerTool, + EasyInputMessage, + FileSearchTool, + FunctionTool, + Response, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCodeInterpreterToolCall, + ResponseCompletedEvent, + ResponseComputerToolCall, + ResponseContent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreatedEvent, + ResponseError, + ResponseErrorEvent, + ResponseFailedEvent, + 
ResponseFileSearchCallCompletedEvent, + ResponseFileSearchCallInProgressEvent, + ResponseFileSearchCallSearchingEvent, + ResponseFileSearchToolCall, + ResponseFormatTextConfig, + ResponseFormatTextJSONSchemaConfig, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseInProgressEvent, + ResponseIncludable, + ResponseIncompleteEvent, + ResponseInput, + ResponseInputAudio, + ResponseInputContent, + ResponseInputFile, + ResponseInputImage, + ResponseInputItem, + ResponseInputMessageContentList, + ResponseInputText, + ResponseOutputAudio, + ResponseOutputItem, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseOutputMessage, + ResponseOutputRefusal, + ResponseOutputText, + ResponseReasoningItem, + ResponseRefusalDeltaEvent, + ResponseRefusalDoneEvent, + ResponseStatus, + ResponseStreamEvent, + ResponseTextAnnotationDeltaEvent, + ResponseTextConfig, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseUsage, + ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + Tool, + ToolChoiceFunction, + ToolChoiceOptions, + ToolChoiceTypes, + WebSearchTool, +) +``` + +Methods: + +- client.responses.create(\*\*params) -> Response +- client.responses.retrieve(response_id, \*\*params) -> Response +- client.responses.delete(response_id) -> None + +## InputItems + +Types: + +```python +from openai.types.responses import ResponseItemList +``` + +Methods: + +- client.responses.input_items.list(response_id, \*\*params) -> SyncCursorPage[Data] diff --git a/bin/check-release-environment b/bin/check-release-environment index 2cc5ad6352..5471b69edb 100644 --- a/bin/check-release-environment +++ b/bin/check-release-environment @@ -2,10 +2,6 @@ errors=() -if [ -z "${STAINLESS_API_KEY}" ]; then - errors+=("The STAINLESS_API_KEY secret has not been set. Please contact Stainless for an API key & set it in your organization secrets on GitHub.") -fi - if [ -z "${PYPI_TOKEN}" ]; then errors+=("The OPENAI_PYPI_TOKEN secret has not been set. Please set it in either this repository's secrets or your organization secrets.") fi diff --git a/examples/assistant.py b/examples/assistant.py deleted file mode 100644 index 0631494ecc..0000000000 --- a/examples/assistant.py +++ /dev/null @@ -1,38 +0,0 @@ - -import openai - -# gets API Key from environment variable OPENAI_API_KEY -client = openai.OpenAI() - -assistant = client.beta.assistants.create( - name="Math Tutor", - instructions="You are a personal math tutor. Write and run code to answer math questions.", - tools=[{"type": "code_interpreter"}], - model="gpt-4-1106-preview", -) - -thread = client.beta.threads.create() - -message = client.beta.threads.messages.create( - thread_id=thread.id, - role="user", - content="I need to solve the equation `3x + 11 = 14`. Can you help me?", -) - -run = client.beta.threads.runs.create_and_poll( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please address the user as Jane Doe. 
The user has a premium account.", -) - -print("Run completed with status: " + run.status) - -if run.status == "completed": - messages = client.beta.threads.messages.list(thread_id=thread.id) - - print("messages: ") - for message in messages: - assert message.content[0].type == "text" - print({"role": message.role, "message": message.content[0].text.value}) - - client.beta.assistants.delete(assistant.id) diff --git a/examples/assistant_stream.py b/examples/assistant_stream.py deleted file mode 100644 index 0465d3930f..0000000000 --- a/examples/assistant_stream.py +++ /dev/null @@ -1,33 +0,0 @@ -import openai - -# gets API Key from environment variable OPENAI_API_KEY -client = openai.OpenAI() - -assistant = client.beta.assistants.create( - name="Math Tutor", - instructions="You are a personal math tutor. Write and run code to answer math questions.", - tools=[{"type": "code_interpreter"}], - model="gpt-4-1106-preview", -) - -thread = client.beta.threads.create() - -message = client.beta.threads.messages.create( - thread_id=thread.id, - role="user", - content="I need to solve the equation `3x + 11 = 14`. Can you help me?", -) - -print("starting run stream") - -stream = client.beta.threads.runs.create( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please address the user as Jane Doe. The user has a premium account.", - stream=True, -) - -for event in stream: - print(event.model_dump_json(indent=2, exclude_unset=True)) - -client.beta.assistants.delete(assistant.id) diff --git a/examples/assistant_stream_helpers.py b/examples/assistant_stream_helpers.py deleted file mode 100644 index 7baec77c72..0000000000 --- a/examples/assistant_stream_helpers.py +++ /dev/null @@ -1,78 +0,0 @@ -from __future__ import annotations - -from typing_extensions import override - -import openai -from openai import AssistantEventHandler -from openai.types.beta import AssistantStreamEvent -from openai.types.beta.threads import Text, TextDelta -from openai.types.beta.threads.runs import RunStep, RunStepDelta - - -class EventHandler(AssistantEventHandler): - @override - def on_event(self, event: AssistantStreamEvent) -> None: - if event.event == "thread.run.step.created": - details = event.data.step_details - if details.type == "tool_calls": - print("Generating code to interpret:\n\n```py") - elif event.event == "thread.message.created": - print("\nResponse:\n") - - @override - def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: - print(delta.value, end="", flush=True) - - @override - def on_run_step_done(self, run_step: RunStep) -> None: - details = run_step.step_details - if details.type == "tool_calls": - for tool in details.tool_calls: - if tool.type == "code_interpreter": - print("\n```\nExecuting code...") - - @override - def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: - details = delta.step_details - if details is not None and details.type == "tool_calls": - for tool in details.tool_calls or []: - if tool.type == "code_interpreter" and tool.code_interpreter and tool.code_interpreter.input: - print(tool.code_interpreter.input, end="", flush=True) - - -def main() -> None: - client = openai.OpenAI() - - assistant = client.beta.assistants.create( - name="Math Tutor", - instructions="You are a personal math tutor. Write and run code to answer math questions.", - tools=[{"type": "code_interpreter"}], - model="gpt-4-1106-preview", - ) - - try: - question = "I need to solve the equation `3x + 11 = 14`. Can you help me?" 
- - thread = client.beta.threads.create( - messages=[ - { - "role": "user", - "content": question, - }, - ] - ) - print(f"Question: {question}\n") - - with client.beta.threads.runs.stream( - thread_id=thread.id, - assistant_id=assistant.id, - instructions="Please address the user as Jane Doe. The user has a premium account.", - event_handler=EventHandler(), - ) as stream: - stream.until_done() - print() - finally: - client.beta.assistants.delete(assistant.id) - - -main() diff --git a/examples/async_demo.py b/examples/async_demo.py deleted file mode 100755 index 793b4e43fb..0000000000 --- a/examples/async_demo.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env -S poetry run python - -import asyncio - -from openai import AsyncOpenAI - -# gets API Key from environment variable OPENAI_API_KEY -client = AsyncOpenAI() - - -async def main() -> None: - stream = await client.completions.create( - model="gpt-3.5-turbo-instruct", - prompt="Say this is a test", - stream=True, - ) - async for completion in stream: - print(completion.choices[0].text, end="") - print() - - -asyncio.run(main()) diff --git a/examples/audio.py b/examples/audio.py deleted file mode 100755 index 85f47bfb06..0000000000 --- a/examples/audio.py +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env rye run python - -import time -from pathlib import Path - -from openai import OpenAI - -# gets OPENAI_API_KEY from your environment variables -openai = OpenAI() - -speech_file_path = Path(__file__).parent / "speech.mp3" - - -def main() -> None: - stream_to_speakers() - - # Create text-to-speech audio file - with openai.audio.speech.with_streaming_response.create( - model="tts-1", - voice="alloy", - input="the quick brown fox jumped over the lazy dogs", - ) as response: - response.stream_to_file(speech_file_path) - - # Create transcription from audio file - transcription = openai.audio.transcriptions.create( - model="whisper-1", - file=speech_file_path, - ) - print(transcription.text) - - # Create translation from audio file - translation = openai.audio.translations.create( - model="whisper-1", - file=speech_file_path, - ) - print(translation.text) - - -def stream_to_speakers() -> None: - import pyaudio - - player_stream = pyaudio.PyAudio().open(format=pyaudio.paInt16, channels=1, rate=24000, output=True) - - start_time = time.time() - - with openai.audio.speech.with_streaming_response.create( - model="tts-1", - voice="alloy", - response_format="pcm", # similar to WAV, but without a header chunk at the start. 
- input="""I see skies of blue and clouds of white - The bright blessed days, the dark sacred nights - And I think to myself - What a wonderful world""", - ) as response: - print(f"Time to first byte: {int((time.time() - start_time) * 1000)}ms") - for chunk in response.iter_bytes(chunk_size=1024): - player_stream.write(chunk) - - print(f"Done in {int((time.time() - start_time) * 1000)}ms.") - - -if __name__ == "__main__": - main() diff --git a/examples/azure.py b/examples/azure.py deleted file mode 100755 index 6936c4cb0e..0000000000 --- a/examples/azure.py +++ /dev/null @@ -1,43 +0,0 @@ -from openai import AzureOpenAI - -# may change in the future -# https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning -api_version = "2023-07-01-preview" - -# gets the API Key from environment variable AZURE_OPENAI_API_KEY -client = AzureOpenAI( - api_version=api_version, - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource - azure_endpoint="https://example-endpoint.openai.azure.com/", -) - -completion = client.chat.completions.create( - model="deployment-name", # e.g. gpt-35-instant - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.to_json()) - - -deployment_client = AzureOpenAI( - api_version=api_version, - # https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource - azure_endpoint="https://example-resource.azure.openai.com/", - # Navigate to the Azure OpenAI Studio to deploy a model. - azure_deployment="deployment-name", # e.g. gpt-35-instant -) - -completion = deployment_client.chat.completions.create( - model="", - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.to_json()) diff --git a/examples/azure_ad.py b/examples/azure_ad.py deleted file mode 100755 index 1b0d81863d..0000000000 --- a/examples/azure_ad.py +++ /dev/null @@ -1,30 +0,0 @@ -from azure.identity import DefaultAzureCredential, get_bearer_token_provider - -from openai import AzureOpenAI - -token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") - - -# may change in the future -# https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning -api_version = "2023-07-01-preview" - -# https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal#create-a-resource -endpoint = "https://my-resource.openai.azure.com/" - -client = AzureOpenAI( - api_version=api_version, - azure_endpoint=endpoint, - azure_ad_token_provider=token_provider, -) - -completion = client.chat.completions.create( - model="deployment-name", # e.g. 
gpt-35-instant - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], -) -print(completion.to_json()) diff --git a/examples/demo.py b/examples/demo.py deleted file mode 100755 index ac1710f3e0..0000000000 --- a/examples/demo.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env -S poetry run python - -from openai import OpenAI - -# gets API Key from environment variable OPENAI_API_KEY -client = OpenAI() - -# Non-streaming: -print("----- standard request -----") -completion = client.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "Say this is a test", - }, - ], -) -print(completion.choices[0].message.content) - -# Streaming: -print("----- streaming request -----") -stream = client.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], - stream=True, -) -for chunk in stream: - if not chunk.choices: - continue - - print(chunk.choices[0].delta.content, end="") -print() - -# Response headers: -print("----- custom response headers test -----") -response = client.chat.completions.with_raw_response.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "Say this is a test", - } - ], -) -completion = response.parse() -print(response.request_id) -print(completion.choices[0].message.content) diff --git a/examples/module_client.py b/examples/module_client.py deleted file mode 100755 index 5f2fb79dcf..0000000000 --- a/examples/module_client.py +++ /dev/null @@ -1,25 +0,0 @@ -import openai - -# will default to `os.environ['OPENAI_API_KEY']` if not explicitly set -openai.api_key = "..." - -# all client options can be configured just like the `OpenAI` instantiation counterpart -openai.base_url = "https://.../" -openai.default_headers = {"x-foo": "true"} - -# all API calls work in the exact same fashion as well -stream = openai.chat.completions.create( - model="gpt-4", - messages=[ - { - "role": "user", - "content": "How do I output all files in a directory using Python?", - }, - ], - stream=True, -) - -for chunk in stream: - print(chunk.choices[0].delta.content or "", end="", flush=True) - -print() diff --git a/examples/picture.py b/examples/picture.py deleted file mode 100644 index c27b52b0da..0000000000 --- a/examples/picture.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python - -from openai import OpenAI - -# gets OPENAI_API_KEY from your environment variables -openai = OpenAI() - -prompt = "An astronaut lounging in a tropical resort in space, pixel art" -model = "dall-e-3" - - -def main() -> None: - # Generate an image based on the prompt - response = openai.images.generate(prompt=prompt, model=model) - - # Prints response containing a URL link to image - print(response) - - -if __name__ == "__main__": - main() diff --git a/examples/streaming.py b/examples/streaming.py deleted file mode 100755 index 9a84891a83..0000000000 --- a/examples/streaming.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env -S poetry run python - -import asyncio - -from openai import OpenAI, AsyncOpenAI - -# This script assumes you have the OPENAI_API_KEY environment variable set to a valid OpenAI API key. 
-# -# You can run this script from the root directory like so: -# `python examples/streaming.py` - - -def sync_main() -> None: - client = OpenAI() - response = client.completions.create( - model="gpt-3.5-turbo-instruct", - prompt="1,2,3,", - max_tokens=5, - temperature=0, - stream=True, - ) - - # You can manually control iteration over the response - first = next(response) - print(f"got response data: {first.to_json()}") - - # Or you could automatically iterate through all of data. - # Note that the for loop will not exit until *all* of the data has been processed. - for data in response: - print(data.to_json()) - - -async def async_main() -> None: - client = AsyncOpenAI() - response = await client.completions.create( - model="gpt-3.5-turbo-instruct", - prompt="1,2,3,", - max_tokens=5, - temperature=0, - stream=True, - ) - - # You can manually control iteration over the response. - # In Python 3.10+ you can also use the `await anext(response)` builtin instead - first = await response.__anext__() - print(f"got response data: {first.to_json()}") - - # Or you could automatically iterate through all of data. - # Note that the for loop will not exit until *all* of the data has been processed. - async for data in response: - print(data.to_json()) - - -sync_main() - -asyncio.run(async_main()) diff --git a/helpers.md b/helpers.md deleted file mode 100644 index 3508b59a33..0000000000 --- a/helpers.md +++ /dev/null @@ -1,238 +0,0 @@ -# Streaming Helpers - -OpenAI supports streaming responses when interacting with the [Assistant](#assistant-streaming-api) APIs. - -## Assistant Streaming API - -OpenAI supports streaming responses from Assistants. The SDK provides convenience wrappers around the API -so you can subscribe to the types of events you are interested in as well as receive accumulated responses. - -More information can be found in the documentation: [Assistant Streaming](https://platform.openai.com/docs/assistants/overview?lang=python) - -#### An example of creating a run and subscribing to some events - -You can subscribe to events by creating an event handler class and overloading the relevant event handlers. - -```python -from typing_extensions import override -from openai import AssistantEventHandler, OpenAI -from openai.types.beta.threads import Text, TextDelta -from openai.types.beta.threads.runs import ToolCall, ToolCallDelta - -client = openai.OpenAI() - -# First, we create a EventHandler class to define -# how we want to handle the events in the response stream. - -class EventHandler(AssistantEventHandler): - @override - def on_text_created(self, text: Text) -> None: - print(f"\nassistant > ", end="", flush=True) - - @override - def on_text_delta(self, delta: TextDelta, snapshot: Text): - print(delta.value, end="", flush=True) - - @override - def on_tool_call_created(self, tool_call: ToolCall): - print(f"\nassistant > {tool_call.type}\n", flush=True) - - @override - def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall): - if delta.type == "code_interpreter" and delta.code_interpreter: - if delta.code_interpreter.input: - print(delta.code_interpreter.input, end="", flush=True) - if delta.code_interpreter.outputs: - print(f"\n\noutput >", flush=True) - for output in delta.code_interpreter.outputs: - if output.type == "logs": - print(f"\n{output.logs}", flush=True) - -# Then, we use the `stream` SDK helper -# with the `EventHandler` class to create the Run -# and stream the response. 
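# Note: "thread_id" and "assistant_id" below are placeholders; in a real run they
# would come from previously created objects, e.g. `client.beta.threads.create()`
# and `client.beta.assistants.create(...)`. `stream.until_done()` blocks until the
# run finishes, invoking the handler methods above as each event arrives.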
- -with client.beta.threads.runs.stream( - thread_id="thread_id", - assistant_id="assistant_id", - event_handler=EventHandler(), -) as stream: - stream.until_done() -``` - -#### An example of iterating over events - -You can also iterate over all the streamed events. - -```python -with client.beta.threads.runs.stream( - thread_id=thread.id, - assistant_id=assistant.id -) as stream: - for event in stream: - # Print the text from text delta events - if event.event == "thread.message.delta" and event.data.delta.content: - print(event.data.delta.content[0].text) -``` - -#### An example of iterating over text - -You can also iterate over just the text deltas received - -```python -with client.beta.threads.runs.stream( - thread_id=thread.id, - assistant_id=assistant.id -) as stream: - for text in stream.text_deltas: - print(text) -``` - -### Creating Streams - -There are three helper methods for creating streams: - -```python -client.beta.threads.runs.stream() -``` - -This method can be used to start and stream the response to an existing run with an associated thread -that is already populated with messages. - -```python -client.beta.threads.create_and_run_stream() -``` - -This method can be used to add a message to a thread, start a run and then stream the response. - -```python -client.beta.threads.runs.submit_tool_outputs_stream() -``` - -This method can be used to submit a tool output to a run waiting on the output and start a stream. - -### Assistant Events - -The assistant API provides events you can subscribe to for the following events. - -```python -def on_event(self, event: AssistantStreamEvent) -``` - -This allows you to subscribe to all the possible raw events sent by the OpenAI streaming API. -In many cases it will be more convenient to subscribe to a more specific set of events for your use case. - -More information on the types of events can be found here: [Events](https://platform.openai.com/docs/api-reference/assistants-streaming/events) - -```python -def on_run_step_created(self, run_step: RunStep) -def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -def on_run_step_done(self, run_step: RunStep) -``` - -These events allow you to subscribe to the creation, delta and completion of a RunStep. - -For more information on how Runs and RunSteps work see the documentation [Runs and RunSteps](https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps) - -```python -def on_message_created(self, message: Message) -def on_message_delta(self, delta: MessageDelta, snapshot: Message) -def on_message_done(self, message: Message) -``` - -This allows you to subscribe to Message creation, delta and completion events. Messages can contain -different types of content that can be sent from a model (and events are available for specific content types). -For convenience, the delta event includes both the incremental update and an accumulated snapshot of the content. - -More information on messages can be found -on in the documentation page [Message](https://platform.openai.com/docs/api-reference/messages/object). - -```python -def on_text_created(self, text: Text) -def on_text_delta(self, delta: TextDelta, snapshot: Text) -def on_text_done(self, text: Text) -``` - -These events allow you to subscribe to the creation, delta and completion of a Text content (a specific type of message). -For convenience, the delta event includes both the incremental update and an accumulated snapshot of the content. 
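As a rough sketch (reusing the `AssistantEventHandler` subclassing pattern shown above, with a hypothetical `TextPrinter` handler), the delta and snapshot arguments can be used together: print each increment as it arrives and keep the accumulated text around for later use.

```python
from typing_extensions import override

from openai import AssistantEventHandler
from openai.types.beta.threads import Text, TextDelta


class TextPrinter(AssistantEventHandler):
    latest_text: str = ""

    @override
    def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
        # `delta.value` is only the newly received fragment
        if delta.value:
            print(delta.value, end="", flush=True)
        # `snapshot.value` is the full text accumulated so far
        self.latest_text = snapshot.value

    @override
    def on_text_done(self, text: Text) -> None:
        # called once the Text content is complete
        print()
```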
- -```python -def on_image_file_done(self, image_file: ImageFile) -``` - -Image files are not sent incrementally so an event is provided for when a image file is available. - -```python -def on_tool_call_created(self, tool_call: ToolCall) -def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -def on_tool_call_done(self, tool_call: ToolCall) -``` - -These events allow you to subscribe to events for the creation, delta and completion of a ToolCall. - -More information on tools can be found here [Tools](https://platform.openai.com/docs/assistants/tools) - -```python -def on_end(self) -``` - -The last event send when a stream ends. - -```python -def on_timeout(self) -``` - -This event is triggered if the request times out. - -```python -def on_exception(self, exception: Exception) -``` - -This event is triggered if an exception occurs during streaming. - -### Assistant Methods - -The assistant streaming object also provides a few methods for convenience: - -```python -def current_event() -> AssistantStreamEvent | None -def current_run() -> Run | None -def current_message_snapshot() -> Message | None -def current_run_step_snapshot() -> RunStep | None -``` - -These methods are provided to allow you to access additional context from within event handlers. In many cases -the handlers should include all the information you need for processing, but if additional context is required it -can be accessed. - -Note: There is not always a relevant context in certain situations (these will be `None` in those cases). - -```python -def get_final_run(self) -> Run -def get_final_run_steps(self) -> List[RunStep] -def get_final_messages(self) -> List[Message] -``` - -These methods are provided for convenience to collect information at the end of a stream. Calling these events -will trigger consumption of the stream until completion and then return the relevant accumulated objects. - -# Polling Helpers - -When interacting with the API some actions such as starting a Run and adding files to vector stores are asynchronous and take time to complete. -The SDK includes helper functions which will poll the status until it reaches a terminal state and then return the resulting object. -If an API method results in an action which could benefit from polling there will be a corresponding version of the -method ending in `_and_poll`. - -All methods also allow you to set the polling frequency, how often the API is checked for an update, via a function argument (`poll_interval_ms`). - -The polling methods are: - -```python -client.beta.threads.create_and_run_poll(...) -client.beta.threads.runs.create_and_poll(...) -client.beta.threads.runs.submit_tool_ouptputs_and_poll(...) -client.beta.vector_stores.files.upload_and_poll(...) -client.beta.vector_stores.files.create_and_poll(...) -client.beta.vector_stores.file_batches.create_and_poll(...) -client.beta.vector_stores.file_batches.upload_and_poll(...) -``` diff --git a/mypy.ini b/mypy.ini index a4517a002d..91d06cb38f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -5,7 +5,10 @@ show_error_codes = True # Exclude _files.py because mypy isn't smart enough to apply # the correct type narrowing and as this is an internal module # it's fine to just use Pyright. -exclude = ^(src/openai/_files\.py|_dev/.*\.py)$ +# +# We also exclude our `tests` as mypy doesn't always infer +# types correctly and Pyright will still catch any type errors. 
+exclude = ^(src/openai/_files\.py|_dev/.*\.py|tests/.*)$ strict_equality = True implicit_reexport = True @@ -38,7 +41,7 @@ cache_fine_grained = True # ``` # Changing this codegen to make mypy happy would increase complexity # and would not be worth it. -disable_error_code = func-returns-value +disable_error_code = func-returns-value,overload-cannot-match # https://github.com/python/mypy/issues/12162 [mypy.overrides] diff --git a/pyproject.toml b/pyproject.toml index a33e167244..585db285c3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "openai" -version = "1.30.1" +version = "1.66.4" description = "The official Python library for the openai API" dynamic = ["readme"] license = "Apache-2.0" @@ -10,18 +10,15 @@ authors = [ dependencies = [ "httpx>=0.23.0, <1", "pydantic>=1.9.0, <3", - "typing-extensions>=4.7, <5", + "typing-extensions>=4.10, <5", "anyio>=3.5.0, <5", "distro>=1.7.0, <2", "sniffio", - "cached-property; python_version < '3.8'", - "tqdm > 4" ] -requires-python = ">= 3.7.1" +requires-python = ">= 3.8" classifiers = [ "Typing :: Typed", "Intended Audience :: Developers", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", @@ -36,15 +33,12 @@ classifiers = [ "License :: OSI Approved :: Apache Software License" ] -[project.optional-dependencies] -datalib = ["numpy >= 1", "pandas >= 1.2.3", "pandas-stubs >= 1.1.0.11"] - [project.urls] Homepage = "https://github.com/openai/openai-python" Repository = "https://github.com/openai/openai-python" -[project.scripts] -openai = "openai.cli:main" +[project.optional-dependencies] +realtime = ["websockets >= 13, < 15"] [tool.rye] managed = true @@ -60,11 +54,8 @@ dev-dependencies = [ "nox", "dirty-equals>=0.6.0", "importlib-metadata>=6.7.0", - "inline-snapshot >=0.7.0", - "azure-identity >=1.14.1", - "types-tqdm > 4", - "types-pyaudio > 0", - "trio >=0.22.2" + "rich>=13.7.1", + "nest_asyncio==1.6.0", ] [tool.rye.scripts] @@ -72,18 +63,21 @@ format = { chain = [ "format:ruff", "format:docs", "fix:ruff", + # run formatting again to fix any inconsistencies when imports are stripped + "format:ruff", ]} -"format:black" = "black ." "format:docs" = "python scripts/utils/ruffen-docs.py README.md api.md" "format:ruff" = "ruff format" -"format:isort" = "isort ." "lint" = { chain = [ "check:ruff", "typecheck", + "check:importable", ]} -"check:ruff" = "ruff ." -"fix:ruff" = "ruff --fix ." +"check:ruff" = "ruff check ." +"fix:ruff" = "ruff check --fix ." + +"check:importable" = "python -c 'import openai'" typecheck = { chain = [ "typecheck:pyright", @@ -94,7 +88,7 @@ typecheck = { chain = [ "typecheck:mypy" = "mypy ." 
[build-system] -requires = ["hatchling", "hatch-fancy-pypi-readme"] +requires = ["hatchling==1.26.3", "hatch-fancy-pypi-readme"] build-backend = "hatchling.build" [tool.hatch.build] @@ -105,6 +99,21 @@ include = [ [tool.hatch.build.targets.wheel] packages = ["src/openai"] +[tool.hatch.build.targets.sdist] +# Basically everything except hidden files/directories (such as .github, .devcontainers, .python-version, etc) +include = [ + "/*.toml", + "/*.json", + "/*.lock", + "/*.md", + "/mypy.ini", + "/noxfile.py", + "bin/*", + "examples/*", + "src/*", + "tests/*", +] + [tool.hatch.metadata.hooks.fancy-pypi-readme] content-type = "text/markdown" @@ -116,15 +125,12 @@ path = "README.md" pattern = '\[(.+?)\]\(((?!https?://)\S+?)\)' replacement = '[\1](https://github.com/openai/openai-python/tree/main/\g<2>)' -[tool.black] -line-length = 120 -target-version = ["py37"] - [tool.pytest.ini_options] testpaths = ["tests"] addopts = "--tb=short" xfail_strict = true asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "session" filterwarnings = [ "error" ] @@ -134,7 +140,7 @@ filterwarnings = [ # there are a couple of flags that are still disabled by # default in strict mode as they are experimental and niche. typeCheckingMode = "strict" -pythonVersion = "3.7" +pythonVersion = "3.8" exclude = [ "_dev", @@ -147,11 +153,15 @@ reportImplicitOverride = true reportImportCycles = false reportPrivateUsage = false - [tool.ruff] line-length = 120 output-format = "grouped" target-version = "py37" + +[tool.ruff.format] +docstring-code-format = true + +[tool.ruff.lint] select = [ # isort "I", @@ -167,7 +177,7 @@ select = [ "T201", "T203", # misuse of typing.TYPE_CHECKING - "TCH004", + "TC004", # import rules "TID251", ] @@ -180,10 +190,6 @@ unfixable = [ "T201", "T203", ] -ignore-init-module-imports = true - -[tool.ruff.format] -docstring-code-format = true [tool.ruff.lint.flake8-tidy-imports.banned-api] "functools.lru_cache".msg = "This function does not retain type information for the wrapped function's arguments; The `lru_cache` function from `_utils` should be used instead" @@ -195,7 +201,7 @@ combine-as-imports = true extra-standard-library = ["typing_extensions"] known-first-party = ["openai", "tests"] -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "bin/**.py" = ["T201", "T203"] "scripts/**.py" = ["T201", "T203"] "tests/**.py" = ["T201", "T203"] diff --git a/requirements-dev.lock b/requirements-dev.lock index 6a4e12022a..be626d274b 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,173 +6,101 @@ # features: [] # all-features: true # with-sources: false +# generate-hashes: false +# universal: false -e file:. 
annotated-types==0.6.0 # via pydantic -anyio==4.1.0 +anyio==4.4.0 # via httpx # via openai argcomplete==3.1.2 # via nox -asttokens==2.4.1 - # via inline-snapshot -attrs==23.1.0 - # via outcome - # via pytest - # via trio -azure-core==1.30.1 - # via azure-identity -azure-identity==1.15.0 -black==24.4.2 - # via inline-snapshot certifi==2023.7.22 # via httpcore # via httpx - # via requests -cffi==1.16.0 - # via cryptography -charset-normalizer==3.3.2 - # via requests -click==8.1.7 - # via black - # via inline-snapshot colorlog==6.7.0 # via nox -cryptography==42.0.7 - # via azure-identity - # via msal - # via pyjwt dirty-equals==0.6.0 distlib==0.3.7 # via virtualenv distro==1.8.0 # via openai -exceptiongroup==1.1.3 +exceptiongroup==1.2.2 # via anyio - # via trio -executing==2.0.1 - # via inline-snapshot + # via pytest filelock==3.12.4 # via virtualenv h11==0.14.0 # via httpcore httpcore==1.0.2 # via httpx -httpx==0.25.2 +httpx==0.28.1 # via openai # via respx idna==3.4 # via anyio # via httpx - # via requests - # via trio importlib-metadata==7.0.0 iniconfig==2.0.0 # via pytest -inline-snapshot==0.7.0 -msal==1.28.0 - # via azure-identity - # via msal-extensions -msal-extensions==1.1.0 - # via azure-identity -mypy==1.7.1 +markdown-it-py==3.0.0 + # via rich +mdurl==0.1.2 + # via markdown-it-py +mypy==1.14.1 mypy-extensions==1.0.0 - # via black # via mypy +nest-asyncio==1.6.0 nodeenv==1.8.0 # via pyright nox==2023.4.22 -numpy==1.26.3 - # via openai - # via pandas - # via pandas-stubs -outcome==1.3.0.post0 - # via trio packaging==23.2 - # via black - # via msal-extensions # via nox # via pytest -pandas==2.1.4 - # via openai -pandas-stubs==2.1.4.231227 - # via openai -pathspec==0.12.1 - # via black platformdirs==3.11.0 - # via black # via virtualenv -pluggy==1.3.0 - # via pytest -portalocker==2.8.2 - # via msal-extensions -py==1.11.0 +pluggy==1.5.0 # via pytest -pycparser==2.22 - # via cffi -pydantic==2.7.1 +pydantic==2.10.3 # via openai -pydantic-core==2.18.2 +pydantic-core==2.27.1 # via pydantic -pyjwt==2.8.0 - # via msal -pyright==1.1.359 -pytest==7.1.1 +pygments==2.18.0 + # via rich +pyright==1.1.392.post0 +pytest==8.3.3 # via pytest-asyncio -pytest-asyncio==0.21.1 +pytest-asyncio==0.24.0 python-dateutil==2.8.2 - # via pandas # via time-machine pytz==2023.3.post1 # via dirty-equals - # via pandas -requests==2.31.0 - # via azure-core - # via msal -respx==0.20.2 -ruff==0.1.9 +respx==0.22.0 +rich==13.7.1 +ruff==0.9.4 setuptools==68.2.2 # via nodeenv six==1.16.0 - # via asttokens - # via azure-core # via python-dateutil sniffio==1.3.0 # via anyio - # via httpx # via openai - # via trio -sortedcontainers==2.4.0 - # via trio time-machine==2.9.0 -toml==0.10.2 - # via inline-snapshot -tomli==2.0.1 - # via black +tomli==2.0.2 # via mypy # via pytest -tqdm==4.66.1 - # via openai -trio==0.22.2 -types-pyaudio==0.2.16.20240106 -types-pytz==2024.1.0.20240417 - # via pandas-stubs -types-toml==0.10.8.20240310 - # via inline-snapshot -types-tqdm==4.66.0.2 -typing-extensions==4.8.0 - # via azure-core - # via black +typing-extensions==4.12.2 + # via anyio # via mypy # via openai # via pydantic # via pydantic-core -tzdata==2024.1 - # via pandas -urllib3==2.2.1 - # via requests + # via pyright virtualenv==20.24.5 # via nox +websockets==14.2 + # via openai zipp==3.17.0 # via importlib-metadata diff --git a/requirements.lock b/requirements.lock index 47cf8a40e9..c704fd8abd 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,11 +6,13 @@ # features: [] # all-features: true # with-sources: false +# 
generate-hashes: false +# universal: false -e file:. annotated-types==0.6.0 # via pydantic -anyio==4.1.0 +anyio==4.4.0 # via httpx # via openai certifi==2023.7.22 @@ -18,46 +20,28 @@ certifi==2023.7.22 # via httpx distro==1.8.0 # via openai -exceptiongroup==1.1.3 +exceptiongroup==1.2.2 # via anyio h11==0.14.0 # via httpcore httpcore==1.0.2 # via httpx -httpx==0.25.2 +httpx==0.28.1 # via openai idna==3.4 # via anyio # via httpx -numpy==1.26.4 +pydantic==2.10.3 # via openai - # via pandas - # via pandas-stubs -pandas==2.2.2 - # via openai -pandas-stubs==2.2.1.240316 - # via openai -pydantic==2.7.1 - # via openai -pydantic-core==2.18.2 +pydantic-core==2.27.1 # via pydantic -python-dateutil==2.9.0.post0 - # via pandas -pytz==2024.1 - # via pandas -six==1.16.0 - # via python-dateutil sniffio==1.3.0 # via anyio - # via httpx # via openai -tqdm==4.66.1 - # via openai -types-pytz==2024.1.0.20240417 - # via pandas-stubs -typing-extensions==4.8.0 +typing-extensions==4.12.2 + # via anyio # via openai # via pydantic # via pydantic-core -tzdata==2024.1 - # via pandas +websockets==14.2 + # via openai diff --git a/scripts/bootstrap b/scripts/bootstrap index 29df07e77b..e84fe62c38 100755 --- a/scripts/bootstrap +++ b/scripts/bootstrap @@ -4,7 +4,7 @@ set -e cd "$(dirname "$0")/.." -if [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then +if ! command -v rye >/dev/null 2>&1 && [ -f "Brewfile" ] && [ "$(uname -s)" = "Darwin" ]; then brew bundle check >/dev/null 2>&1 || { echo "==> Installing Homebrew dependencies…" brew bundle @@ -16,4 +16,4 @@ echo "==> Installing Python dependencies…" # experimental uv support makes installations significantly faster rye config --set-bool behavior.use-uv=true -rye sync +rye sync --all-features diff --git a/scripts/lint b/scripts/lint index 64495ee345..55bc1dd711 100755 --- a/scripts/lint +++ b/scripts/lint @@ -9,4 +9,3 @@ rye run lint echo "==> Making sure it imports" rye run python -c 'import openai' - diff --git a/scripts/mock b/scripts/mock index fe89a1d084..d2814ae6a0 100755 --- a/scripts/mock +++ b/scripts/mock @@ -21,7 +21,7 @@ echo "==> Starting mock server with URL ${URL}" # Run prism mock on the given spec if [ "$1" == "--daemon" ]; then - npm exec --package=@stoplight/prism-cli@~5.8 -- prism mock "$URL" &> .prism.log & + npm exec --package=@stainless-api/prism-cli@5.8.5 -- prism mock "$URL" &> .prism.log & # Wait for server to come online echo -n "Waiting for server" @@ -37,5 +37,5 @@ if [ "$1" == "--daemon" ]; then echo else - npm exec --package=@stoplight/prism-cli@~5.8 -- prism mock "$URL" + npm exec --package=@stainless-api/prism-cli@5.8.5 -- prism mock "$URL" fi diff --git a/scripts/test b/scripts/test index b3ace9013b..2b87845670 100755 --- a/scripts/test +++ b/scripts/test @@ -52,5 +52,10 @@ else echo fi +export DEFER_PYDANTIC_BUILD=false + echo "==> Running tests" rye run pytest "$@" + +echo "==> Running Pydantic v1 tests" +rye run nox -s test-pydantic-v1 -- "$@" diff --git a/scripts/utils/ruffen-docs.py b/scripts/utils/ruffen-docs.py index 37b3d94f0f..0cf2bd2fd9 100644 --- a/scripts/utils/ruffen-docs.py +++ b/scripts/utils/ruffen-docs.py @@ -47,7 +47,7 @@ def _md_match(match: Match[str]) -> str: with _collect_error(match): code = format_code_block(code) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" def _pycon_match(match: Match[str]) -> str: code = "" @@ -97,7 +97,7 @@ def finish_fragment() -> None: def _md_pycon_match(match: Match[str]) -> str: 
code = _pycon_match(match) code = textwrap.indent(code, match["indent"]) - return f'{match["before"]}{code}{match["after"]}' + return f"{match['before']}{code}{match['after']}" src = MD_RE.sub(_md_match, src) src = MD_PYCON_RE.sub(_md_pycon_match, src) diff --git a/src/openai/__init__.py b/src/openai/__init__.py index 0e87ae9259..1107973aed 100644 --- a/src/openai/__init__.py +++ b/src/openai/__init__.py @@ -2,11 +2,10 @@ from __future__ import annotations -import os as _os from typing_extensions import override from . import types -from ._types import NOT_GIVEN, NoneType, NotGiven, Transport, ProxiesTypes +from ._types import NOT_GIVEN, Omit, NoneType, NotGiven, Transport, ProxiesTypes from ._utils import file_from_path from ._client import Client, OpenAI, Stream, Timeout, Transport, AsyncClient, AsyncOpenAI, AsyncStream, RequestOptions from ._models import BaseModel @@ -41,6 +40,7 @@ "ProxiesTypes", "NotGiven", "NOT_GIVEN", + "Omit", "OpenAIError", "APIError", "APIStatusError", @@ -72,15 +72,6 @@ "DefaultAsyncHttpxClient", ] -from .lib import azure as _azure -from .version import VERSION as VERSION -from .lib.azure import AzureOpenAI as AzureOpenAI, AsyncAzureOpenAI as AsyncAzureOpenAI -from .lib._old_api import * -from .lib.streaming import ( - AssistantEventHandler as AssistantEventHandler, - AsyncAssistantEventHandler as AsyncAssistantEventHandler, -) - _setup_logging() # Update the __module__ attribute for exported symbols so that @@ -98,7 +89,6 @@ # ------ Module level client ------ import typing as _t -import typing_extensions as _te import httpx as _httpx @@ -122,18 +112,6 @@ http_client: _httpx.Client | None = None -_ApiType = _te.Literal["openai", "azure"] - -api_type: _ApiType | None = _t.cast(_ApiType, _os.environ.get("OPENAI_API_TYPE")) - -api_version: str | None = _os.environ.get("OPENAI_API_VERSION") - -azure_endpoint: str | None = _os.environ.get("AZURE_OPENAI_ENDPOINT") - -azure_ad_token: str | None = _os.environ.get("AZURE_OPENAI_AD_TOKEN") - -azure_ad_token_provider: _azure.AzureADTokenProvider | None = None - class _ModuleClient(OpenAI): # Note: we have to use type: ignores here as overriding class members @@ -240,33 +218,6 @@ def _client(self, value: _httpx.Client) -> None: # type: ignore http_client = value -class _AzureModuleClient(_ModuleClient, AzureOpenAI): # type: ignore - ... 
- - -class _AmbiguousModuleClientUsageError(OpenAIError): - def __init__(self) -> None: - super().__init__( - "Ambiguous use of module client; please set `openai.api_type` or the `OPENAI_API_TYPE` environment variable to `openai` or `azure`" - ) - - -def _has_openai_credentials() -> bool: - return _os.environ.get("OPENAI_API_KEY") is not None - - -def _has_azure_credentials() -> bool: - return azure_endpoint is not None or _os.environ.get("AZURE_OPENAI_API_KEY") is not None - - -def _has_azure_ad_credentials() -> bool: - return ( - _os.environ.get("AZURE_OPENAI_AD_TOKEN") is not None - or azure_ad_token is not None - or azure_ad_token_provider is not None - ) - - _client: OpenAI | None = None @@ -274,52 +225,6 @@ def _load_client() -> OpenAI: # type: ignore[reportUnusedFunction] global _client if _client is None: - global api_type, azure_endpoint, azure_ad_token, api_version - - if azure_endpoint is None: - azure_endpoint = _os.environ.get("AZURE_OPENAI_ENDPOINT") - - if azure_ad_token is None: - azure_ad_token = _os.environ.get("AZURE_OPENAI_AD_TOKEN") - - if api_version is None: - api_version = _os.environ.get("OPENAI_API_VERSION") - - if api_type is None: - has_openai = _has_openai_credentials() - has_azure = _has_azure_credentials() - has_azure_ad = _has_azure_ad_credentials() - - if has_openai and (has_azure or has_azure_ad): - raise _AmbiguousModuleClientUsageError() - - if (azure_ad_token is not None or azure_ad_token_provider is not None) and _os.environ.get( - "AZURE_OPENAI_API_KEY" - ) is not None: - raise _AmbiguousModuleClientUsageError() - - if has_azure or has_azure_ad: - api_type = "azure" - else: - api_type = "openai" - - if api_type == "azure": - _client = _AzureModuleClient( # type: ignore - api_version=api_version, - azure_endpoint=azure_endpoint, - api_key=api_key, - azure_ad_token=azure_ad_token, - azure_ad_token_provider=azure_ad_token_provider, - organization=organization, - base_url=base_url, - timeout=timeout, - max_retries=max_retries, - default_headers=default_headers, - default_query=default_query, - http_client=http_client, - ) - return _client - _client = _ModuleClient( api_key=api_key, organization=organization, @@ -350,8 +255,11 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction] images as images, models as models, batches as batches, + uploads as uploads, + responses as responses, embeddings as embeddings, completions as completions, fine_tuning as fine_tuning, moderations as moderations, + vector_stores as vector_stores, ) diff --git a/src/openai/__main__.py b/src/openai/__main__.py deleted file mode 100644 index 4e28416e10..0000000000 --- a/src/openai/__main__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .cli import main - -main() diff --git a/src/openai/_base_client.py b/src/openai/_base_client.py index 5d5d25fca9..2fe1b61a18 100644 --- a/src/openai/_base_client.py +++ b/src/openai/_base_client.py @@ -1,5 +1,6 @@ from __future__ import annotations +import sys import json import time import uuid @@ -8,7 +9,6 @@ import inspect import logging import platform -import warnings import email.utils from types import TracebackType from random import random @@ -35,7 +35,7 @@ import httpx import distro import pydantic -from httpx import URL, Limits +from httpx import URL from pydantic import PrivateAttr from . 
import _exceptions @@ -50,18 +50,16 @@ Timeout, NotGiven, ResponseT, - Transport, AnyMapping, PostParser, - ProxiesTypes, RequestFiles, HttpxSendArgs, - AsyncTransport, RequestOptions, + HttpxRequestFiles, ModelBuilderProtocol, ) -from ._utils import is_dict, is_list, is_given, lru_cache, is_mapping -from ._compat import model_copy, model_dump +from ._utils import is_dict, is_list, asyncify, is_given, lru_cache, is_mapping +from ._compat import PYDANTIC_V2, model_copy, model_dump from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type from ._response import ( APIResponse, @@ -124,16 +122,14 @@ def __init__( self, *, url: URL, - ) -> None: - ... + ) -> None: ... @overload def __init__( self, *, params: Query, - ) -> None: - ... + ) -> None: ... def __init__( self, @@ -144,6 +140,12 @@ def __init__( self.url = url self.params = params + @override + def __repr__(self) -> str: + if self.url: + return f"{self.__class__.__name__}(url={self.url})" + return f"{self.__class__.__name__}(params={self.params})" + class BasePage(GenericModel, Generic[_T]): """ @@ -166,8 +168,7 @@ def has_next_page(self) -> bool: return False return self.next_page_info() is not None - def next_page_info(self) -> Optional[PageInfo]: - ... + def next_page_info(self) -> Optional[PageInfo]: ... def _get_page_items(self) -> Iterable[_T]: # type: ignore[empty-body] ... @@ -203,6 +204,9 @@ def _set_private_attributes( model: Type[_T], options: FinalRequestOptions, ) -> None: + if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -288,6 +292,9 @@ def _set_private_attributes( client: AsyncAPIClient, options: FinalRequestOptions, ) -> None: + if PYDANTIC_V2 and getattr(self, "__pydantic_private__", None) is None: + self.__pydantic_private__ = {} + self._model = model self._client = client self._options = options @@ -327,9 +334,6 @@ class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]): _base_url: URL max_retries: int timeout: Union[float, Timeout, None] - _limits: httpx.Limits - _proxies: ProxiesTypes | None - _transport: Transport | AsyncTransport | None _strict_response_validation: bool _idempotency_header: str | None _default_stream_cls: type[_DefaultStreamT] | None = None @@ -342,9 +346,6 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None = DEFAULT_TIMEOUT, - limits: httpx.Limits, - transport: Transport | AsyncTransport | None, - proxies: ProxiesTypes | None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: @@ -352,13 +353,11 @@ def __init__( self._base_url = self._enforce_trailing_slash(URL(base_url)) self.max_retries = max_retries self.timeout = timeout - self._limits = limits - self._proxies = proxies - self._transport = transport self._custom_headers = custom_headers or {} self._custom_query = custom_query or {} self._strict_response_validation = _strict_response_validation self._idempotency_header = None + self._platform: Platform | None = None if max_retries is None: # pyright: ignore[reportUnnecessaryComparison] raise TypeError( @@ -401,14 +400,7 @@ def _make_status_error( ) -> _exceptions.APIStatusError: raise NotImplementedError() - def _remaining_retries( - self, - remaining_retries: Optional[int], - options: FinalRequestOptions, - ) -> int: - return remaining_retries if remaining_retries is not None else 
options.get_max_retries(self.max_retries) - - def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers: + def _build_headers(self, options: FinalRequestOptions, *, retries_taken: int = 0) -> httpx.Headers: custom_headers = options.headers or {} headers_dict = _merge_mappings(self.default_headers, custom_headers) self._validate_headers(headers_dict, custom_headers) @@ -420,6 +412,18 @@ def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers: if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers: headers[idempotency_header] = options.idempotency_key or self._idempotency_key() + # Don't set these headers if they were already set or removed by the caller. We check + # `custom_headers`, which can contain `Omit()`, instead of `headers` to account for the removal case. + lower_custom_headers = [header.lower() for header in custom_headers] + if "x-stainless-retry-count" not in lower_custom_headers: + headers["x-stainless-retry-count"] = str(retries_taken) + if "x-stainless-read-timeout" not in lower_custom_headers: + timeout = self.timeout if isinstance(options.timeout, NotGiven) else options.timeout + if isinstance(timeout, Timeout): + timeout = timeout.read + if timeout is not None: + headers["x-stainless-read-timeout"] = str(timeout) + return headers def _prepare_url(https://github.com/self,%20url:%20str) -> URL: @@ -441,6 +445,8 @@ def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder: def _build_request( self, options: FinalRequestOptions, + *, + retries_taken: int = 0, ) -> httpx.Request: if log.isEnabledFor(logging.DEBUG): log.debug("Request options: %s", model_dump(options, exclude_unset=True)) @@ -456,9 +462,10 @@ def _build_request( else: raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`") - headers = self._build_headers(options) - params = _merge_mappings(self._custom_query, options.params) + headers = self._build_headers(options, retries_taken=retries_taken) + params = _merge_mappings(self.default_query, options.params) content_type = headers.get("Content-Type") + files = options.files # If the given Content-Type header is multipart/form-data then it # has to be removed so that httpx can generate the header with @@ -472,7 +479,7 @@ def _build_request( headers.pop("Content-Type") # As we are now sending multipart/form-data instead of application/json - # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding + # we need to tell httpx to use it, https://www.python-httpx.org/advanced/clients/#multipart-file-encoding if json_data: if not is_dict(json_data): raise TypeError( @@ -480,19 +487,33 @@ def _build_request( ) kwargs["data"] = self._serialize_multipartform(json_data) + # httpx determines whether or not to send a "multipart/form-data" + # request based on the truthiness of the "files" argument. + # This gets around that issue by generating a dict value that + # evaluates to true. 
+ # + # https://github.com/encode/httpx/discussions/2399#discussioncomment-3814186 + if not files: + files = cast(HttpxRequestFiles, ForceMultipartDict()) + + prepared_url = self._prepare_url(https://github.com/options.url) + if "_" in prepared_url.host: + # work around https://github.com/encode/httpx/discussions/2880 + kwargs["extensions"] = {"sni_hostname": prepared_url.host.replace("_", "-")} + # TODO: report this error to httpx return self._client.build_request( # pyright: ignore[reportUnknownMemberType] headers=headers, timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout, method=options.method, - url=self._prepare_url(https://github.com/options.url), + url=prepared_url, # the `Query` type that we use is incompatible with qs' # `Params` type as it needs to be typed as `Mapping[str, object]` # so that passing a `TypedDict` doesn't cause an error. # https://github.com/microsoft/pyright/issues/3526#event-6715453066 params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None, - json=json_data, - files=options.files, + json=json_data if is_given(json_data) else None, + files=files, **kwargs, ) @@ -593,6 +614,12 @@ def default_headers(self) -> dict[str, str | Omit]: **self._custom_headers, } + @property + def default_query(self) -> dict[str, object]: + return { + **self._custom_query, + } + def _validate_headers( self, headers: Headers, # noqa: ARG002 @@ -617,7 +644,10 @@ def base_url(https://github.com/self,%20url:%20URL%20|%20str) -> None: self._base_url = self._enforce_trailing_slash(url if isinstance(url, URL) else URL(url)) def platform_headers(self) -> Dict[str, str]: - return platform_headers(self._version) + # the actual implementation is in a separate `lru_cache` decorated + # function because adding `lru_cache` to methods will leak memory + # https://github.com/python/cpython/issues/88476 + return platform_headers(self._version, platform=self._platform) def _parse_retry_after_header(self, response_headers: Optional[httpx.Headers] = None) -> float | None: """Returns a float of the number of seconds (not milliseconds) to wait after retrying, or None if unspecified. @@ -666,7 +696,8 @@ def _calculate_retry_timeout( if retry_after is not None and 0 < retry_after <= 60: return retry_after - nb_retries = max_retries - remaining_retries + # Also cap retry count to 1000 to avoid any potential overflows with `pow` + nb_retries = min(max_retries - remaining_retries, 1000) # Apply exponential backoff, but not more than the max. sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY) @@ -737,6 +768,9 @@ def __init__(self, **kwargs: Any) -> None: class SyncHttpxClientWrapper(DefaultHttpxClient): def __del__(self) -> None: + if self.is_closed: + return + try: self.close() except Exception: @@ -754,43 +788,11 @@ def __init__( base_url: str | URL, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: Transport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.Client | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, _strict_response_validation: bool, ) -> None: - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. 
The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - warnings.warn( - "The `transport` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -811,12 +813,9 @@ def __init__( super().__init__( version=version, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, base_url=base_url, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -826,10 +825,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, - limits=limits, - follow_redirects=True, ) def is_closed(self) -> bool: @@ -859,9 +854,9 @@ def __exit__( def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options def _prepare_request( self, @@ -883,8 +878,7 @@ def request( *, stream: Literal[True], stream_cls: Type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def request( @@ -894,8 +888,7 @@ def request( remaining_retries: Optional[int] = None, *, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def request( @@ -906,8 +899,7 @@ def request( *, stream: bool = False, stream_cls: Type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... 
def request( self, @@ -918,12 +910,17 @@ def request( stream: bool = False, stream_cls: type[_StreamT] | None = None, ) -> ResponseT | _StreamT: + if remaining_retries is not None: + retries_taken = options.get_max_retries(self.max_retries) - remaining_retries + else: + retries_taken = 0 + return self._request( cast_to=cast_to, options=options, stream=stream, stream_cls=stream_cls, - remaining_retries=remaining_retries, + retries_taken=retries_taken, ) def _request( @@ -931,15 +928,20 @@ def _request( *, cast_to: Type[ResponseT], options: FinalRequestOptions, - remaining_retries: int | None, + retries_taken: int, stream: bool, stream_cls: type[_StreamT] | None, ) -> ResponseT | _StreamT: + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + cast_to = self._maybe_override_cast_to(cast_to, options) - self._prepare_options(options) + options = self._prepare_options(options) - retries = self._remaining_retries(remaining_retries, options) - request = self._build_request(options) + remaining_retries = options.get_max_retries(self.max_retries) - retries_taken + request = self._build_request(options, retries_taken=retries_taken) self._prepare_request(request) kwargs: HttpxSendArgs = {} @@ -957,11 +959,11 @@ def _request( except httpx.TimeoutException as err: log.debug("Encountered httpx.TimeoutException", exc_info=True) - if retries > 0: + if remaining_retries > 0: return self._retry_request( - options, + input_options, cast_to, - retries, + retries_taken=retries_taken, stream=stream, stream_cls=stream_cls, response_headers=None, @@ -972,11 +974,11 @@ def _request( except Exception as err: log.debug("Encountered Exception", exc_info=True) - if retries > 0: + if remaining_retries > 0: return self._retry_request( - options, + input_options, cast_to, - retries, + retries_taken=retries_taken, stream=stream, stream_cls=stream_cls, response_headers=None, @@ -1000,13 +1002,13 @@ def _request( except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code log.debug("Encountered httpx.HTTPStatusError", exc_info=True) - if retries > 0 and self._should_retry(err.response): + if remaining_retries > 0 and self._should_retry(err.response): err.response.close() return self._retry_request( - options, + input_options, cast_to, - retries, - err.response.headers, + retries_taken=retries_taken, + response_headers=err.response.headers, stream=stream, stream_cls=stream_cls, ) @@ -1025,25 +1027,26 @@ def _request( response=response, stream=stream, stream_cls=stream_cls, + retries_taken=retries_taken, ) def _retry_request( self, options: FinalRequestOptions, cast_to: Type[ResponseT], - remaining_retries: int, - response_headers: httpx.Headers | None, *, + retries_taken: int, + response_headers: httpx.Headers | None, stream: bool, stream_cls: type[_StreamT] | None, ) -> ResponseT | _StreamT: - remaining = remaining_retries - 1 - if remaining == 1: + remaining_retries = options.get_max_retries(self.max_retries) - retries_taken + if remaining_retries == 1: log.debug("1 retry left") else: - log.debug("%i retries left", remaining) + log.debug("%i retries left", remaining_retries) - timeout = self._calculate_retry_timeout(remaining, options, response_headers) + timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers) log.info("Retrying request to %s in %f seconds", options.url, timeout) # In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a @@ -1053,7 +1056,7 @@ def _retry_request( return self._request( options=options, cast_to=cast_to, - remaining_retries=remaining, + retries_taken=retries_taken + 1, stream=stream, stream_cls=stream_cls, ) @@ -1066,6 +1069,7 @@ def _process_response( response: httpx.Response, stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + retries_taken: int = 0, ) -> ResponseT: if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": return cast( @@ -1077,6 +1081,7 @@ def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ), ) @@ -1096,6 +1101,7 @@ def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ), ) @@ -1109,6 +1115,7 @@ def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ) if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): return cast(ResponseT, api_response) @@ -1141,8 +1148,7 @@ def get( cast_to: Type[ResponseT], options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def get( @@ -1153,8 +1159,7 @@ def get( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def get( @@ -1165,8 +1170,7 @@ def get( options: RequestOptions = {}, stream: bool, stream_cls: type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... def get( self, @@ -1192,8 +1196,7 @@ def post( options: RequestOptions = {}, files: RequestFiles | None = None, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload def post( @@ -1206,8 +1209,7 @@ def post( files: RequestFiles | None = None, stream: Literal[True], stream_cls: type[_StreamT], - ) -> _StreamT: - ... + ) -> _StreamT: ... @overload def post( @@ -1220,8 +1222,7 @@ def post( files: RequestFiles | None = None, stream: bool, stream_cls: type[_StreamT] | None = None, - ) -> ResponseT | _StreamT: - ... + ) -> ResponseT | _StreamT: ... def post( self, @@ -1311,6 +1312,9 @@ def __init__(self, **kwargs: Any) -> None: class AsyncHttpxClientWrapper(DefaultAsyncHttpxClient): def __del__(self) -> None: + if self.is_closed: + return + try: # TODO(someday): support non asyncio runtimes here asyncio.get_running_loop().create_task(self.aclose()) @@ -1330,42 +1334,10 @@ def __init__( _strict_response_validation: bool, max_retries: int = DEFAULT_MAX_RETRIES, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - transport: AsyncTransport | None = None, - proxies: ProxiesTypes | None = None, - limits: Limits | None = None, http_client: httpx.AsyncClient | None = None, custom_headers: Mapping[str, str] | None = None, custom_query: Mapping[str, object] | None = None, ) -> None: - if limits is not None: - warnings.warn( - "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`") - else: - limits = DEFAULT_CONNECTION_LIMITS - - if transport is not None: - warnings.warn( - "The `transport` argument is deprecated. 
The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `transport`") - - if proxies is not None: - warnings.warn( - "The `proxies` argument is deprecated. The `http_client` argument should be passed instead", - category=DeprecationWarning, - stacklevel=3, - ) - if http_client is not None: - raise ValueError("The `http_client` argument is mutually exclusive with `proxies`") - if not is_given(timeout): # if the user passed in a custom http client with a non-default # timeout set then we use that timeout. @@ -1387,11 +1359,8 @@ def __init__( super().__init__( version=version, base_url=base_url, - limits=limits, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, max_retries=max_retries, custom_query=custom_query, custom_headers=custom_headers, @@ -1401,10 +1370,6 @@ def __init__( base_url=base_url, # cast to a valid type because mypy doesn't understand our type narrowing timeout=cast(Timeout, timeout), - proxies=proxies, - transport=transport, - limits=limits, - follow_redirects=True, ) def is_closed(self) -> bool: @@ -1431,9 +1396,9 @@ async def __aexit__( async def _prepare_options( self, options: FinalRequestOptions, # noqa: ARG002 - ) -> None: + ) -> FinalRequestOptions: """Hook for mutating the given options""" - return None + return options async def _prepare_request( self, @@ -1454,8 +1419,7 @@ async def request( *, stream: Literal[False] = False, remaining_retries: Optional[int] = None, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def request( @@ -1466,8 +1430,7 @@ async def request( stream: Literal[True], stream_cls: type[_AsyncStreamT], remaining_retries: Optional[int] = None, - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... @overload async def request( @@ -1478,8 +1441,7 @@ async def request( stream: bool, stream_cls: type[_AsyncStreamT] | None = None, remaining_retries: Optional[int] = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... 
async def request( self, @@ -1490,12 +1452,17 @@ async def request( stream_cls: type[_AsyncStreamT] | None = None, remaining_retries: Optional[int] = None, ) -> ResponseT | _AsyncStreamT: + if remaining_retries is not None: + retries_taken = options.get_max_retries(self.max_retries) - remaining_retries + else: + retries_taken = 0 + return await self._request( cast_to=cast_to, options=options, stream=stream, stream_cls=stream_cls, - remaining_retries=remaining_retries, + retries_taken=retries_taken, ) async def _request( @@ -1505,13 +1472,23 @@ async def _request( *, stream: bool, stream_cls: type[_AsyncStreamT] | None, - remaining_retries: int | None, + retries_taken: int, ) -> ResponseT | _AsyncStreamT: + if self._platform is None: + # `get_platform` can make blocking IO calls so we + # execute it earlier while we are in an async context + self._platform = await asyncify(get_platform)() + + # create a copy of the options we were given so that if the + # options are mutated later & we then retry, the retries are + # given the original options + input_options = model_copy(options) + cast_to = self._maybe_override_cast_to(cast_to, options) - await self._prepare_options(options) + options = await self._prepare_options(options) - retries = self._remaining_retries(remaining_retries, options) - request = self._build_request(options) + remaining_retries = options.get_max_retries(self.max_retries) - retries_taken + request = self._build_request(options, retries_taken=retries_taken) await self._prepare_request(request) kwargs: HttpxSendArgs = {} @@ -1527,11 +1504,11 @@ async def _request( except httpx.TimeoutException as err: log.debug("Encountered httpx.TimeoutException", exc_info=True) - if retries > 0: + if remaining_retries > 0: return await self._retry_request( - options, + input_options, cast_to, - retries, + retries_taken=retries_taken, stream=stream, stream_cls=stream_cls, response_headers=None, @@ -1542,11 +1519,11 @@ async def _request( except Exception as err: log.debug("Encountered Exception", exc_info=True) - if retries > 0: + if remaining_retries > 0: return await self._retry_request( - options, + input_options, cast_to, - retries, + retries_taken=retries_taken, stream=stream, stream_cls=stream_cls, response_headers=None, @@ -1564,13 +1541,13 @@ async def _request( except httpx.HTTPStatusError as err: # thrown on 4xx and 5xx status code log.debug("Encountered httpx.HTTPStatusError", exc_info=True) - if retries > 0 and self._should_retry(err.response): + if remaining_retries > 0 and self._should_retry(err.response): await err.response.aclose() return await self._retry_request( - options, + input_options, cast_to, - retries, - err.response.headers, + retries_taken=retries_taken, + response_headers=err.response.headers, stream=stream, stream_cls=stream_cls, ) @@ -1589,25 +1566,26 @@ async def _request( response=response, stream=stream, stream_cls=stream_cls, + retries_taken=retries_taken, ) async def _retry_request( self, options: FinalRequestOptions, cast_to: Type[ResponseT], - remaining_retries: int, - response_headers: httpx.Headers | None, *, + retries_taken: int, + response_headers: httpx.Headers | None, stream: bool, stream_cls: type[_AsyncStreamT] | None, ) -> ResponseT | _AsyncStreamT: - remaining = remaining_retries - 1 - if remaining == 1: + remaining_retries = options.get_max_retries(self.max_retries) - retries_taken + if remaining_retries == 1: log.debug("1 retry left") else: - log.debug("%i retries left", remaining) + log.debug("%i retries left", remaining_retries) - timeout 
= self._calculate_retry_timeout(remaining, options, response_headers) + timeout = self._calculate_retry_timeout(remaining_retries, options, response_headers) log.info("Retrying request to %s in %f seconds", options.url, timeout) await anyio.sleep(timeout) @@ -1615,7 +1593,7 @@ async def _retry_request( return await self._request( options=options, cast_to=cast_to, - remaining_retries=remaining, + retries_taken=retries_taken + 1, stream=stream, stream_cls=stream_cls, ) @@ -1628,6 +1606,7 @@ async def _process_response( response: httpx.Response, stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, + retries_taken: int = 0, ) -> ResponseT: if response.request.headers.get(RAW_RESPONSE_HEADER) == "true": return cast( @@ -1639,6 +1618,7 @@ async def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ), ) @@ -1658,6 +1638,7 @@ async def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ), ) @@ -1671,6 +1652,7 @@ async def _process_response( stream=stream, stream_cls=stream_cls, options=options, + retries_taken=retries_taken, ) if bool(response.request.headers.get(RAW_RESPONSE_HEADER)): return cast(ResponseT, api_response) @@ -1693,8 +1675,7 @@ async def get( cast_to: Type[ResponseT], options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def get( @@ -1705,8 +1686,7 @@ async def get( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_AsyncStreamT], - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... @overload async def get( @@ -1717,8 +1697,7 @@ async def get( options: RequestOptions = {}, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... async def get( self, @@ -1742,8 +1721,7 @@ async def post( files: RequestFiles | None = None, options: RequestOptions = {}, stream: Literal[False] = False, - ) -> ResponseT: - ... + ) -> ResponseT: ... @overload async def post( @@ -1756,8 +1734,7 @@ async def post( options: RequestOptions = {}, stream: Literal[True], stream_cls: type[_AsyncStreamT], - ) -> _AsyncStreamT: - ... + ) -> _AsyncStreamT: ... @overload async def post( @@ -1770,8 +1747,7 @@ async def post( options: RequestOptions = {}, stream: bool, stream_cls: type[_AsyncStreamT] | None = None, - ) -> ResponseT | _AsyncStreamT: - ... + ) -> ResponseT | _AsyncStreamT: ... 
async def post( self, @@ -1876,6 +1852,11 @@ def make_request_options( return options +class ForceMultipartDict(Dict[str, None]): + def __bool__(self) -> bool: + return True + + class OtherPlatform: def __init__(self, name: str) -> None: self.name = name @@ -1943,11 +1924,11 @@ def get_platform() -> Platform: @lru_cache(maxsize=None) -def platform_headers(version: str) -> Dict[str, str]: +def platform_headers(version: str, *, platform: Platform | None) -> Dict[str, str]: return { "X-Stainless-Lang": "python", "X-Stainless-Package-Version": version, - "X-Stainless-OS": str(get_platform()), + "X-Stainless-OS": str(platform or get_platform()), "X-Stainless-Arch": str(get_architecture()), "X-Stainless-Runtime": get_python_runtime(), "X-Stainless-Runtime-Version": get_python_version(), @@ -1982,7 +1963,6 @@ def get_python_version() -> str: def get_architecture() -> Arch: try: - python_bitness, _ = platform.architecture() machine = platform.machine().lower() except Exception: return "unknown" @@ -1998,7 +1978,7 @@ def get_architecture() -> Arch: return "x64" # TODO: untested - if python_bitness == "32bit": + if sys.maxsize <= 2**32: return "x32" if machine: diff --git a/src/openai/_client.py b/src/openai/_client.py index 8f3060c6f6..18d96da9a3 100644 --- a/src/openai/_client.py +++ b/src/openai/_client.py @@ -8,7 +8,7 @@ import httpx -from . import resources, _exceptions +from . import _exceptions from ._qs import Querystring from ._types import ( NOT_GIVEN, @@ -25,6 +25,7 @@ get_async_library, ) from ._version import __version__ +from .resources import files, images, models, batches, embeddings, completions, moderations from ._streaming import Stream as Stream, AsyncStream as AsyncStream from ._exceptions import OpenAIError, APIStatusError from ._base_client import ( @@ -32,32 +33,32 @@ SyncAPIClient, AsyncAPIClient, ) +from .resources.beta import beta +from .resources.chat import chat +from .resources.audio import audio +from .resources.uploads import uploads +from .resources.responses import responses +from .resources.fine_tuning import fine_tuning +from .resources.vector_stores import vector_stores -__all__ = [ - "Timeout", - "Transport", - "ProxiesTypes", - "RequestOptions", - "resources", - "OpenAI", - "AsyncOpenAI", - "Client", - "AsyncClient", -] +__all__ = ["Timeout", "Transport", "ProxiesTypes", "RequestOptions", "OpenAI", "AsyncOpenAI", "Client", "AsyncClient"] class OpenAI(SyncAPIClient): - completions: resources.Completions - chat: resources.Chat - embeddings: resources.Embeddings - files: resources.Files - images: resources.Images - audio: resources.Audio - moderations: resources.Moderations - models: resources.Models - fine_tuning: resources.FineTuning - beta: resources.Beta - batches: resources.Batches + completions: completions.Completions + chat: chat.Chat + embeddings: embeddings.Embeddings + files: files.Files + images: images.Images + audio: audio.Audio + moderations: moderations.Moderations + models: models.Models + fine_tuning: fine_tuning.FineTuning + vector_stores: vector_stores.VectorStores + beta: beta.Beta + batches: batches.Batches + uploads: uploads.Uploads + responses: responses.Responses with_raw_response: OpenAIWithRawResponse with_streaming_response: OpenAIWithStreamedResponse @@ -66,6 +67,14 @@ class OpenAI(SyncAPIClient): organization: str | None project: str | None + websocket_base_url: str | httpx.URL | None + """Base URL for WebSocket connections. 
+ + If not specified, the default base URL will be used, with 'wss://' replacing the + 'http://' or 'https://' scheme. For example: 'http://example.com/' becomes + 'wss://example.com' + """ + def __init__( self, *, @@ -73,6 +82,7 @@ def __init__( organization: str | None = None, project: str | None = None, base_url: str | httpx.URL | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -91,7 +101,7 @@ def __init__( # part of our public interface in the future. _strict_response_validation: bool = False, ) -> None: - """Construct a new synchronous openai client instance. + """Construct a new synchronous OpenAI client instance. This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `OPENAI_API_KEY` @@ -114,6 +124,8 @@ def __init__( project = os.environ.get("OPENAI_PROJECT_ID") self.project = project + self.websocket_base_url = websocket_base_url + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -132,24 +144,27 @@ def __init__( self._default_stream_cls = Stream - self.completions = resources.Completions(self) - self.chat = resources.Chat(self) - self.embeddings = resources.Embeddings(self) - self.files = resources.Files(self) - self.images = resources.Images(self) - self.audio = resources.Audio(self) - self.moderations = resources.Moderations(self) - self.models = resources.Models(self) - self.fine_tuning = resources.FineTuning(self) - self.beta = resources.Beta(self) - self.batches = resources.Batches(self) + self.completions = completions.Completions(self) + self.chat = chat.Chat(self) + self.embeddings = embeddings.Embeddings(self) + self.files = files.Files(self) + self.images = images.Images(self) + self.audio = audio.Audio(self) + self.moderations = moderations.Moderations(self) + self.models = models.Models(self) + self.fine_tuning = fine_tuning.FineTuning(self) + self.vector_stores = vector_stores.VectorStores(self) + self.beta = beta.Beta(self) + self.batches = batches.Batches(self) + self.uploads = uploads.Uploads(self) + self.responses = responses.Responses(self) self.with_raw_response = OpenAIWithRawResponse(self) self.with_streaming_response = OpenAIWithStreamedResponse(self) @property @override def qs(self) -> Querystring: - return Querystring(array_format="comma") + return Querystring(array_format="brackets") @property @override @@ -174,6 +189,7 @@ def copy( api_key: str | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.Client | None = None, @@ -210,6 +226,7 @@ def copy( api_key=api_key or self.api_key, organization=organization or self.organization, project=project or self.project, + websocket_base_url=websocket_base_url or self.websocket_base_url, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, @@ -259,17 +276,20 @@ def _make_status_error( class AsyncOpenAI(AsyncAPIClient): - completions: resources.AsyncCompletions - chat: resources.AsyncChat - embeddings: resources.AsyncEmbeddings - files: resources.AsyncFiles - images: resources.AsyncImages - audio: resources.AsyncAudio - moderations: 
resources.AsyncModerations - models: resources.AsyncModels - fine_tuning: resources.AsyncFineTuning - beta: resources.AsyncBeta - batches: resources.AsyncBatches + completions: completions.AsyncCompletions + chat: chat.AsyncChat + embeddings: embeddings.AsyncEmbeddings + files: files.AsyncFiles + images: images.AsyncImages + audio: audio.AsyncAudio + moderations: moderations.AsyncModerations + models: models.AsyncModels + fine_tuning: fine_tuning.AsyncFineTuning + vector_stores: vector_stores.AsyncVectorStores + beta: beta.AsyncBeta + batches: batches.AsyncBatches + uploads: uploads.AsyncUploads + responses: responses.AsyncResponses with_raw_response: AsyncOpenAIWithRawResponse with_streaming_response: AsyncOpenAIWithStreamedResponse @@ -278,6 +298,14 @@ class AsyncOpenAI(AsyncAPIClient): organization: str | None project: str | None + websocket_base_url: str | httpx.URL | None + """Base URL for WebSocket connections. + + If not specified, the default base URL will be used, with 'wss://' replacing the + 'http://' or 'https://' scheme. For example: 'http://example.com/' becomes + 'wss://example.com' + """ + def __init__( self, *, @@ -285,6 +313,7 @@ def __init__( organization: str | None = None, project: str | None = None, base_url: str | httpx.URL | None = None, + websocket_base_url: str | httpx.URL | None = None, timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN, max_retries: int = DEFAULT_MAX_RETRIES, default_headers: Mapping[str, str] | None = None, @@ -303,7 +332,7 @@ def __init__( # part of our public interface in the future. _strict_response_validation: bool = False, ) -> None: - """Construct a new async openai client instance. + """Construct a new async AsyncOpenAI client instance. This automatically infers the following arguments from their corresponding environment variables if they are not provided: - `api_key` from `OPENAI_API_KEY` @@ -326,6 +355,8 @@ def __init__( project = os.environ.get("OPENAI_PROJECT_ID") self.project = project + self.websocket_base_url = websocket_base_url + if base_url is None: base_url = os.environ.get("OPENAI_BASE_URL") if base_url is None: @@ -344,24 +375,27 @@ def __init__( self._default_stream_cls = AsyncStream - self.completions = resources.AsyncCompletions(self) - self.chat = resources.AsyncChat(self) - self.embeddings = resources.AsyncEmbeddings(self) - self.files = resources.AsyncFiles(self) - self.images = resources.AsyncImages(self) - self.audio = resources.AsyncAudio(self) - self.moderations = resources.AsyncModerations(self) - self.models = resources.AsyncModels(self) - self.fine_tuning = resources.AsyncFineTuning(self) - self.beta = resources.AsyncBeta(self) - self.batches = resources.AsyncBatches(self) + self.completions = completions.AsyncCompletions(self) + self.chat = chat.AsyncChat(self) + self.embeddings = embeddings.AsyncEmbeddings(self) + self.files = files.AsyncFiles(self) + self.images = images.AsyncImages(self) + self.audio = audio.AsyncAudio(self) + self.moderations = moderations.AsyncModerations(self) + self.models = models.AsyncModels(self) + self.fine_tuning = fine_tuning.AsyncFineTuning(self) + self.vector_stores = vector_stores.AsyncVectorStores(self) + self.beta = beta.AsyncBeta(self) + self.batches = batches.AsyncBatches(self) + self.uploads = uploads.AsyncUploads(self) + self.responses = responses.AsyncResponses(self) self.with_raw_response = AsyncOpenAIWithRawResponse(self) self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self) @property @override def qs(self) -> Querystring: - return 
Querystring(array_format="comma") + return Querystring(array_format="brackets") @property @override @@ -386,6 +420,7 @@ def copy( api_key: str | None = None, organization: str | None = None, project: str | None = None, + websocket_base_url: str | httpx.URL | None = None, base_url: str | httpx.URL | None = None, timeout: float | Timeout | None | NotGiven = NOT_GIVEN, http_client: httpx.AsyncClient | None = None, @@ -422,6 +457,7 @@ def copy( api_key=api_key or self.api_key, organization=organization or self.organization, project=project or self.project, + websocket_base_url=websocket_base_url or self.websocket_base_url, base_url=base_url or self.base_url, timeout=self.timeout if isinstance(timeout, NotGiven) else timeout, http_client=http_client, @@ -472,62 +508,74 @@ def _make_status_error( class OpenAIWithRawResponse: def __init__(self, client: OpenAI) -> None: - self.completions = resources.CompletionsWithRawResponse(client.completions) - self.chat = resources.ChatWithRawResponse(client.chat) - self.embeddings = resources.EmbeddingsWithRawResponse(client.embeddings) - self.files = resources.FilesWithRawResponse(client.files) - self.images = resources.ImagesWithRawResponse(client.images) - self.audio = resources.AudioWithRawResponse(client.audio) - self.moderations = resources.ModerationsWithRawResponse(client.moderations) - self.models = resources.ModelsWithRawResponse(client.models) - self.fine_tuning = resources.FineTuningWithRawResponse(client.fine_tuning) - self.beta = resources.BetaWithRawResponse(client.beta) - self.batches = resources.BatchesWithRawResponse(client.batches) + self.completions = completions.CompletionsWithRawResponse(client.completions) + self.chat = chat.ChatWithRawResponse(client.chat) + self.embeddings = embeddings.EmbeddingsWithRawResponse(client.embeddings) + self.files = files.FilesWithRawResponse(client.files) + self.images = images.ImagesWithRawResponse(client.images) + self.audio = audio.AudioWithRawResponse(client.audio) + self.moderations = moderations.ModerationsWithRawResponse(client.moderations) + self.models = models.ModelsWithRawResponse(client.models) + self.fine_tuning = fine_tuning.FineTuningWithRawResponse(client.fine_tuning) + self.vector_stores = vector_stores.VectorStoresWithRawResponse(client.vector_stores) + self.beta = beta.BetaWithRawResponse(client.beta) + self.batches = batches.BatchesWithRawResponse(client.batches) + self.uploads = uploads.UploadsWithRawResponse(client.uploads) + self.responses = responses.ResponsesWithRawResponse(client.responses) class AsyncOpenAIWithRawResponse: def __init__(self, client: AsyncOpenAI) -> None: - self.completions = resources.AsyncCompletionsWithRawResponse(client.completions) - self.chat = resources.AsyncChatWithRawResponse(client.chat) - self.embeddings = resources.AsyncEmbeddingsWithRawResponse(client.embeddings) - self.files = resources.AsyncFilesWithRawResponse(client.files) - self.images = resources.AsyncImagesWithRawResponse(client.images) - self.audio = resources.AsyncAudioWithRawResponse(client.audio) - self.moderations = resources.AsyncModerationsWithRawResponse(client.moderations) - self.models = resources.AsyncModelsWithRawResponse(client.models) - self.fine_tuning = resources.AsyncFineTuningWithRawResponse(client.fine_tuning) - self.beta = resources.AsyncBetaWithRawResponse(client.beta) - self.batches = resources.AsyncBatchesWithRawResponse(client.batches) + self.completions = completions.AsyncCompletionsWithRawResponse(client.completions) + self.chat = 
chat.AsyncChatWithRawResponse(client.chat) + self.embeddings = embeddings.AsyncEmbeddingsWithRawResponse(client.embeddings) + self.files = files.AsyncFilesWithRawResponse(client.files) + self.images = images.AsyncImagesWithRawResponse(client.images) + self.audio = audio.AsyncAudioWithRawResponse(client.audio) + self.moderations = moderations.AsyncModerationsWithRawResponse(client.moderations) + self.models = models.AsyncModelsWithRawResponse(client.models) + self.fine_tuning = fine_tuning.AsyncFineTuningWithRawResponse(client.fine_tuning) + self.vector_stores = vector_stores.AsyncVectorStoresWithRawResponse(client.vector_stores) + self.beta = beta.AsyncBetaWithRawResponse(client.beta) + self.batches = batches.AsyncBatchesWithRawResponse(client.batches) + self.uploads = uploads.AsyncUploadsWithRawResponse(client.uploads) + self.responses = responses.AsyncResponsesWithRawResponse(client.responses) class OpenAIWithStreamedResponse: def __init__(self, client: OpenAI) -> None: - self.completions = resources.CompletionsWithStreamingResponse(client.completions) - self.chat = resources.ChatWithStreamingResponse(client.chat) - self.embeddings = resources.EmbeddingsWithStreamingResponse(client.embeddings) - self.files = resources.FilesWithStreamingResponse(client.files) - self.images = resources.ImagesWithStreamingResponse(client.images) - self.audio = resources.AudioWithStreamingResponse(client.audio) - self.moderations = resources.ModerationsWithStreamingResponse(client.moderations) - self.models = resources.ModelsWithStreamingResponse(client.models) - self.fine_tuning = resources.FineTuningWithStreamingResponse(client.fine_tuning) - self.beta = resources.BetaWithStreamingResponse(client.beta) - self.batches = resources.BatchesWithStreamingResponse(client.batches) + self.completions = completions.CompletionsWithStreamingResponse(client.completions) + self.chat = chat.ChatWithStreamingResponse(client.chat) + self.embeddings = embeddings.EmbeddingsWithStreamingResponse(client.embeddings) + self.files = files.FilesWithStreamingResponse(client.files) + self.images = images.ImagesWithStreamingResponse(client.images) + self.audio = audio.AudioWithStreamingResponse(client.audio) + self.moderations = moderations.ModerationsWithStreamingResponse(client.moderations) + self.models = models.ModelsWithStreamingResponse(client.models) + self.fine_tuning = fine_tuning.FineTuningWithStreamingResponse(client.fine_tuning) + self.vector_stores = vector_stores.VectorStoresWithStreamingResponse(client.vector_stores) + self.beta = beta.BetaWithStreamingResponse(client.beta) + self.batches = batches.BatchesWithStreamingResponse(client.batches) + self.uploads = uploads.UploadsWithStreamingResponse(client.uploads) + self.responses = responses.ResponsesWithStreamingResponse(client.responses) class AsyncOpenAIWithStreamedResponse: def __init__(self, client: AsyncOpenAI) -> None: - self.completions = resources.AsyncCompletionsWithStreamingResponse(client.completions) - self.chat = resources.AsyncChatWithStreamingResponse(client.chat) - self.embeddings = resources.AsyncEmbeddingsWithStreamingResponse(client.embeddings) - self.files = resources.AsyncFilesWithStreamingResponse(client.files) - self.images = resources.AsyncImagesWithStreamingResponse(client.images) - self.audio = resources.AsyncAudioWithStreamingResponse(client.audio) - self.moderations = resources.AsyncModerationsWithStreamingResponse(client.moderations) - self.models = resources.AsyncModelsWithStreamingResponse(client.models) - self.fine_tuning = 
resources.AsyncFineTuningWithStreamingResponse(client.fine_tuning) - self.beta = resources.AsyncBetaWithStreamingResponse(client.beta) - self.batches = resources.AsyncBatchesWithStreamingResponse(client.batches) + self.completions = completions.AsyncCompletionsWithStreamingResponse(client.completions) + self.chat = chat.AsyncChatWithStreamingResponse(client.chat) + self.embeddings = embeddings.AsyncEmbeddingsWithStreamingResponse(client.embeddings) + self.files = files.AsyncFilesWithStreamingResponse(client.files) + self.images = images.AsyncImagesWithStreamingResponse(client.images) + self.audio = audio.AsyncAudioWithStreamingResponse(client.audio) + self.moderations = moderations.AsyncModerationsWithStreamingResponse(client.moderations) + self.models = models.AsyncModelsWithStreamingResponse(client.models) + self.fine_tuning = fine_tuning.AsyncFineTuningWithStreamingResponse(client.fine_tuning) + self.vector_stores = vector_stores.AsyncVectorStoresWithStreamingResponse(client.vector_stores) + self.beta = beta.AsyncBetaWithStreamingResponse(client.beta) + self.batches = batches.AsyncBatchesWithStreamingResponse(client.batches) + self.uploads = uploads.AsyncUploadsWithStreamingResponse(client.uploads) + self.responses = responses.AsyncResponsesWithStreamingResponse(client.responses) Client = OpenAI diff --git a/src/openai/_compat.py b/src/openai/_compat.py index 74c7639b4c..92d9ee61ee 100644 --- a/src/openai/_compat.py +++ b/src/openai/_compat.py @@ -2,12 +2,12 @@ from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast, overload from datetime import date, datetime -from typing_extensions import Self +from typing_extensions import Self, Literal import pydantic from pydantic.fields import FieldInfo -from ._types import StrBytesIntFloat +from ._types import IncEx, StrBytesIntFloat _T = TypeVar("_T") _ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel) @@ -118,10 +118,10 @@ def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]: return model.__fields__ # type: ignore -def model_copy(model: _ModelT) -> _ModelT: +def model_copy(model: _ModelT, *, deep: bool = False) -> _ModelT: if PYDANTIC_V2: - return model.model_copy() - return model.copy() # type: ignore + return model.model_copy(deep=deep) + return model.copy(deep=deep) # type: ignore def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: @@ -133,17 +133,25 @@ def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str: def model_dump( model: pydantic.BaseModel, *, + exclude: IncEx | None = None, exclude_unset: bool = False, exclude_defaults: bool = False, + warnings: bool = True, + mode: Literal["json", "python"] = "python", ) -> dict[str, Any]: - if PYDANTIC_V2: + if PYDANTIC_V2 or hasattr(model, "model_dump"): return model.model_dump( + mode=mode, + exclude=exclude, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, + # warnings are not supported in Pydantic v1 + warnings=warnings if PYDANTIC_V2 else True, ) return cast( "dict[str, Any]", model.dict( # pyright: ignore[reportDeprecated, reportUnnecessaryCast] + exclude=exclude, exclude_unset=exclude_unset, exclude_defaults=exclude_defaults, ), @@ -159,22 +167,19 @@ def model_parse(model: type[_ModelT], data: Any) -> _ModelT: # generic models if TYPE_CHECKING: - class GenericModel(pydantic.BaseModel): - ... + class GenericModel(pydantic.BaseModel): ... 
else: if PYDANTIC_V2: # there no longer needs to be a distinction in v2 but # we still have to create our own subclass to avoid # inconsistent MRO ordering errors - class GenericModel(pydantic.BaseModel): - ... + class GenericModel(pydantic.BaseModel): ... else: import pydantic.generics - class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): - ... + class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel): ... # cached properties @@ -193,30 +198,22 @@ class typed_cached_property(Generic[_T]): func: Callable[[Any], _T] attrname: str | None - def __init__(self, func: Callable[[Any], _T]) -> None: - ... + def __init__(self, func: Callable[[Any], _T]) -> None: ... @overload - def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: - ... + def __get__(self, instance: None, owner: type[Any] | None = None) -> Self: ... @overload - def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: - ... + def __get__(self, instance: object, owner: type[Any] | None = None) -> _T: ... def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self: raise NotImplementedError() - def __set_name__(self, owner: type[Any], name: str) -> None: - ... + def __set_name__(self, owner: type[Any], name: str) -> None: ... # __set__ is not defined at runtime, but @cached_property is designed to be settable - def __set__(self, instance: object, value: _T) -> None: - ... + def __set__(self, instance: object, value: _T) -> None: ... else: - try: - from functools import cached_property as cached_property - except ImportError: - from cached_property import cached_property as cached_property + from functools import cached_property as cached_property typed_cached_property = cached_property diff --git a/src/openai/_constants.py b/src/openai/_constants.py index 3f82bed037..7029dc72b0 100644 --- a/src/openai/_constants.py +++ b/src/openai/_constants.py @@ -6,7 +6,7 @@ OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to" # default timeout is 10 minutes -DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0) +DEFAULT_TIMEOUT = httpx.Timeout(timeout=600, connect=5.0) DEFAULT_MAX_RETRIES = 2 DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=1000, max_keepalive_connections=100) diff --git a/src/openai/_extras/__init__.py b/src/openai/_extras/__init__.py deleted file mode 100644 index 864dac4171..0000000000 --- a/src/openai/_extras/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .numpy_proxy import numpy as numpy, has_numpy as has_numpy -from .pandas_proxy import pandas as pandas diff --git a/src/openai/_extras/_common.py b/src/openai/_extras/_common.py deleted file mode 100644 index 6e71720e64..0000000000 --- a/src/openai/_extras/_common.py +++ /dev/null @@ -1,21 +0,0 @@ -from .._exceptions import OpenAIError - -INSTRUCTIONS = """ - -OpenAI error: - - missing `{library}` - -This feature requires additional dependencies: - - $ pip install openai[{extra}] - -""" - - -def format_instructions(*, library: str, extra: str) -> str: - return INSTRUCTIONS.format(library=library, extra=extra) - - -class MissingDependencyError(OpenAIError): - pass diff --git a/src/openai/_extras/numpy_proxy.py b/src/openai/_extras/numpy_proxy.py deleted file mode 100644 index 27880bf132..0000000000 --- a/src/openai/_extras/numpy_proxy.py +++ /dev/null @@ -1,37 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any -from typing_extensions import override - -from .._utils import LazyProxy -from ._common import MissingDependencyError, 
format_instructions - -if TYPE_CHECKING: - import numpy as numpy - - -NUMPY_INSTRUCTIONS = format_instructions(library="numpy", extra="datalib") - - -class NumpyProxy(LazyProxy[Any]): - @override - def __load__(self) -> Any: - try: - import numpy - except ImportError as err: - raise MissingDependencyError(NUMPY_INSTRUCTIONS) from err - - return numpy - - -if not TYPE_CHECKING: - numpy = NumpyProxy() - - -def has_numpy() -> bool: - try: - import numpy # noqa: F401 # pyright: ignore[reportUnusedImport] - except ImportError: - return False - - return True diff --git a/src/openai/_extras/pandas_proxy.py b/src/openai/_extras/pandas_proxy.py deleted file mode 100644 index 686377bade..0000000000 --- a/src/openai/_extras/pandas_proxy.py +++ /dev/null @@ -1,28 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any -from typing_extensions import override - -from .._utils import LazyProxy -from ._common import MissingDependencyError, format_instructions - -if TYPE_CHECKING: - import pandas as pandas - - -PANDAS_INSTRUCTIONS = format_instructions(library="pandas", extra="datalib") - - -class PandasProxy(LazyProxy[Any]): - @override - def __load__(self) -> Any: - try: - import pandas - except ImportError as err: - raise MissingDependencyError(PANDAS_INSTRUCTIONS) from err - - return pandas - - -if not TYPE_CHECKING: - pandas = PandasProxy() diff --git a/src/openai/_files.py b/src/openai/_files.py index ad7b668b4b..801a0d2928 100644 --- a/src/openai/_files.py +++ b/src/openai/_files.py @@ -39,13 +39,11 @@ def assert_is_file_content(obj: object, *, key: str | None = None) -> None: @overload -def to_httpx_files(files: None) -> None: - ... +def to_httpx_files(files: None) -> None: ... @overload -def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: - ... +def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ... def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: @@ -83,13 +81,11 @@ def _read_file_content(file: FileContent) -> HttpxFileContent: @overload -async def async_to_httpx_files(files: None) -> None: - ... +async def async_to_httpx_files(files: None) -> None: ... @overload -async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: - ... +async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles: ... 
async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None: diff --git a/src/openai/_legacy_response.py b/src/openai/_legacy_response.py index 1de906b167..37151fc9a9 100644 --- a/src/openai/_legacy_response.py +++ b/src/openai/_legacy_response.py @@ -5,7 +5,18 @@ import logging import datetime import functools -from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, Iterator, AsyncIterator, cast, overload +from typing import ( + TYPE_CHECKING, + Any, + Union, + Generic, + TypeVar, + Callable, + Iterator, + AsyncIterator, + cast, + overload, +) from typing_extensions import Awaitable, ParamSpec, override, deprecated, get_origin import anyio @@ -13,7 +24,7 @@ import pydantic from ._types import NoneType -from ._utils import is_given, extract_type_arg, is_annotated_type +from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type from ._models import BaseModel, is_basemodel from ._constants import RAW_RESPONSE_HEADER from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type @@ -53,6 +64,9 @@ class LegacyAPIResponse(Generic[R]): http_response: httpx.Response + retries_taken: int + """The number of retries made. If no retries happened this will be `0`""" + def __init__( self, *, @@ -62,6 +76,7 @@ def __init__( stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, options: FinalRequestOptions, + retries_taken: int = 0, ) -> None: self._cast_to = cast_to self._client = client @@ -70,18 +85,17 @@ def __init__( self._stream_cls = stream_cls self._options = options self.http_response = raw + self.retries_taken = retries_taken @property def request_id(self) -> str | None: return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] @overload - def parse(self, *, to: type[_T]) -> _T: - ... + def parse(self, *, to: type[_T]) -> _T: ... @overload - def parse(self) -> R: - ... + def parse(self) -> R: ... def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. 
@@ -178,9 +192,17 @@ def elapsed(self) -> datetime.timedelta: return self.http_response.elapsed def _parse(self, *, to: type[_T] | None = None) -> R | _T: + cast_to = to if to is not None else self._cast_to + + # unwrap `TypeAlias('Name', T)` -> `T` + if is_type_alias_type(cast_to): + cast_to = cast_to.__value__ # type: ignore[unreachable] + # unwrap `Annotated[T, ...]` -> `T` - if to and is_annotated_type(to): - to = extract_type_arg(to, 0) + if cast_to and is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + + origin = get_origin(cast_to) or cast_to if self._stream: if to: @@ -216,18 +238,12 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: return cast( R, stream_cls( - cast_to=self._cast_to, + cast_to=cast_to, response=self.http_response, client=cast(Any, self._client), ), ) - cast_to = to if to is not None else self._cast_to - - # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(cast_to): - cast_to = extract_type_arg(cast_to, 0) - if cast_to is NoneType: return cast(R, None) @@ -241,7 +257,8 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to == float: return cast(R, float(response.text)) - origin = get_origin(cast_to) or cast_to + if cast_to == bool: + return cast(R, response.text.lower() == "true") if inspect.isclass(origin) and issubclass(origin, HttpxBinaryResponseContent): return cast(R, cast_to(response)) # type: ignore @@ -249,7 +266,9 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if origin == LegacyAPIResponse: raise RuntimeError("Unexpected state - cast_to is `APIResponse`") - if inspect.isclass(origin) and issubclass(origin, httpx.Response): + if inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) and issubclass(origin, httpx.Response): # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response # and pass that class to our request functions. We cannot change the variance to be either # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct @@ -259,7 +278,13 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") return cast(R, response) - if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + if ( + inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): raise TypeError("Pydantic models must subclass our base model type, e.g. 
`from openai import BaseModel`") if ( diff --git a/src/openai/_models.py b/src/openai/_models.py index 75c68cc730..b51a1bf5f9 100644 --- a/src/openai/_models.py +++ b/src/openai/_models.py @@ -10,6 +10,7 @@ ClassVar, Protocol, Required, + ParamSpec, TypedDict, TypeGuard, final, @@ -36,6 +37,7 @@ PropertyInfo, is_list, is_given, + json_safe, lru_cache, is_mapping, parse_date, @@ -44,6 +46,7 @@ strip_not_given, extract_type_arg, is_annotated_type, + is_type_alias_type, strip_annotated_type, ) from ._compat import ( @@ -62,11 +65,14 @@ from ._constants import RAW_RESPONSE_HEADER if TYPE_CHECKING: - from pydantic_core.core_schema import ModelField, LiteralSchema, ModelFieldsSchema + from pydantic_core.core_schema import ModelField, ModelSchema, LiteralSchema, ModelFieldsSchema __all__ = ["BaseModel", "GenericModel"] _T = TypeVar("_T") +_BaseModelT = TypeVar("_BaseModelT", bound="BaseModel") + +P = ParamSpec("P") @runtime_checkable @@ -166,21 +172,21 @@ def to_json( @override def __str__(self) -> str: # mypy complains about an invalid self arg - return f'{self.__repr_name__()}({self.__repr_str__(", ")})' # type: ignore[misc] + return f"{self.__repr_name__()}({self.__repr_str__(', ')})" # type: ignore[misc] # Override the 'construct' method in a way that supports recursive parsing without validation. # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836. @classmethod @override - def construct( - cls: Type[ModelT], + def construct( # pyright: ignore[reportIncompatibleMethodOverride] + __cls: Type[ModelT], _fields_set: set[str] | None = None, **values: object, ) -> ModelT: - m = cls.__new__(cls) + m = __cls.__new__(__cls) fields_values: dict[str, object] = {} - config = get_model_config(cls) + config = get_model_config(__cls) populate_by_name = ( config.allow_population_by_field_name if isinstance(config, _ConfigProtocol) @@ -190,7 +196,7 @@ def construct( if _fields_set is None: _fields_set = set() - model_fields = get_model_fields(cls) + model_fields = get_model_fields(__cls) for name, field in model_fields.items(): key = field.alias if key is None or (key not in values and populate_by_name): @@ -244,8 +250,8 @@ def model_dump( self, *, mode: Literal["json", "python"] | str = "python", - include: IncEx = None, - exclude: IncEx = None, + include: IncEx | None = None, + exclude: IncEx | None = None, by_alias: bool = False, exclude_unset: bool = False, exclude_defaults: bool = False, @@ -275,8 +281,8 @@ def model_dump( Returns: A dictionary representation of the model. 
""" - if mode != "python": - raise ValueError("mode is only supported in Pydantic v2") + if mode not in {"json", "python"}: + raise ValueError("mode must be either 'json' or 'python'") if round_trip != False: raise ValueError("round_trip is only supported in Pydantic v2") if warnings != True: @@ -285,7 +291,7 @@ def model_dump( raise ValueError("context is only supported in Pydantic v2") if serialize_as_any != False: raise ValueError("serialize_as_any is only supported in Pydantic v2") - return super().dict( # pyright: ignore[reportDeprecated] + dumped = super().dict( # pyright: ignore[reportDeprecated] include=include, exclude=exclude, by_alias=by_alias, @@ -294,13 +300,15 @@ def model_dump( exclude_none=exclude_none, ) + return cast(dict[str, Any], json_safe(dumped)) if mode == "json" else dumped + @override def model_dump_json( self, *, indent: int | None = None, - include: IncEx = None, - exclude: IncEx = None, + include: IncEx | None = None, + exclude: IncEx | None = None, by_alias: bool = False, exclude_unset: bool = False, exclude_defaults: bool = False, @@ -376,17 +384,59 @@ def is_basemodel(type_: type) -> bool: def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]: origin = get_origin(type_) or type_ + if not inspect.isclass(origin): + return False return issubclass(origin, BaseModel) or issubclass(origin, GenericModel) +def build( + base_model_cls: Callable[P, _BaseModelT], + *args: P.args, + **kwargs: P.kwargs, +) -> _BaseModelT: + """Construct a BaseModel class without validation. + + This is useful for cases where you need to instantiate a `BaseModel` + from an API response as this provides type-safe params which isn't supported + by helpers like `construct_type()`. + + ```py + build(MyModel, my_field_a="foo", my_field_b=123) + ``` + """ + if args: + raise TypeError( + "Received positional arguments which are not supported; Keyword arguments must be used instead", + ) + + return cast(_BaseModelT, construct_type(type_=base_model_cls, value=kwargs)) + + +def construct_type_unchecked(*, value: object, type_: type[_T]) -> _T: + """Loose coercion to the expected type with construction of nested values. + + Note: the returned value from this function is not guaranteed to match the + given type. + """ + return cast(_T, construct_type(value=value, type_=type_)) + + def construct_type(*, value: object, type_: object) -> object: """Loose coercion to the expected type with construction of nested values. If the given value does not match the expected type then it is returned as-is. 
""" + + # store a reference to the original type we were given before we extract any inner + # types so that we can properly resolve forward references in `TypeAliasType` annotations + original_type = None + # we allow `object` as the input type because otherwise, passing things like # `Literal['value']` will be reported as a type error by type checkers type_ = cast("type[object]", type_) + if is_type_alias_type(type_): + original_type = type_ # type: ignore[unreachable] + type_ = type_.__value__ # type: ignore[unreachable] # unwrap `Annotated[T, ...]` -> `T` if is_annotated_type(type_): @@ -402,7 +452,7 @@ def construct_type(*, value: object, type_: object) -> object: if is_union(origin): try: - return validate_type(type_=cast("type[object]", type_), value=value) + return validate_type(type_=cast("type[object]", original_type or type_), value=value) except Exception: pass @@ -444,7 +494,11 @@ def construct_type(*, value: object, type_: object) -> object: _, items_type = get_args(type_) # Dict[_, items_type] return {key: construct_type(value=item, type_=items_type) for key, item in value.items()} - if not is_literal_type(type_) and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)): + if ( + not is_literal_type(type_) + and inspect.isclass(origin) + and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)) + ): if is_list(value): return [cast(Any, type_).construct(**entry) if is_mapping(entry) else entry for entry in value] @@ -592,15 +646,18 @@ def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None: schema = model.__pydantic_core_schema__ + if schema["type"] == "definitions": + schema = schema["schema"] + if schema["type"] != "model": return None + schema = cast("ModelSchema", schema) fields_schema = schema["schema"] if fields_schema["type"] != "model-fields": return None fields_schema = cast("ModelFieldsSchema", fields_schema) - field = fields_schema["fields"].get(field_name) if not field: return None @@ -616,6 +673,14 @@ def validate_type(*, type_: type[_T], value: object) -> _T: return cast(_T, _validate_non_model_type(type_=type_, value=value)) +def set_pydantic_config(typ: Any, config: pydantic.ConfigDict) -> None: + """Add a pydantic config for the given type. + + Note: this is a no-op on Pydantic v1. 
+ """ + setattr(typ, "__pydantic_config__", config) # noqa: B010 + + # our use of subclasssing here causes weirdness for type checkers, # so we just pretend that we don't subclass if TYPE_CHECKING: diff --git a/src/openai/_module_client.py b/src/openai/_module_client.py index 6f7356eb3c..e7d2657860 100644 --- a/src/openai/_module_client.py +++ b/src/openai/_module_client.py @@ -48,6 +48,18 @@ def __load__(self) -> resources.Batches: return _load_client().batches +class UploadsProxy(LazyProxy[resources.Uploads]): + @override + def __load__(self) -> resources.Uploads: + return _load_client().uploads + + +class ResponsesProxy(LazyProxy[resources.Responses]): + @override + def __load__(self) -> resources.Responses: + return _load_client().responses + + class EmbeddingsProxy(LazyProxy[resources.Embeddings]): @override def __load__(self) -> resources.Embeddings: @@ -72,6 +84,12 @@ def __load__(self) -> resources.FineTuning: return _load_client().fine_tuning +class VectorStoresProxy(LazyProxy[resources.VectorStores]): + @override + def __load__(self) -> resources.VectorStores: + return _load_client().vector_stores + + chat: resources.Chat = ChatProxy().__as_proxied__() beta: resources.Beta = BetaProxy().__as_proxied__() files: resources.Files = FilesProxy().__as_proxied__() @@ -79,7 +97,10 @@ def __load__(self) -> resources.FineTuning: images: resources.Images = ImagesProxy().__as_proxied__() models: resources.Models = ModelsProxy().__as_proxied__() batches: resources.Batches = BatchesProxy().__as_proxied__() +uploads: resources.Uploads = UploadsProxy().__as_proxied__() +responses: resources.Responses = ResponsesProxy().__as_proxied__() embeddings: resources.Embeddings = EmbeddingsProxy().__as_proxied__() completions: resources.Completions = CompletionsProxy().__as_proxied__() moderations: resources.Moderations = ModerationsProxy().__as_proxied__() fine_tuning: resources.FineTuning = FineTuningProxy().__as_proxied__() +vector_stores: resources.VectorStores = VectorStoresProxy().__as_proxied__() diff --git a/src/openai/_response.py b/src/openai/_response.py index 4ba2ae681c..c43fe39e56 100644 --- a/src/openai/_response.py +++ b/src/openai/_response.py @@ -25,7 +25,7 @@ import pydantic from ._types import NoneType -from ._utils import is_given, extract_type_arg, is_annotated_type, extract_type_var_from_base +from ._utils import is_given, extract_type_arg, is_annotated_type, is_type_alias_type, extract_type_var_from_base from ._models import BaseModel, is_basemodel from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type @@ -55,6 +55,9 @@ class BaseAPIResponse(Generic[R]): http_response: httpx.Response + retries_taken: int + """The number of retries made. 
If no retries happened this will be `0`""" + def __init__( self, *, @@ -64,6 +67,7 @@ def __init__( stream: bool, stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None, options: FinalRequestOptions, + retries_taken: int = 0, ) -> None: self._cast_to = cast_to self._client = client @@ -72,6 +76,7 @@ def __init__( self._stream_cls = stream_cls self._options = options self.http_response = raw + self.retries_taken = retries_taken @property def headers(self) -> httpx.Headers: @@ -121,9 +126,17 @@ def __repr__(self) -> str: ) def _parse(self, *, to: type[_T] | None = None) -> R | _T: + cast_to = to if to is not None else self._cast_to + + # unwrap `TypeAlias('Name', T)` -> `T` + if is_type_alias_type(cast_to): + cast_to = cast_to.__value__ # type: ignore[unreachable] + # unwrap `Annotated[T, ...]` -> `T` - if to and is_annotated_type(to): - to = extract_type_arg(to, 0) + if cast_to and is_annotated_type(cast_to): + cast_to = extract_type_arg(cast_to, 0) + + origin = get_origin(cast_to) or cast_to if self._is_sse_stream: if to: @@ -159,18 +172,12 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: return cast( R, stream_cls( - cast_to=self._cast_to, + cast_to=cast_to, response=self.http_response, client=cast(Any, self._client), ), ) - cast_to = to if to is not None else self._cast_to - - # unwrap `Annotated[T, ...]` -> `T` - if is_annotated_type(cast_to): - cast_to = extract_type_arg(cast_to, 0) - if cast_to is NoneType: return cast(R, None) @@ -187,7 +194,8 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: if cast_to == float: return cast(R, float(response.text)) - origin = get_origin(cast_to) or cast_to + if cast_to == bool: + return cast(R, response.text.lower() == "true") # handle the legacy binary response case if inspect.isclass(cast_to) and cast_to.__name__ == "HttpxBinaryResponseContent": @@ -206,7 +214,13 @@ def _parse(self, *, to: type[_T] | None = None) -> R | _T: raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`") return cast(R, response) - if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel): + if ( + inspect.isclass( + origin # pyright: ignore[reportUnknownArgumentType] + ) + and not issubclass(origin, BaseModel) + and issubclass(origin, pydantic.BaseModel) + ): raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`") if ( @@ -263,12 +277,10 @@ def request_id(self) -> str | None: return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] @overload - def parse(self, *, to: type[_T]) -> _T: - ... + def parse(self, *, to: type[_T]) -> _T: ... @overload - def parse(self) -> R: - ... + def parse(self) -> R: ... def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. @@ -371,12 +383,10 @@ def request_id(self) -> str | None: return self.http_response.headers.get("x-request-id") # type: ignore[no-any-return] @overload - async def parse(self, *, to: type[_T]) -> _T: - ... + async def parse(self, *, to: type[_T]) -> _T: ... @overload - async def parse(self) -> R: - ... + async def parse(self) -> R: ... async def parse(self, *, to: type[_T] | None = None) -> R | _T: """Returns the rich python representation of this response's data. 
diff --git a/src/openai/_streaming.py b/src/openai/_streaming.py index 0fda992cff..7aa7b62f6b 100644 --- a/src/openai/_streaming.py +++ b/src/openai/_streaming.py @@ -59,42 +59,22 @@ def __stream__(self) -> Iterator[_T]: if sse.data.startswith("[DONE]"): break - if sse.event is None: - data = sse.json() - if is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data=data, cast_to=cast_to, response=response) - - else: - data = sse.json() - - if sse.event == "error" and is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) + data = sse.json() + if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data=data, cast_to=cast_to, response=response) # Ensure the entire stream is consumed for _sse in iterator: @@ -161,42 +141,22 @@ async def __stream__(self) -> AsyncIterator[_T]: if sse.data.startswith("[DONE]"): break - if sse.event is None: - data = sse.json() - if is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data=data, cast_to=cast_to, response=response) - - else: - data = sse.json() - - if sse.event == "error" and is_mapping(data) and data.get("error"): - message = None - error = data.get("error") - if is_mapping(error): - message = error.get("message") - if not message or not isinstance(message, str): - message = "An error occurred during streaming" - - raise APIError( - message=message, - request=self.response.request, - body=data["error"], - ) - - yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response) + data = sse.json() + if is_mapping(data) and data.get("error"): + message = None + error = data.get("error") + if is_mapping(error): + message = error.get("message") + if not message or not isinstance(message, str): + message = "An error occurred during streaming" + + raise APIError( + message=message, + request=self.response.request, + body=data["error"], + ) + + yield process_data(data=data, cast_to=cast_to, response=response) # Ensure the entire stream is consumed async for _sse in iterator: diff --git a/src/openai/_types.py b/src/openai/_types.py index de9b1dd48b..a5cf207aa3 100644 --- a/src/openai/_types.py +++ b/src/openai/_types.py @@ -16,7 +16,7 @@ Optional, Sequence, ) -from typing_extensions import Literal, Protocol, TypeAlias, TypedDict, override, 
runtime_checkable +from typing_extensions import Set, Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable import httpx import pydantic @@ -112,8 +112,7 @@ class NotGiven: For example: ```py - def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: - ... + def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ... get(timeout=1) # 1s timeout @@ -163,16 +162,14 @@ def build( *, response: Response, data: object, - ) -> _T: - ... + ) -> _T: ... Headers = Mapping[str, Union[str, Omit]] class HeadersLikeProtocol(Protocol): - def get(self, __key: str) -> str | None: - ... + def get(self, __key: str) -> str | None: ... HeadersLike = Union[Headers, HeadersLikeProtocol] @@ -197,8 +194,8 @@ def get(self, __key: str) -> str | None: StrBytesIntFloat = Union[str, bytes, int, float] # Note: copied from Pydantic -# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49 -IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None" +# https://github.com/pydantic/pydantic/blob/6f31f8f68ef011f84357330186f603ff295312fd/pydantic/main.py#L79 +IncEx: TypeAlias = Union[Set[int], Set[str], Mapping[int, Union["IncEx", bool]], Mapping[str, Union["IncEx", bool]]] PostParser = Callable[[Any], Any] diff --git a/src/openai/_utils/__init__.py b/src/openai/_utils/__init__.py index 31b5b22799..d4fda26f3c 100644 --- a/src/openai/_utils/__init__.py +++ b/src/openai/_utils/__init__.py @@ -6,6 +6,7 @@ is_list as is_list, is_given as is_given, is_tuple as is_tuple, + json_safe as json_safe, lru_cache as lru_cache, is_mapping as is_mapping, is_tuple_t as is_tuple_t, @@ -38,6 +39,7 @@ is_iterable_type as is_iterable_type, is_required_type as is_required_type, is_annotated_type as is_annotated_type, + is_type_alias_type as is_type_alias_type, strip_annotated_type as strip_annotated_type, extract_type_var_from_base as extract_type_var_from_base, ) @@ -49,3 +51,7 @@ maybe_transform as maybe_transform, async_maybe_transform as async_maybe_transform, ) +from ._reflection import ( + function_has_argument as function_has_argument, + assert_signatures_in_sync as assert_signatures_in_sync, +) diff --git a/src/openai/_utils/_proxy.py b/src/openai/_utils/_proxy.py index c46a62a698..ffd883e9dd 100644 --- a/src/openai/_utils/_proxy.py +++ b/src/openai/_utils/_proxy.py @@ -59,5 +59,4 @@ def __as_proxied__(self) -> T: return cast(T, self) @abstractmethod - def __load__(self) -> T: - ... + def __load__(self) -> T: ... 
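The `IncEx` alias above is widened to Pydantic's newer recursive definition, so the `include`/`exclude` arguments accepted by `model_dump()` and related helpers can be nested mappings rather than only flat sets. A small illustration of the shapes the new alias covers (the `User`/`Address` models here are hypothetical, for illustration only):

```python
from pydantic import BaseModel


class Address(BaseModel):
    city: str
    zip_code: str


class User(BaseModel):
    name: str
    address: Address


user = User(name="Ada", address=Address(city="London", zip_code="N1 9GU"))

# A flat set of field names was already covered by the old alias.
print(user.model_dump(exclude={"name"}))

# A nested mapping (field name -> nested IncEx) is what the recursive,
# Mapping-based definition describes.
print(user.model_dump(exclude={"address": {"zip_code"}}))
```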
diff --git a/src/openai/_utils/_reflection.py b/src/openai/_utils/_reflection.py
new file mode 100644
index 0000000000..89aa712ac4
--- /dev/null
+++ b/src/openai/_utils/_reflection.py
@@ -0,0 +1,42 @@
+from __future__ import annotations
+
+import inspect
+from typing import Any, Callable
+
+
+def function_has_argument(func: Callable[..., Any], arg_name: str) -> bool:
+    """Returns whether or not the given function has a specific parameter"""
+    sig = inspect.signature(func)
+    return arg_name in sig.parameters
+
+
+def assert_signatures_in_sync(
+    source_func: Callable[..., Any],
+    check_func: Callable[..., Any],
+    *,
+    exclude_params: set[str] = set(),
+) -> None:
+    """Ensure that the signature of the second function matches the first."""
+
+    check_sig = inspect.signature(check_func)
+    source_sig = inspect.signature(source_func)
+
+    errors: list[str] = []
+
+    for name, source_param in source_sig.parameters.items():
+        if name in exclude_params:
+            continue
+
+        custom_param = check_sig.parameters.get(name)
+        if not custom_param:
+            errors.append(f"the `{name}` param is missing")
+            continue
+
+        if custom_param.annotation != source_param.annotation:
+            errors.append(
+                f"types for the `{name}` param do not match; source={repr(source_param.annotation)} checking={repr(custom_param.annotation)}"
+            )
+            continue
+
+    if errors:
+        raise AssertionError(f"{len(errors)} errors encountered when comparing signatures:\n\n" + "\n\n".join(errors))
diff --git a/src/openai/_utils/_sync.py b/src/openai/_utils/_sync.py
index 595924e5b1..ad7ec71b76 100644
--- a/src/openai/_utils/_sync.py
+++ b/src/openai/_utils/_sync.py
@@ -1,54 +1,77 @@
 from __future__ import annotations

+import sys
+import asyncio
 import functools
-from typing import TypeVar, Callable, Awaitable
+import contextvars
+from typing import Any, TypeVar, Callable, Awaitable
 from typing_extensions import ParamSpec

 import anyio
+import sniffio
 import anyio.to_thread

 T_Retval = TypeVar("T_Retval")
 T_ParamSpec = ParamSpec("T_ParamSpec")


-# copied from `asyncer`, https://github.com/tiangolo/asyncer
-def asyncify(
-    function: Callable[T_ParamSpec, T_Retval],
-    *,
-    cancellable: bool = False,
-    limiter: anyio.CapacityLimiter | None = None,
-) -> Callable[T_ParamSpec, Awaitable[T_Retval]]:
+if sys.version_info >= (3, 9):
+    _asyncio_to_thread = asyncio.to_thread
+else:
+    # backport of https://docs.python.org/3/library/asyncio-task.html#asyncio.to_thread
+    # for Python 3.8 support
+    async def _asyncio_to_thread(
+        func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs
+    ) -> Any:
+        """Asynchronously run function *func* in a separate thread.
+
+        Any *args and **kwargs supplied for this function are directly passed
+        to *func*. Also, the current :class:`contextvars.Context` is propagated,
+        allowing context variables from the main thread to be accessed in the
+        separate thread.
+
+        Returns a coroutine that can be awaited to get the eventual result of *func*.
+ """ + loop = asyncio.events.get_running_loop() + ctx = contextvars.copy_context() + func_call = functools.partial(ctx.run, func, *args, **kwargs) + return await loop.run_in_executor(None, func_call) + + +async def to_thread( + func: Callable[T_ParamSpec, T_Retval], /, *args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs +) -> T_Retval: + if sniffio.current_async_library() == "asyncio": + return await _asyncio_to_thread(func, *args, **kwargs) + + return await anyio.to_thread.run_sync( + functools.partial(func, *args, **kwargs), + ) + + +# inspired by `asyncer`, https://github.com/tiangolo/asyncer +def asyncify(function: Callable[T_ParamSpec, T_Retval]) -> Callable[T_ParamSpec, Awaitable[T_Retval]]: """ Take a blocking function and create an async one that receives the same - positional and keyword arguments, and that when called, calls the original function - in a worker thread using `anyio.to_thread.run_sync()`. Internally, - `asyncer.asyncify()` uses the same `anyio.to_thread.run_sync()`, but it supports - keyword arguments additional to positional arguments and it adds better support for - autocompletion and inline errors for the arguments of the function called and the - return value. - - If the `cancellable` option is enabled and the task waiting for its completion is - cancelled, the thread will still run its course but its return value (or any raised - exception) will be ignored. + positional and keyword arguments. For python version 3.9 and above, it uses + asyncio.to_thread to run the function in a separate thread. For python version + 3.8, it uses locally defined copy of the asyncio.to_thread function which was + introduced in python 3.9. - Use it like this: + Usage: - ```Python - def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: - # Do work - return "Some result" + ```python + def blocking_func(arg1, arg2, kwarg1=None): + # blocking code + return result - result = await to_thread.asyncify(do_work)("spam", "ham", kwarg1="a", kwarg2="b") - print(result) + result = asyncify(blocking_function)(arg1, arg2, kwarg1=value1) ``` ## Arguments `function`: a blocking regular callable (e.g. 
a function) - `cancellable`: `True` to allow cancellation of the operation - `limiter`: capacity limiter to use to limit the total amount of threads running - (if omitted, the default limiter is used) ## Return @@ -58,7 +81,6 @@ def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str: """ async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval: - partial_f = functools.partial(function, *args, **kwargs) - return await anyio.to_thread.run_sync(partial_f, cancellable=cancellable, limiter=limiter) + return await to_thread(function, *args, **kwargs) return wrapper diff --git a/src/openai/_utils/_transform.py b/src/openai/_utils/_transform.py index 47e262a515..18afd9d8bd 100644 --- a/src/openai/_utils/_transform.py +++ b/src/openai/_utils/_transform.py @@ -25,7 +25,7 @@ is_annotated_type, strip_annotated_type, ) -from .._compat import model_dump, is_typeddict +from .._compat import get_origin, model_dump, is_typeddict _T = TypeVar("_T") @@ -164,15 +164,25 @@ def _transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return _transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) # Iterable[T] or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) ): + # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually + # intended as an iterable, so we don't transform it. + if isinstance(data, dict): + return cast(object, data) + inner_type = extract_type_arg(stripped_type, 0) return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] @@ -186,7 +196,7 @@ def _transform_recursive( return data if isinstance(data, pydantic.BaseModel): - return model_dump(data, exclude_unset=True) + return model_dump(data, exclude_unset=True, mode="json") annotated_type = _get_annotated_type(annotation) if annotated_type is None: @@ -302,15 +312,25 @@ async def _async_transform_recursive( inner_type = annotation stripped_type = strip_annotated_type(inner_type) + origin = get_origin(stripped_type) or stripped_type if is_typeddict(stripped_type) and is_mapping(data): return await _async_transform_typeddict(data, stripped_type) + if origin == dict and is_mapping(data): + items_type = get_args(stripped_type)[1] + return {key: _transform_recursive(value, annotation=items_type) for key, value in data.items()} + if ( # List[T] (is_list_type(stripped_type) and is_list(data)) # Iterable[T] or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str)) ): + # dicts are technically iterable, but it is an iterable on the keys of the dict and is not usually + # intended as an iterable, so we don't transform it. 
+ if isinstance(data, dict): + return cast(object, data) + inner_type = extract_type_arg(stripped_type, 0) return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data] @@ -324,7 +344,7 @@ async def _async_transform_recursive( return data if isinstance(data, pydantic.BaseModel): - return model_dump(data, exclude_unset=True) + return model_dump(data, exclude_unset=True, mode="json") annotated_type = _get_annotated_type(annotation) if annotated_type is None: diff --git a/src/openai/_utils/_typing.py b/src/openai/_utils/_typing.py index c036991f04..278749b147 100644 --- a/src/openai/_utils/_typing.py +++ b/src/openai/_utils/_typing.py @@ -1,8 +1,17 @@ from __future__ import annotations +import sys +import typing +import typing_extensions from typing import Any, TypeVar, Iterable, cast from collections import abc as _c_abc -from typing_extensions import Required, Annotated, get_args, get_origin +from typing_extensions import ( + TypeIs, + Required, + Annotated, + get_args, + get_origin, +) from .._types import InheritsGeneric from .._compat import is_union as _is_union @@ -36,6 +45,26 @@ def is_typevar(typ: type) -> bool: return type(typ) == TypeVar # type: ignore +_TYPE_ALIAS_TYPES: tuple[type[typing_extensions.TypeAliasType], ...] = (typing_extensions.TypeAliasType,) +if sys.version_info >= (3, 12): + _TYPE_ALIAS_TYPES = (*_TYPE_ALIAS_TYPES, typing.TypeAliasType) + + +def is_type_alias_type(tp: Any, /) -> TypeIs[typing_extensions.TypeAliasType]: + """Return whether the provided argument is an instance of `TypeAliasType`. + + ```python + type Int = int + is_type_alias_type(Int) + # > True + Str = TypeAliasType("Str", str) + is_type_alias_type(Str) + # > True + ``` + """ + return isinstance(tp, _TYPE_ALIAS_TYPES) + + # Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]] def strip_annotated_type(typ: type) -> type: if is_required_type(typ) or is_annotated_type(typ): diff --git a/src/openai/_utils/_utils.py b/src/openai/_utils/_utils.py index 17904ce60d..e5811bba42 100644 --- a/src/openai/_utils/_utils.py +++ b/src/openai/_utils/_utils.py @@ -16,11 +16,12 @@ overload, ) from pathlib import Path +from datetime import date, datetime from typing_extensions import TypeGuard import sniffio -from .._types import Headers, NotGiven, FileTypes, NotGivenOr, HeadersLike +from .._types import NotGiven, FileTypes, NotGivenOr, HeadersLike from .._compat import parse_date as parse_date, parse_datetime as parse_datetime _T = TypeVar("_T") @@ -211,20 +212,17 @@ def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]: Example usage: ```py @overload - def foo(*, a: str) -> str: - ... + def foo(*, a: str) -> str: ... @overload - def foo(*, b: bool) -> str: - ... + def foo(*, b: bool) -> str: ... # This enforces the same constraints that a static type checker would # i.e. that either a or b must be passed to the function @required_args(["a"], ["b"]) - def foo(*, a: str | None = None, b: bool | None = None) -> str: - ... + def foo(*, a: str | None = None, b: bool | None = None) -> str: ... ``` """ @@ -286,18 +284,15 @@ def wrapper(*args: object, **kwargs: object) -> object: @overload -def strip_not_given(obj: None) -> None: - ... +def strip_not_given(obj: None) -> None: ... @overload -def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: - ... +def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]: ... @overload -def strip_not_given(obj: object) -> object: - ... 
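A short usage sketch for the `is_type_alias_type` helper added above, assuming only `typing_extensions` (which provides the same runtime `TypeAliasType` object that Python 3.12's `type X = ...` statement creates):

```python
from typing_extensions import TypeAliasType

# Equivalent at runtime to `type Alias = int` on Python 3.12+.
Alias = TypeAliasType("Alias", int)

# is_type_alias_type() boils down to an isinstance check against the
# TypeAliasType classes known to the running interpreter.
print(isinstance(Alias, TypeAliasType))  # True
print(isinstance(int, TypeAliasType))    # False
```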
+def strip_not_given(obj: object) -> object: ... def strip_not_given(obj: object | None) -> object: @@ -369,13 +364,13 @@ def file_from_path(path: str) -> FileTypes: def get_required_header(headers: HeadersLike, header: str) -> str: lower_header = header.lower() - if isinstance(headers, Mapping): - headers = cast(Headers, headers) - for k, v in headers.items(): + if is_mapping_t(headers): + # mypy doesn't understand the type narrowing here + for k, v in headers.items(): # type: ignore if k.lower() == lower_header and isinstance(v, str): return v - """ to deal with the case where the header looks like Stainless-Event-Id """ + # to deal with the case where the header looks like Stainless-Event-Id intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize()) for normalized_header in [header, lower_header, header.upper(), intercaps_header]: @@ -401,3 +396,19 @@ def lru_cache(*, maxsize: int | None = 128) -> Callable[[CallableT], CallableT]: maxsize=maxsize, ) return cast(Any, wrapper) # type: ignore[no-any-return] + + +def json_safe(data: object) -> object: + """Translates a mapping / sequence recursively in the same fashion + as `pydantic` v2's `model_dump(mode="json")`. + """ + if is_mapping(data): + return {json_safe(key): json_safe(value) for key, value in data.items()} + + if is_iterable(data) and not isinstance(data, (str, bytes, bytearray)): + return [json_safe(item) for item in data] + + if isinstance(data, (datetime, date)): + return data.isoformat() + + return data diff --git a/src/openai/_version.py b/src/openai/_version.py index 83411041ae..df2f60a7dc 100644 --- a/src/openai/_version.py +++ b/src/openai/_version.py @@ -1,4 +1,4 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. __title__ = "openai" -__version__ = "1.30.1" # x-release-please-version +__version__ = "1.66.4" # x-release-please-version diff --git a/src/openai/cli/__init__.py b/src/openai/cli/__init__.py deleted file mode 100644 index d453d5e179..0000000000 --- a/src/openai/cli/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ._cli import main as main diff --git a/src/openai/cli/_api/__init__.py b/src/openai/cli/_api/__init__.py deleted file mode 100644 index 56a0260a6d..0000000000 --- a/src/openai/cli/_api/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ._main import register_commands as register_commands diff --git a/src/openai/cli/_api/_main.py b/src/openai/cli/_api/_main.py deleted file mode 100644 index fe5a5e6fc0..0000000000 --- a/src/openai/cli/_api/_main.py +++ /dev/null @@ -1,16 +0,0 @@ -from __future__ import annotations - -from argparse import ArgumentParser - -from . 
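As a rough, hypothetical re-implementation of the idea behind the new `json_safe` helper shown above (for illustration only; the library version also special-cases `bytes`/`bytearray` and generic iterables), the recursion amounts to:

```python
from datetime import date, datetime


def json_safe_sketch(data: object) -> object:
    # Mirror pydantic v2's model_dump(mode="json") for plain containers:
    # dates and datetimes become ISO strings, containers are walked recursively.
    if isinstance(data, dict):
        return {json_safe_sketch(key): json_safe_sketch(value) for key, value in data.items()}
    if isinstance(data, (list, tuple)):
        return [json_safe_sketch(item) for item in data]
    if isinstance(data, (datetime, date)):
        return data.isoformat()
    return data


print(json_safe_sketch({"created": date(2024, 1, 2), "tags": ("a", "b")}))
# {'created': '2024-01-02', 'tags': ['a', 'b']}
```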
import chat, audio, files, image, models, completions - - -def register_commands(parser: ArgumentParser) -> None: - subparsers = parser.add_subparsers(help="All API subcommands") - - chat.register(subparsers) - image.register(subparsers) - audio.register(subparsers) - files.register(subparsers) - models.register(subparsers) - completions.register(subparsers) diff --git a/src/openai/cli/_api/audio.py b/src/openai/cli/_api/audio.py deleted file mode 100644 index 90d21b9932..0000000000 --- a/src/openai/cli/_api/audio.py +++ /dev/null @@ -1,94 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Optional, cast -from argparse import ArgumentParser - -from .._utils import get_client, print_model -from ..._types import NOT_GIVEN -from .._models import BaseModel -from .._progress import BufferReader - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - # transcriptions - sub = subparser.add_parser("audio.transcriptions.create") - - # Required - sub.add_argument("-m", "--model", type=str, default="whisper-1") - sub.add_argument("-f", "--file", type=str, required=True) - # Optional - sub.add_argument("--response-format", type=str) - sub.add_argument("--language", type=str) - sub.add_argument("-t", "--temperature", type=float) - sub.add_argument("--prompt", type=str) - sub.set_defaults(func=CLIAudio.transcribe, args_model=CLITranscribeArgs) - - # translations - sub = subparser.add_parser("audio.translations.create") - - # Required - sub.add_argument("-f", "--file", type=str, required=True) - # Optional - sub.add_argument("-m", "--model", type=str, default="whisper-1") - sub.add_argument("--response-format", type=str) - # TODO: doesn't seem to be supported by the API - # sub.add_argument("--language", type=str) - sub.add_argument("-t", "--temperature", type=float) - sub.add_argument("--prompt", type=str) - sub.set_defaults(func=CLIAudio.translate, args_model=CLITranslationArgs) - - -class CLITranscribeArgs(BaseModel): - model: str - file: str - response_format: Optional[str] = None - language: Optional[str] = None - temperature: Optional[float] = None - prompt: Optional[str] = None - - -class CLITranslationArgs(BaseModel): - model: str - file: str - response_format: Optional[str] = None - language: Optional[str] = None - temperature: Optional[float] = None - prompt: Optional[str] = None - - -class CLIAudio: - @staticmethod - def transcribe(args: CLITranscribeArgs) -> None: - with open(args.file, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - - model = get_client().audio.transcriptions.create( - file=(args.file, buffer_reader), - model=args.model, - language=args.language or NOT_GIVEN, - temperature=args.temperature or NOT_GIVEN, - prompt=args.prompt or NOT_GIVEN, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - response_format=cast(Any, args.response_format), - ) - print_model(model) - - @staticmethod - def translate(args: CLITranslationArgs) -> None: - with open(args.file, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - - model = get_client().audio.translations.create( - file=(args.file, buffer_reader), - model=args.model, - temperature=args.temperature or NOT_GIVEN, - prompt=args.prompt or NOT_GIVEN, - # casts required because the API is typed for enums - # but we don't want to validate that here for 
forwards-compat - response_format=cast(Any, args.response_format), - ) - print_model(model) diff --git a/src/openai/cli/_api/chat/__init__.py b/src/openai/cli/_api/chat/__init__.py deleted file mode 100644 index 87d971630a..0000000000 --- a/src/openai/cli/_api/chat/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from argparse import ArgumentParser - -from . import completions - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - completions.register(subparser) diff --git a/src/openai/cli/_api/chat/completions.py b/src/openai/cli/_api/chat/completions.py deleted file mode 100644 index c299741fe0..0000000000 --- a/src/openai/cli/_api/chat/completions.py +++ /dev/null @@ -1,156 +0,0 @@ -from __future__ import annotations - -import sys -from typing import TYPE_CHECKING, List, Optional, cast -from argparse import ArgumentParser -from typing_extensions import Literal, NamedTuple - -from ..._utils import get_client -from ..._models import BaseModel -from ...._streaming import Stream -from ....types.chat import ( - ChatCompletionRole, - ChatCompletionChunk, - CompletionCreateParams, -) -from ....types.chat.completion_create_params import ( - CompletionCreateParamsStreaming, - CompletionCreateParamsNonStreaming, -) - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("chat.completions.create") - - sub._action_groups.pop() - req = sub.add_argument_group("required arguments") - opt = sub.add_argument_group("optional arguments") - - req.add_argument( - "-g", - "--message", - action="https://github.com/append", - nargs=2, - metavar=("ROLE", "CONTENT"), - help="A message in `{role} {content}` format. Use this argument multiple times to add multiple messages.", - required=True, - ) - req.add_argument( - "-m", - "--model", - help="The model to use.", - required=True, - ) - - opt.add_argument( - "-n", - "--n", - help="How many completions to generate for the conversation.", - type=int, - ) - opt.add_argument("-M", "--max-tokens", help="The maximum number of tokens to generate.", type=int) - opt.add_argument( - "-t", - "--temperature", - help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - -Mutually exclusive with `top_p`.""", - type=float, - ) - opt.add_argument( - "-P", - "--top_p", - help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered. 
- - Mutually exclusive with `temperature`.""", - type=float, - ) - opt.add_argument( - "--stop", - help="A stop sequence at which to stop generating tokens for the message.", - ) - opt.add_argument("--stream", help="Stream messages as they're ready.", action="https://github.com/store_true") - sub.set_defaults(func=CLIChatCompletion.create, args_model=CLIChatCompletionCreateArgs) - - -class CLIMessage(NamedTuple): - role: ChatCompletionRole - content: str - - -class CLIChatCompletionCreateArgs(BaseModel): - message: List[CLIMessage] - model: str - n: Optional[int] = None - max_tokens: Optional[int] = None - temperature: Optional[float] = None - top_p: Optional[float] = None - stop: Optional[str] = None - stream: bool = False - - -class CLIChatCompletion: - @staticmethod - def create(args: CLIChatCompletionCreateArgs) -> None: - params: CompletionCreateParams = { - "model": args.model, - "messages": [ - {"role": cast(Literal["user"], message.role), "content": message.content} for message in args.message - ], - "n": args.n, - "temperature": args.temperature, - "top_p": args.top_p, - "stop": args.stop, - # type checkers are not good at inferring union types so we have to set stream afterwards - "stream": False, - } - if args.stream: - params["stream"] = args.stream # type: ignore - if args.max_tokens is not None: - params["max_tokens"] = args.max_tokens - - if args.stream: - return CLIChatCompletion._stream_create(cast(CompletionCreateParamsStreaming, params)) - - return CLIChatCompletion._create(cast(CompletionCreateParamsNonStreaming, params)) - - @staticmethod - def _create(params: CompletionCreateParamsNonStreaming) -> None: - completion = get_client().chat.completions.create(**params) - should_print_header = len(completion.choices) > 1 - for choice in completion.choices: - if should_print_header: - sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index)) - - content = choice.message.content if choice.message.content is not None else "None" - sys.stdout.write(content) - - if should_print_header or not content.endswith("\n"): - sys.stdout.write("\n") - - sys.stdout.flush() - - @staticmethod - def _stream_create(params: CompletionCreateParamsStreaming) -> None: - # cast is required for mypy - stream = cast( # pyright: ignore[reportUnnecessaryCast] - Stream[ChatCompletionChunk], get_client().chat.completions.create(**params) - ) - for chunk in stream: - should_print_header = len(chunk.choices) > 1 - for choice in chunk.choices: - if should_print_header: - sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index)) - - content = choice.delta.content or "" - sys.stdout.write(content) - - if should_print_header: - sys.stdout.write("\n") - - sys.stdout.flush() - - sys.stdout.write("\n") diff --git a/src/openai/cli/_api/completions.py b/src/openai/cli/_api/completions.py deleted file mode 100644 index cbdb35bf3a..0000000000 --- a/src/openai/cli/_api/completions.py +++ /dev/null @@ -1,173 +0,0 @@ -from __future__ import annotations - -import sys -from typing import TYPE_CHECKING, Optional, cast -from argparse import ArgumentParser -from functools import partial - -from openai.types.completion import Completion - -from .._utils import get_client -from ..._types import NOT_GIVEN, NotGivenOr -from ..._utils import is_given -from .._errors import CLIError -from .._models import BaseModel -from ..._streaming import Stream - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = 
subparser.add_parser("completions.create") - - # Required - sub.add_argument( - "-m", - "--model", - help="The model to use", - required=True, - ) - - # Optional - sub.add_argument("-p", "--prompt", help="An optional prompt to complete from") - sub.add_argument("--stream", help="Stream tokens as they're ready.", action="https://github.com/store_true") - sub.add_argument("-M", "--max-tokens", help="The maximum number of tokens to generate", type=int) - sub.add_argument( - "-t", - "--temperature", - help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. - -Mutually exclusive with `top_p`.""", - type=float, - ) - sub.add_argument( - "-P", - "--top_p", - help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered. - - Mutually exclusive with `temperature`.""", - type=float, - ) - sub.add_argument( - "-n", - "--n", - help="How many sub-completions to generate for each prompt.", - type=int, - ) - sub.add_argument( - "--logprobs", - help="Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. So for example, if `logprobs` is 10, the API will return a list of the 10 most likely tokens. If `logprobs` is 0, only the chosen tokens will have logprobs returned.", - type=int, - ) - sub.add_argument( - "--best_of", - help="Generates `best_of` completions server-side and returns the 'best' (the one with the highest log probability per token). Results cannot be streamed.", - type=int, - ) - sub.add_argument( - "--echo", - help="Echo back the prompt in addition to the completion", - action="https://github.com/store_true", - ) - sub.add_argument( - "--frequency_penalty", - help="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", - type=float, - ) - sub.add_argument( - "--presence_penalty", - help="Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", - type=float, - ) - sub.add_argument("--suffix", help="The suffix that comes after a completion of inserted text.") - sub.add_argument("--stop", help="A stop sequence at which to stop generating tokens.") - sub.add_argument( - "--user", - help="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.", - ) - # TODO: add support for logit_bias - sub.set_defaults(func=CLICompletions.create, args_model=CLICompletionCreateArgs) - - -class CLICompletionCreateArgs(BaseModel): - model: str - stream: bool = False - - prompt: Optional[str] = None - n: NotGivenOr[int] = NOT_GIVEN - stop: NotGivenOr[str] = NOT_GIVEN - user: NotGivenOr[str] = NOT_GIVEN - echo: NotGivenOr[bool] = NOT_GIVEN - suffix: NotGivenOr[str] = NOT_GIVEN - best_of: NotGivenOr[int] = NOT_GIVEN - top_p: NotGivenOr[float] = NOT_GIVEN - logprobs: NotGivenOr[int] = NOT_GIVEN - max_tokens: NotGivenOr[int] = NOT_GIVEN - temperature: NotGivenOr[float] = NOT_GIVEN - presence_penalty: NotGivenOr[float] = NOT_GIVEN - frequency_penalty: NotGivenOr[float] = NOT_GIVEN - - -class CLICompletions: - @staticmethod - def create(args: CLICompletionCreateArgs) -> None: - if is_given(args.n) and args.n > 1 and 
args.stream: - raise CLIError("Can't stream completions with n>1 with the current CLI") - - make_request = partial( - get_client().completions.create, - n=args.n, - echo=args.echo, - stop=args.stop, - user=args.user, - model=args.model, - top_p=args.top_p, - prompt=args.prompt, - suffix=args.suffix, - best_of=args.best_of, - logprobs=args.logprobs, - max_tokens=args.max_tokens, - temperature=args.temperature, - presence_penalty=args.presence_penalty, - frequency_penalty=args.frequency_penalty, - ) - - if args.stream: - return CLICompletions._stream_create( - # mypy doesn't understand the `partial` function but pyright does - cast(Stream[Completion], make_request(stream=True)) # pyright: ignore[reportUnnecessaryCast] - ) - - return CLICompletions._create(make_request()) - - @staticmethod - def _create(completion: Completion) -> None: - should_print_header = len(completion.choices) > 1 - for choice in completion.choices: - if should_print_header: - sys.stdout.write("===== Completion {} =====\n".format(choice.index)) - - sys.stdout.write(choice.text) - - if should_print_header or not choice.text.endswith("\n"): - sys.stdout.write("\n") - - sys.stdout.flush() - - @staticmethod - def _stream_create(stream: Stream[Completion]) -> None: - for completion in stream: - should_print_header = len(completion.choices) > 1 - for choice in sorted(completion.choices, key=lambda c: c.index): - if should_print_header: - sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index)) - - sys.stdout.write(choice.text) - - if should_print_header: - sys.stdout.write("\n") - - sys.stdout.flush() - - sys.stdout.write("\n") diff --git a/src/openai/cli/_api/files.py b/src/openai/cli/_api/files.py deleted file mode 100644 index 5f3631b284..0000000000 --- a/src/openai/cli/_api/files.py +++ /dev/null @@ -1,80 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, cast -from argparse import ArgumentParser - -from .._utils import get_client, print_model -from .._models import BaseModel -from .._progress import BufferReader - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("files.create") - - sub.add_argument( - "-f", - "--file", - required=True, - help="File to upload", - ) - sub.add_argument( - "-p", - "--purpose", - help="Why are you uploading this file? 
(see https://platform.openai.com/docs/api-reference/ for purposes)", - required=True, - ) - sub.set_defaults(func=CLIFile.create, args_model=CLIFileCreateArgs) - - sub = subparser.add_parser("files.retrieve") - sub.add_argument("-i", "--id", required=True, help="The files ID") - sub.set_defaults(func=CLIFile.get, args_model=CLIFileCreateArgs) - - sub = subparser.add_parser("files.delete") - sub.add_argument("-i", "--id", required=True, help="The files ID") - sub.set_defaults(func=CLIFile.delete, args_model=CLIFileCreateArgs) - - sub = subparser.add_parser("files.list") - sub.set_defaults(func=CLIFile.list) - - -class CLIFileIDArgs(BaseModel): - id: str - - -class CLIFileCreateArgs(BaseModel): - file: str - purpose: str - - -class CLIFile: - @staticmethod - def create(args: CLIFileCreateArgs) -> None: - with open(args.file, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - - file = get_client().files.create( - file=(args.file, buffer_reader), - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - purpose=cast(Any, args.purpose), - ) - print_model(file) - - @staticmethod - def get(args: CLIFileIDArgs) -> None: - file = get_client().files.retrieve(file_id=args.id) - print_model(file) - - @staticmethod - def delete(args: CLIFileIDArgs) -> None: - file = get_client().files.delete(file_id=args.id) - print_model(file) - - @staticmethod - def list() -> None: - files = get_client().files.list() - for file in files: - print_model(file) diff --git a/src/openai/cli/_api/image.py b/src/openai/cli/_api/image.py deleted file mode 100644 index 3e2a0a90f1..0000000000 --- a/src/openai/cli/_api/image.py +++ /dev/null @@ -1,139 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, cast -from argparse import ArgumentParser - -from .._utils import get_client, print_model -from ..._types import NOT_GIVEN, NotGiven, NotGivenOr -from .._models import BaseModel -from .._progress import BufferReader - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("images.generate") - sub.add_argument("-m", "--model", type=str) - sub.add_argument("-p", "--prompt", type=str, required=True) - sub.add_argument("-n", "--num-images", type=int, default=1) - sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image") - sub.add_argument("--response-format", type=str, default="url") - sub.set_defaults(func=CLIImage.create, args_model=CLIImageCreateArgs) - - sub = subparser.add_parser("images.edit") - sub.add_argument("-m", "--model", type=str) - sub.add_argument("-p", "--prompt", type=str, required=True) - sub.add_argument("-n", "--num-images", type=int, default=1) - sub.add_argument( - "-I", - "--image", - type=str, - required=True, - help="Image to modify. Should be a local path and a PNG encoded image.", - ) - sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image") - sub.add_argument("--response-format", type=str, default="url") - sub.add_argument( - "-M", - "--mask", - type=str, - required=False, - help="Path to a mask image. It should be the same size as the image you're editing and a RGBA PNG image. 
The Alpha channel acts as the mask.", - ) - sub.set_defaults(func=CLIImage.edit, args_model=CLIImageEditArgs) - - sub = subparser.add_parser("images.create_variation") - sub.add_argument("-m", "--model", type=str) - sub.add_argument("-n", "--num-images", type=int, default=1) - sub.add_argument( - "-I", - "--image", - type=str, - required=True, - help="Image to modify. Should be a local path and a PNG encoded image.", - ) - sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image") - sub.add_argument("--response-format", type=str, default="url") - sub.set_defaults(func=CLIImage.create_variation, args_model=CLIImageCreateVariationArgs) - - -class CLIImageCreateArgs(BaseModel): - prompt: str - num_images: int - size: str - response_format: str - model: NotGivenOr[str] = NOT_GIVEN - - -class CLIImageCreateVariationArgs(BaseModel): - image: str - num_images: int - size: str - response_format: str - model: NotGivenOr[str] = NOT_GIVEN - - -class CLIImageEditArgs(BaseModel): - image: str - num_images: int - size: str - response_format: str - prompt: str - mask: NotGivenOr[str] = NOT_GIVEN - model: NotGivenOr[str] = NOT_GIVEN - - -class CLIImage: - @staticmethod - def create(args: CLIImageCreateArgs) -> None: - image = get_client().images.generate( - model=args.model, - prompt=args.prompt, - n=args.num_images, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - size=cast(Any, args.size), - response_format=cast(Any, args.response_format), - ) - print_model(image) - - @staticmethod - def create_variation(args: CLIImageCreateVariationArgs) -> None: - with open(args.image, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Upload progress") - - image = get_client().images.create_variation( - model=args.model, - image=("image", buffer_reader), - n=args.num_images, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - size=cast(Any, args.size), - response_format=cast(Any, args.response_format), - ) - print_model(image) - - @staticmethod - def edit(args: CLIImageEditArgs) -> None: - with open(args.image, "rb") as file_reader: - buffer_reader = BufferReader(file_reader.read(), desc="Image upload progress") - - if isinstance(args.mask, NotGiven): - mask: NotGivenOr[BufferReader] = NOT_GIVEN - else: - with open(args.mask, "rb") as file_reader: - mask = BufferReader(file_reader.read(), desc="Mask progress") - - image = get_client().images.edit( - model=args.model, - prompt=args.prompt, - image=("image", buffer_reader), - n=args.num_images, - mask=("mask", mask) if not isinstance(mask, NotGiven) else mask, - # casts required because the API is typed for enums - # but we don't want to validate that here for forwards-compat - size=cast(Any, args.size), - response_format=cast(Any, args.response_format), - ) - print_model(image) diff --git a/src/openai/cli/_api/models.py b/src/openai/cli/_api/models.py deleted file mode 100644 index 017218fa6e..0000000000 --- a/src/openai/cli/_api/models.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from argparse import ArgumentParser - -from .._utils import get_client, print_model -from .._models import BaseModel - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("models.list") - 
sub.set_defaults(func=CLIModels.list) - - sub = subparser.add_parser("models.retrieve") - sub.add_argument("-i", "--id", required=True, help="The model ID") - sub.set_defaults(func=CLIModels.get, args_model=CLIModelIDArgs) - - sub = subparser.add_parser("models.delete") - sub.add_argument("-i", "--id", required=True, help="The model ID") - sub.set_defaults(func=CLIModels.delete, args_model=CLIModelIDArgs) - - -class CLIModelIDArgs(BaseModel): - id: str - - -class CLIModels: - @staticmethod - def get(args: CLIModelIDArgs) -> None: - model = get_client().models.retrieve(model=args.id) - print_model(model) - - @staticmethod - def delete(args: CLIModelIDArgs) -> None: - model = get_client().models.delete(model=args.id) - print_model(model) - - @staticmethod - def list() -> None: - models = get_client().models.list() - for model in models: - print_model(model) diff --git a/src/openai/cli/_cli.py b/src/openai/cli/_cli.py deleted file mode 100644 index 72e5c923bd..0000000000 --- a/src/openai/cli/_cli.py +++ /dev/null @@ -1,234 +0,0 @@ -from __future__ import annotations - -import sys -import logging -import argparse -from typing import Any, List, Type, Optional -from typing_extensions import ClassVar - -import httpx -import pydantic - -import openai - -from . import _tools -from .. import _ApiType, __version__ -from ._api import register_commands -from ._utils import can_use_http2 -from .._types import ProxiesDict -from ._errors import CLIError, display_error -from .._compat import PYDANTIC_V2, ConfigDict, model_parse -from .._models import BaseModel -from .._exceptions import APIError - -logger = logging.getLogger() -formatter = logging.Formatter("[%(asctime)s] %(message)s") -handler = logging.StreamHandler(sys.stderr) -handler.setFormatter(formatter) -logger.addHandler(handler) - - -class Arguments(BaseModel): - if PYDANTIC_V2: - model_config: ClassVar[ConfigDict] = ConfigDict( - extra="ignore", - ) - else: - - class Config(pydantic.BaseConfig): # type: ignore - extra: Any = pydantic.Extra.ignore # type: ignore - - verbosity: int - version: Optional[str] = None - - api_key: Optional[str] - api_base: Optional[str] - organization: Optional[str] - proxy: Optional[List[str]] - api_type: Optional[_ApiType] = None - api_version: Optional[str] = None - - # azure - azure_endpoint: Optional[str] = None - azure_ad_token: Optional[str] = None - - # internal, set by subparsers to parse their specific args - args_model: Optional[Type[BaseModel]] = None - - # internal, used so that subparsers can forward unknown arguments - unknown_args: List[str] = [] - allow_unknown_args: bool = False - - -def _build_parser() -> argparse.ArgumentParser: - parser = argparse.ArgumentParser(description=None, prog="openai") - parser.add_argument( - "-v", - "--verbose", - action="https://github.com/count", - dest="verbosity", - default=0, - help="Set verbosity.", - ) - parser.add_argument("-b", "--api-base", help="What API base url to use.") - parser.add_argument("-k", "--api-key", help="What API key to use.") - parser.add_argument("-p", "--proxy", nargs="+", help="What proxy to use.") - parser.add_argument( - "-o", - "--organization", - help="Which organization to run as (will use your default organization if not specified)", - ) - parser.add_argument( - "-t", - "--api-type", - type=str, - choices=("openai", "azure"), - help="The backend API to call, must be `openai` or `azure`", - ) - parser.add_argument( - "--api-version", - help="The Azure API version, e.g. 
'https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning'", - ) - - # azure - parser.add_argument( - "--azure-endpoint", - help="The Azure endpoint, e.g. 'https://endpoint.openai.azure.com/'", - ) - parser.add_argument( - "--azure-ad-token", - help="A token from Azure Active Directory, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id", - ) - - # prints the package version - parser.add_argument( - "-V", - "--version", - action="https://github.com/version", - version="%(prog)s " + __version__, - ) - - def help() -> None: - parser.print_help() - - parser.set_defaults(func=help) - - subparsers = parser.add_subparsers() - sub_api = subparsers.add_parser("api", help="Direct API calls") - - register_commands(sub_api) - - sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience") - _tools.register_commands(sub_tools, subparsers) - - return parser - - -def main() -> int: - try: - _main() - except (APIError, CLIError, pydantic.ValidationError) as err: - display_error(err) - return 1 - except KeyboardInterrupt: - sys.stderr.write("\n") - return 1 - return 0 - - -def _parse_args(parser: argparse.ArgumentParser) -> tuple[argparse.Namespace, Arguments, list[str]]: - # argparse by default will strip out the `--` but we want to keep it for unknown arguments - if "--" in sys.argv: - idx = sys.argv.index("--") - known_args = sys.argv[1:idx] - unknown_args = sys.argv[idx:] - else: - known_args = sys.argv[1:] - unknown_args = [] - - parsed, remaining_unknown = parser.parse_known_args(known_args) - - # append any remaining unknown arguments from the initial parsing - remaining_unknown.extend(unknown_args) - - args = model_parse(Arguments, vars(parsed)) - if not args.allow_unknown_args: - # we have to parse twice to ensure any unknown arguments - # result in an error if that behaviour is desired - parser.parse_args() - - return parsed, args, remaining_unknown - - -def _main() -> None: - parser = _build_parser() - parsed, args, unknown = _parse_args(parser) - - if args.verbosity != 0: - sys.stderr.write("Warning: --verbosity isn't supported yet\n") - - proxies: ProxiesDict = {} - if args.proxy is not None: - for proxy in args.proxy: - key = "https://" if proxy.startswith("https") else "http://" - if key in proxies: - raise CLIError(f"Multiple {key} proxies given - only the last one would be used") - - proxies[key] = proxy - - http_client = httpx.Client( - proxies=proxies or None, - http2=can_use_http2(), - ) - openai.http_client = http_client - - if args.organization: - openai.organization = args.organization - - if args.api_key: - openai.api_key = args.api_key - - if args.api_base: - openai.base_url = args.api_base - - # azure - if args.api_type is not None: - openai.api_type = args.api_type - - if args.azure_endpoint is not None: - openai.azure_endpoint = args.azure_endpoint - - if args.api_version is not None: - openai.api_version = args.api_version - - if args.azure_ad_token is not None: - openai.azure_ad_token = args.azure_ad_token - - try: - if args.args_model: - parsed.func( - model_parse( - args.args_model, - { - **{ - # we omit None values so that they can be defaulted to `NotGiven` - # and we'll strip it from the API request - key: value - for key, value in vars(parsed).items() - if value is not None - }, - "unknown_args": unknown, - }, - ) - ) - else: - parsed.func() - finally: - try: - http_client.close() - except Exception: - pass - - -if __name__ == "__main__": - 
sys.exit(main()) diff --git a/src/openai/cli/_errors.py b/src/openai/cli/_errors.py deleted file mode 100644 index 2bf06070d6..0000000000 --- a/src/openai/cli/_errors.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -import sys - -import pydantic - -from ._utils import Colors, organization_info -from .._exceptions import APIError, OpenAIError - - -class CLIError(OpenAIError): - ... - - -class SilentCLIError(CLIError): - ... - - -def display_error(err: CLIError | APIError | pydantic.ValidationError) -> None: - if isinstance(err, SilentCLIError): - return - - sys.stderr.write("{}{}Error:{} {}\n".format(organization_info(), Colors.FAIL, Colors.ENDC, err)) diff --git a/src/openai/cli/_models.py b/src/openai/cli/_models.py deleted file mode 100644 index 5583db2609..0000000000 --- a/src/openai/cli/_models.py +++ /dev/null @@ -1,17 +0,0 @@ -from typing import Any -from typing_extensions import ClassVar - -import pydantic - -from .. import _models -from .._compat import PYDANTIC_V2, ConfigDict - - -class BaseModel(_models.BaseModel): - if PYDANTIC_V2: - model_config: ClassVar[ConfigDict] = ConfigDict(extra="ignore", arbitrary_types_allowed=True) - else: - - class Config(pydantic.BaseConfig): # type: ignore - extra: Any = pydantic.Extra.ignore # type: ignore - arbitrary_types_allowed: bool = True diff --git a/src/openai/cli/_progress.py b/src/openai/cli/_progress.py deleted file mode 100644 index 8a7f2525de..0000000000 --- a/src/openai/cli/_progress.py +++ /dev/null @@ -1,59 +0,0 @@ -from __future__ import annotations - -import io -from typing import Callable -from typing_extensions import override - - -class CancelledError(Exception): - def __init__(self, msg: str) -> None: - self.msg = msg - super().__init__(msg) - - @override - def __str__(self) -> str: - return self.msg - - __repr__ = __str__ - - -class BufferReader(io.BytesIO): - def __init__(self, buf: bytes = b"", desc: str | None = None) -> None: - super().__init__(buf) - self._len = len(buf) - self._progress = 0 - self._callback = progress(len(buf), desc=desc) - - def __len__(self) -> int: - return self._len - - @override - def read(self, n: int | None = -1) -> bytes: - chunk = io.BytesIO.read(self, n) - self._progress += len(chunk) - - try: - self._callback(self._progress) - except Exception as e: # catches exception from the callback - raise CancelledError("The upload was cancelled: {}".format(e)) from e - - return chunk - - -def progress(total: float, desc: str | None) -> Callable[[float], None]: - import tqdm - - meter = tqdm.tqdm(total=total, unit_scale=True, desc=desc) - - def incr(progress: float) -> None: - meter.n = progress - if progress == total: - meter.close() - else: - meter.refresh() - - return incr - - -def MB(i: int) -> int: - return int(i // 1024**2) diff --git a/src/openai/cli/_tools/__init__.py b/src/openai/cli/_tools/__init__.py deleted file mode 100644 index 56a0260a6d..0000000000 --- a/src/openai/cli/_tools/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from ._main import register_commands as register_commands diff --git a/src/openai/cli/_tools/_main.py b/src/openai/cli/_tools/_main.py deleted file mode 100644 index bd6cda408f..0000000000 --- a/src/openai/cli/_tools/_main.py +++ /dev/null @@ -1,17 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING -from argparse import ArgumentParser - -from . 
import migrate, fine_tunes - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register_commands(parser: ArgumentParser, subparser: _SubParsersAction[ArgumentParser]) -> None: - migrate.register(subparser) - - namespaced = parser.add_subparsers(title="Tools", help="Convenience client side tools") - - fine_tunes.register(namespaced) diff --git a/src/openai/cli/_tools/fine_tunes.py b/src/openai/cli/_tools/fine_tunes.py deleted file mode 100644 index 2128b88952..0000000000 --- a/src/openai/cli/_tools/fine_tunes.py +++ /dev/null @@ -1,63 +0,0 @@ -from __future__ import annotations - -import sys -from typing import TYPE_CHECKING -from argparse import ArgumentParser - -from .._models import BaseModel -from ...lib._validators import ( - get_validators, - write_out_file, - read_any_format, - apply_validators, - apply_necessary_remediation, -) - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("fine_tunes.prepare_data") - sub.add_argument( - "-f", - "--file", - required=True, - help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed." - "This should be the local file path.", - ) - sub.add_argument( - "-q", - "--quiet", - required=False, - action="https://github.com/store_true", - help="Auto accepts all suggestions, without asking for user input. To be used within scripts.", - ) - sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs) - - -class PrepareDataArgs(BaseModel): - file: str - - quiet: bool - - -def prepare_data(args: PrepareDataArgs) -> None: - sys.stdout.write("Analyzing...\n") - fname = args.file - auto_accept = args.quiet - df, remediation = read_any_format(fname) - apply_necessary_remediation(None, remediation) - - validators = get_validators() - - assert df is not None - - apply_validators( - df, - fname, - remediation, - validators, - auto_accept, - write_out_file_func=write_out_file, - ) diff --git a/src/openai/cli/_tools/migrate.py b/src/openai/cli/_tools/migrate.py deleted file mode 100644 index 53073b866f..0000000000 --- a/src/openai/cli/_tools/migrate.py +++ /dev/null @@ -1,181 +0,0 @@ -from __future__ import annotations - -import os -import sys -import json -import shutil -import tarfile -import platform -import subprocess -from typing import TYPE_CHECKING, List -from pathlib import Path -from argparse import ArgumentParser - -import httpx - -from .._errors import CLIError, SilentCLIError -from .._models import BaseModel - -if TYPE_CHECKING: - from argparse import _SubParsersAction - - -def register(subparser: _SubParsersAction[ArgumentParser]) -> None: - sub = subparser.add_parser("migrate") - sub.set_defaults(func=migrate, args_model=MigrateArgs, allow_unknown_args=True) - - sub = subparser.add_parser("grit") - sub.set_defaults(func=grit, args_model=GritArgs, allow_unknown_args=True) - - -class GritArgs(BaseModel): - # internal - unknown_args: List[str] = [] - - -def grit(args: GritArgs) -> None: - grit_path = install() - - try: - subprocess.check_call([grit_path, *args.unknown_args]) - except subprocess.CalledProcessError: - # stdout and stderr are forwarded by subprocess so an error will already - # have been displayed - raise SilentCLIError() from None - - -class MigrateArgs(BaseModel): - # internal - unknown_args: List[str] = [] - - -def migrate(args: MigrateArgs) -> None: - grit_path = install() - - try: - subprocess.check_call([grit_path, "apply", "openai", *args.unknown_args]) - 
except subprocess.CalledProcessError: - # stdout and stderr are forwarded by subprocess so an error will already - # have been displayed - raise SilentCLIError() from None - - -# handles downloading the Grit CLI until they provide their own PyPi package - -KEYGEN_ACCOUNT = "custodian-dev" - - -def _cache_dir() -> Path: - xdg = os.environ.get("XDG_CACHE_HOME") - if xdg is not None: - return Path(xdg) - - return Path.home() / ".cache" - - -def _debug(message: str) -> None: - if not os.environ.get("DEBUG"): - return - - sys.stdout.write(f"[DEBUG]: {message}\n") - - -def install() -> Path: - """Installs the Grit CLI and returns the location of the binary""" - if sys.platform == "win32": - raise CLIError("Windows is not supported yet in the migration CLI") - - platform = "macos" if sys.platform == "darwin" else "linux" - - dir_name = _cache_dir() / "openai-python" - install_dir = dir_name / ".install" - target_dir = install_dir / "bin" - - target_path = target_dir / "marzano" - temp_file = target_dir / "marzano.tmp" - - if target_path.exists(): - _debug(f"{target_path} already exists") - sys.stdout.flush() - return target_path - - _debug(f"Using Grit CLI path: {target_path}") - - target_dir.mkdir(parents=True, exist_ok=True) - - if temp_file.exists(): - temp_file.unlink() - - arch = _get_arch() - _debug(f"Using architecture {arch}") - - file_name = f"marzano-{platform}-{arch}" - meta_url = f"https://api.keygen.sh/v1/accounts/%7BKEYGEN_ACCOUNT%7D/artifacts/%7Bfile_name%7D" - - sys.stdout.write(f"Retrieving Grit CLI metadata from {meta_url}\n") - with httpx.Client() as client: - response = client.get(meta_url) # pyright: ignore[reportUnknownMemberType] - - data = response.json() - errors = data.get("errors") - if errors: - for error in errors: - sys.stdout.write(f"{error}\n") - - raise CLIError("Could not locate Grit CLI binary - see above errors") - - write_manifest(install_dir, data["data"]["relationships"]["release"]["data"]["id"]) - - link = data["data"]["links"]["redirect"] - _debug(f"Redirect URL {link}") - - download_response = client.get(link) # pyright: ignore[reportUnknownMemberType] - with open(temp_file, "wb") as file: - for chunk in download_response.iter_bytes(): - file.write(chunk) - - unpacked_dir = target_dir / "cli-bin" - unpacked_dir.mkdir(parents=True, exist_ok=True) - - with tarfile.open(temp_file, "r:gz") as archive: - archive.extractall(unpacked_dir, filter="data") - - for item in unpacked_dir.iterdir(): - item.rename(target_dir / item.name) - - shutil.rmtree(unpacked_dir) - os.remove(temp_file) - os.chmod(target_path, 0o755) - - sys.stdout.flush() - - return target_path - - -def _get_arch() -> str: - architecture = platform.machine().lower() - - # Map the architecture names to Node.js equivalents - arch_map = { - "x86_64": "x64", - "amd64": "x64", - "armv7l": "arm", - "aarch64": "arm64", - } - - return arch_map.get(architecture, architecture) - - -def write_manifest(install_path: Path, release: str) -> None: - manifest = { - "installPath": str(install_path), - "binaries": { - "marzano": { - "name": "marzano", - "release": release, - }, - }, - } - manifest_path = Path(install_path) / "manifests.json" - with open(manifest_path, "w") as f: - json.dump(manifest, f, indent=2) diff --git a/src/openai/cli/_utils.py b/src/openai/cli/_utils.py deleted file mode 100644 index 673eed613c..0000000000 --- a/src/openai/cli/_utils.py +++ /dev/null @@ -1,45 +0,0 @@ -from __future__ import annotations - -import sys - -import openai - -from .. 
import OpenAI, _load_client -from .._compat import model_json -from .._models import BaseModel - - -class Colors: - HEADER = "\033[95m" - OKBLUE = "\033[94m" - OKGREEN = "\033[92m" - WARNING = "\033[93m" - FAIL = "\033[91m" - ENDC = "\033[0m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" - - -def get_client() -> OpenAI: - return _load_client() - - -def organization_info() -> str: - organization = openai.organization - if organization is not None: - return "[organization={}] ".format(organization) - - return "" - - -def print_model(model: BaseModel) -> None: - sys.stdout.write(model_json(model, indent=2) + "\n") - - -def can_use_http2() -> bool: - try: - import h2 # type: ignore # noqa - except ImportError: - return False - - return True diff --git a/src/openai/lib/_old_api.py b/src/openai/lib/_old_api.py deleted file mode 100644 index 929c87e80b..0000000000 --- a/src/openai/lib/_old_api.py +++ /dev/null @@ -1,72 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Any -from typing_extensions import override - -from .._utils import LazyProxy -from .._exceptions import OpenAIError - -INSTRUCTIONS = """ - -You tried to access openai.{symbol}, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API. - -You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. - -Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28` - -A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742 -""" - - -class APIRemovedInV1(OpenAIError): - def __init__(self, *, symbol: str) -> None: - super().__init__(INSTRUCTIONS.format(symbol=symbol)) - - -class APIRemovedInV1Proxy(LazyProxy[Any]): - def __init__(self, *, symbol: str) -> None: - super().__init__() - self._symbol = symbol - - @override - def __load__(self) -> Any: - # return the proxy until it is eventually called so that - # we don't break people that are just checking the attributes - # of a module - return self - - def __call__(self, *_args: Any, **_kwargs: Any) -> Any: - raise APIRemovedInV1(symbol=self._symbol) - - -SYMBOLS = [ - "Edit", - "File", - "Audio", - "Image", - "Model", - "Engine", - "Customer", - "FineTune", - "Embedding", - "Completion", - "Deployment", - "Moderation", - "ErrorObject", - "FineTuningJob", - "ChatCompletion", -] - -# we explicitly tell type checkers that nothing is exported -# from this file so that when we re-export the old symbols -# in `openai/__init__.py` they aren't added to the auto-complete -# suggestions given by editors -if TYPE_CHECKING: - __all__: list[str] = [] -else: - __all__ = SYMBOLS - - -__locals = locals() -for symbol in SYMBOLS: - __locals[symbol] = APIRemovedInV1Proxy(symbol=symbol) diff --git a/src/openai/lib/_validators.py b/src/openai/lib/_validators.py deleted file mode 100644 index cf24cd2294..0000000000 --- a/src/openai/lib/_validators.py +++ /dev/null @@ -1,809 +0,0 @@ -# pyright: basic -from __future__ import annotations - -import os -import sys -from typing import Any, TypeVar, Callable, Optional, NamedTuple -from typing_extensions import TypeAlias - -from .._extras import pandas as pd - - -class Remediation(NamedTuple): - name: str - immediate_msg: Optional[str] = None - necessary_msg: Optional[str] = None - necessary_fn: Optional[Callable[[Any], Any]] = None - optional_msg: Optional[str] = None - optional_fn: Optional[Callable[[Any], Any]] = None - error_msg: Optional[str] = None - - 
-OptionalDataFrameT = TypeVar("OptionalDataFrameT", bound="Optional[pd.DataFrame]") - - -def num_examples_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will only print out the number of examples and recommend to the user to increase the number of examples if less than 100. - """ - MIN_EXAMPLES = 100 - optional_suggestion = ( - "" - if len(df) >= MIN_EXAMPLES - else ". In general, we recommend having at least a few hundred examples. We've found that performance tends to linearly increase for every doubling of the number of examples" - ) - immediate_msg = f"\n- Your file contains {len(df)} prompt-completion pairs{optional_suggestion}" - return Remediation(name="num_examples", immediate_msg=immediate_msg) - - -def necessary_column_validator(df: pd.DataFrame, necessary_column: str) -> Remediation: - """ - This validator will ensure that the necessary column is present in the dataframe. - """ - - def lower_case_column(df: pd.DataFrame, column: Any) -> pd.DataFrame: - cols = [c for c in df.columns if str(c).lower() == column] - df.rename(columns={cols[0]: column.lower()}, inplace=True) - return df - - immediate_msg = None - necessary_fn = None - necessary_msg = None - error_msg = None - - if necessary_column not in df.columns: - if necessary_column in [str(c).lower() for c in df.columns]: - - def lower_case_column_creator(df: pd.DataFrame) -> pd.DataFrame: - return lower_case_column(df, necessary_column) - - necessary_fn = lower_case_column_creator - immediate_msg = f"\n- The `{necessary_column}` column/key should be lowercase" - necessary_msg = f"Lower case column name to `{necessary_column}`" - else: - error_msg = f"`{necessary_column}` column/key is missing. Please make sure you name your columns/keys appropriately, then retry" - - return Remediation( - name="necessary_column", - immediate_msg=immediate_msg, - necessary_msg=necessary_msg, - necessary_fn=necessary_fn, - error_msg=error_msg, - ) - - -def additional_column_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation: - """ - This validator will remove additional columns from the dataframe. - """ - additional_columns = [] - necessary_msg = None - immediate_msg = None - necessary_fn = None # type: ignore - - if len(df.columns) > 2: - additional_columns = [c for c in df.columns if c not in fields] - warn_message = "" - for ac in additional_columns: - dups = [c for c in additional_columns if ac in c] - if len(dups) > 0: - warn_message += f"\n WARNING: Some of the additional columns/keys contain `{ac}` in their name. These will be ignored, and the column/key `{ac}` will be used instead. This could also result from a duplicate column/key in the provided file." - immediate_msg = f"\n- The input file should contain exactly two columns/keys per row. Additional columns/keys present are: {additional_columns}{warn_message}" - necessary_msg = f"Remove additional columns/keys: {additional_columns}" - - def necessary_fn(x: Any) -> Any: - return x[fields] - - return Remediation( - name="additional_column", - immediate_msg=immediate_msg, - necessary_msg=necessary_msg, - necessary_fn=necessary_fn, - ) - - -def non_empty_field_validator(df: pd.DataFrame, field: str = "completion") -> Remediation: - """ - This validator will ensure that no completion is empty. 
- """ - necessary_msg = None - necessary_fn = None # type: ignore - immediate_msg = None - - if df[field].apply(lambda x: x == "").any() or df[field].isnull().any(): - empty_rows = (df[field] == "") | (df[field].isnull()) - empty_indexes = df.reset_index().index[empty_rows].tolist() - immediate_msg = f"\n- `{field}` column/key should not contain empty strings. These are rows: {empty_indexes}" - - def necessary_fn(x: Any) -> Any: - return x[x[field] != ""].dropna(subset=[field]) - - necessary_msg = f"Remove {len(empty_indexes)} rows with empty {field}s" - - return Remediation( - name=f"empty_{field}", - immediate_msg=immediate_msg, - necessary_msg=necessary_msg, - necessary_fn=necessary_fn, - ) - - -def duplicated_rows_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation: - """ - This validator will suggest to the user to remove duplicate rows if they exist. - """ - duplicated_rows = df.duplicated(subset=fields) - duplicated_indexes = df.reset_index().index[duplicated_rows].tolist() - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - if len(duplicated_indexes) > 0: - immediate_msg = f"\n- There are {len(duplicated_indexes)} duplicated {'-'.join(fields)} sets. These are rows: {duplicated_indexes}" - optional_msg = f"Remove {len(duplicated_indexes)} duplicate rows" - - def optional_fn(x: Any) -> Any: - return x.drop_duplicates(subset=fields) - - return Remediation( - name="duplicated_rows", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def long_examples_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to the user to remove examples that are too long. - """ - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - ft_type = infer_task_type(df) - if ft_type != "open-ended generation": - - def get_long_indexes(d: pd.DataFrame) -> Any: - long_examples = d.apply(lambda x: len(x.prompt) + len(x.completion) > 10000, axis=1) - return d.reset_index().index[long_examples].tolist() - - long_indexes = get_long_indexes(df) - - if len(long_indexes) > 0: - immediate_msg = f"\n- There are {len(long_indexes)} examples that are very long. These are rows: {long_indexes}\nFor conditional generation, and for classification the examples shouldn't be longer than 2048 tokens." - optional_msg = f"Remove {len(long_indexes)} long examples" - - def optional_fn(x: Any) -> Any: - long_indexes_to_drop = get_long_indexes(x) - if long_indexes != long_indexes_to_drop: - sys.stdout.write( - f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n" - ) - return x.drop(long_indexes_to_drop) - - return Remediation( - name="long_examples", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def common_prompt_suffix_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to add a common suffix to the prompt if one doesn't already exist in case of classification or conditional generation. 
- """ - error_msg = None - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - # Find a suffix which is not contained within the prompt otherwise - suggested_suffix = "\n\n### =>\n\n" - suffix_options = [ - " ->", - "\n\n###\n\n", - "\n\n===\n\n", - "\n\n---\n\n", - "\n\n===>\n\n", - "\n\n--->\n\n", - ] - for suffix_option in suffix_options: - if suffix_option == " ->": - if df.prompt.str.contains("\n").any(): - continue - if df.prompt.str.contains(suffix_option, regex=False).any(): - continue - suggested_suffix = suffix_option - break - display_suggested_suffix = suggested_suffix.replace("\n", "\\n") - - ft_type = infer_task_type(df) - if ft_type == "open-ended generation": - return Remediation(name="common_suffix") - - def add_suffix(x: Any, suffix: Any) -> Any: - x["prompt"] += suffix - return x - - common_suffix = get_common_xfix(df.prompt, xfix="suffix") - if (df.prompt == common_suffix).all(): - error_msg = f"All prompts are identical: `{common_suffix}`\nConsider leaving the prompts blank if you want to do open-ended generation, otherwise ensure prompts are different" - return Remediation(name="common_suffix", error_msg=error_msg) - - if common_suffix != "": - common_suffix_new_line_handled = common_suffix.replace("\n", "\\n") - immediate_msg = f"\n- All prompts end with suffix `{common_suffix_new_line_handled}`" - if len(common_suffix) > 10: - immediate_msg += f". This suffix seems very long. Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`" - if df.prompt.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any(): - immediate_msg += f"\n WARNING: Some of your prompts contain the suffix `{common_suffix}` more than once. We strongly suggest that you review your prompts and add a unique suffix" - - else: - immediate_msg = "\n- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. If you intend to do open-ended generation, then you should leave the prompts empty" - - if common_suffix == "": - optional_msg = f"Add a suffix separator `{display_suggested_suffix}` to all prompts" - - def optional_fn(x: Any) -> Any: - return add_suffix(x, suggested_suffix) - - return Remediation( - name="common_completion_suffix", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - error_msg=error_msg, - ) - - -def common_prompt_prefix_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to remove a common prefix from the prompt if a long one exist. - """ - MAX_PREFIX_LEN = 12 - - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - common_prefix = get_common_xfix(df.prompt, xfix="prefix") - if common_prefix == "": - return Remediation(name="common_prefix") - - def remove_common_prefix(x: Any, prefix: Any) -> Any: - x["prompt"] = x["prompt"].str[len(prefix) :] - return x - - if (df.prompt == common_prefix).all(): - # already handled by common_suffix_validator - return Remediation(name="common_prefix") - - if common_prefix != "": - immediate_msg = f"\n- All prompts start with prefix `{common_prefix}`" - if MAX_PREFIX_LEN < len(common_prefix): - immediate_msg += ". Fine-tuning doesn't require the instruction specifying the task, or a few-shot example scenario. 
Most of the time you should only add the input data into the prompt, and the desired output into the completion" - optional_msg = f"Remove prefix `{common_prefix}` from all prompts" - - def optional_fn(x: Any) -> Any: - return remove_common_prefix(x, common_prefix) - - return Remediation( - name="common_prompt_prefix", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def common_completion_prefix_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to remove a common prefix from the completion if a long one exist. - """ - MAX_PREFIX_LEN = 5 - - common_prefix = get_common_xfix(df.completion, xfix="prefix") - ws_prefix = len(common_prefix) > 0 and common_prefix[0] == " " - if len(common_prefix) < MAX_PREFIX_LEN: - return Remediation(name="common_prefix") - - def remove_common_prefix(x: Any, prefix: Any, ws_prefix: Any) -> Any: - x["completion"] = x["completion"].str[len(prefix) :] - if ws_prefix: - # keep the single whitespace as prefix - x["completion"] = f" {x['completion']}" - return x - - if (df.completion == common_prefix).all(): - # already handled by common_suffix_validator - return Remediation(name="common_prefix") - - immediate_msg = f"\n- All completions start with prefix `{common_prefix}`. Most of the time you should only add the output data into the completion, without any prefix" - optional_msg = f"Remove prefix `{common_prefix}` from all completions" - - def optional_fn(x: Any) -> Any: - return remove_common_prefix(x, common_prefix, ws_prefix) - - return Remediation( - name="common_completion_prefix", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def common_completion_suffix_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to add a common suffix to the completion if one doesn't already exist in case of classification or conditional generation. - """ - error_msg = None - immediate_msg = None - optional_msg = None - optional_fn = None # type: ignore - - ft_type = infer_task_type(df) - if ft_type == "open-ended generation" or ft_type == "classification": - return Remediation(name="common_suffix") - - common_suffix = get_common_xfix(df.completion, xfix="suffix") - if (df.completion == common_suffix).all(): - error_msg = f"All completions are identical: `{common_suffix}`\nEnsure completions are different, otherwise the model will just repeat `{common_suffix}`" - return Remediation(name="common_suffix", error_msg=error_msg) - - # Find a suffix which is not contained within the completion otherwise - suggested_suffix = " [END]" - suffix_options = [ - "\n", - ".", - " END", - "***", - "+++", - "&&&", - "$$$", - "@@@", - "%%%", - ] - for suffix_option in suffix_options: - if df.completion.str.contains(suffix_option, regex=False).any(): - continue - suggested_suffix = suffix_option - break - display_suggested_suffix = suggested_suffix.replace("\n", "\\n") - - def add_suffix(x: Any, suffix: Any) -> Any: - x["completion"] += suffix - return x - - if common_suffix != "": - common_suffix_new_line_handled = common_suffix.replace("\n", "\\n") - immediate_msg = f"\n- All completions end with suffix `{common_suffix_new_line_handled}`" - if len(common_suffix) > 10: - immediate_msg += f". This suffix seems very long. 
Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`" - if df.completion.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any(): - immediate_msg += f"\n WARNING: Some of your completions contain the suffix `{common_suffix}` more than once. We suggest that you review your completions and add a unique ending" - - else: - immediate_msg = "\n- Your data does not contain a common ending at the end of your completions. Having a common ending string appended to the end of the completion makes it clearer to the fine-tuned model where the completion should end. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples." - - if common_suffix == "": - optional_msg = f"Add a suffix ending `{display_suggested_suffix}` to all completions" - - def optional_fn(x: Any) -> Any: - return add_suffix(x, suggested_suffix) - - return Remediation( - name="common_completion_suffix", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - error_msg=error_msg, - ) - - -def completions_space_start_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will suggest to add a space at the start of the completion if it doesn't already exist. This helps with tokenization. - """ - - def add_space_start(x: Any) -> Any: - x["completion"] = x["completion"].apply(lambda s: ("" if s.startswith(" ") else " ") + s) - return x - - optional_msg = None - optional_fn = None - immediate_msg = None - - if df.completion.str[:1].nunique() != 1 or df.completion.values[0][0] != " ": - immediate_msg = "\n- The completion should start with a whitespace character (` `). This tends to produce better results due to the tokenization we use. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details" - optional_msg = "Add a whitespace character to the beginning of the completion" - optional_fn = add_space_start - return Remediation( - name="completion_space_start", - immediate_msg=immediate_msg, - optional_msg=optional_msg, - optional_fn=optional_fn, - ) - - -def lower_case_validator(df: pd.DataFrame, column: Any) -> Remediation | None: - """ - This validator will suggest to lowercase the column values, if more than a third of letters are uppercase. - """ - - def lower_case(x: Any) -> Any: - x[column] = x[column].str.lower() - return x - - count_upper = df[column].apply(lambda x: sum(1 for c in x if c.isalpha() and c.isupper())).sum() - count_lower = df[column].apply(lambda x: sum(1 for c in x if c.isalpha() and c.islower())).sum() - - if count_upper * 2 > count_lower: - return Remediation( - name="lower_case", - immediate_msg=f"\n- More than a third of your `{column}` column/key is uppercase. Uppercase {column}s tends to perform worse than a mixture of case encountered in normal language. We recommend to lower case the data if that makes sense in your domain. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details", - optional_msg=f"Lowercase all your data in column/key `{column}`", - optional_fn=lower_case, - ) - return None - - -def read_any_format( - fname: str, fields: list[str] = ["prompt", "completion"] -) -> tuple[pd.DataFrame | None, Remediation]: - """ - This function will read a file saved in .csv, .json, .txt, .xlsx or .tsv format using pandas. 
- - for .xlsx it will read the first sheet - - for .txt it will assume completions and split on newline - """ - remediation = None - necessary_msg = None - immediate_msg = None - error_msg = None - df = None - - if os.path.isfile(fname): - try: - if fname.lower().endswith(".csv") or fname.lower().endswith(".tsv"): - file_extension_str, separator = ("CSV", ",") if fname.lower().endswith(".csv") else ("TSV", "\t") - immediate_msg = ( - f"\n- Based on your file extension, your file is formatted as a {file_extension_str} file" - ) - necessary_msg = f"Your format `{file_extension_str}` will be converted to `JSONL`" - df = pd.read_csv(fname, sep=separator, dtype=str).fillna("") - elif fname.lower().endswith(".xlsx"): - immediate_msg = "\n- Based on your file extension, your file is formatted as an Excel file" - necessary_msg = "Your format `XLSX` will be converted to `JSONL`" - xls = pd.ExcelFile(fname) - sheets = xls.sheet_names - if len(sheets) > 1: - immediate_msg += "\n- Your Excel file contains more than one sheet. Please either save as csv or ensure all data is present in the first sheet. WARNING: Reading only the first sheet..." - df = pd.read_excel(fname, dtype=str).fillna("") - elif fname.lower().endswith(".txt"): - immediate_msg = "\n- Based on your file extension, you provided a text file" - necessary_msg = "Your format `TXT` will be converted to `JSONL`" - with open(fname, "r") as f: - content = f.read() - df = pd.DataFrame( - [["", line] for line in content.split("\n")], - columns=fields, - dtype=str, - ).fillna("") - elif fname.lower().endswith(".jsonl"): - df = pd.read_json(fname, lines=True, dtype=str).fillna("") # type: ignore - if len(df) == 1: # type: ignore - # this is NOT what we expect for a .jsonl file - immediate_msg = "\n- Your JSONL file appears to be in a JSON format. Your file will be converted to JSONL format" - necessary_msg = "Your format `JSON` will be converted to `JSONL`" - df = pd.read_json(fname, dtype=str).fillna("") # type: ignore - else: - pass # this is what we expect for a .jsonl file - elif fname.lower().endswith(".json"): - try: - # to handle case where .json file is actually a .jsonl file - df = pd.read_json(fname, lines=True, dtype=str).fillna("") # type: ignore - if len(df) == 1: # type: ignore - # this code path corresponds to a .json file that has one line - df = pd.read_json(fname, dtype=str).fillna("") # type: ignore - else: - # this is NOT what we expect for a .json file - immediate_msg = "\n- Your JSON file appears to be in a JSONL format. Your file will be converted to JSONL format" - necessary_msg = "Your format `JSON` will be converted to `JSONL`" - except ValueError: - # this code path corresponds to a .json file that has multiple lines (i.e. it is indented) - df = pd.read_json(fname, dtype=str).fillna("") # type: ignore - else: - error_msg = ( - "Your file must have one of the following extensions: .CSV, .TSV, .XLSX, .TXT, .JSON or .JSONL" - ) - if "." in fname: - error_msg += f" Your file `{fname}` ends with the extension `.{fname.split('.')[-1]}` which is not supported." - else: - error_msg += f" Your file `{fname}` is missing a file extension." - - except (ValueError, TypeError): - file_extension_str = fname.split(".")[-1].upper() - error_msg = f"Your file `{fname}` does not appear to be in valid {file_extension_str} format. Please ensure your file is formatted as a valid {file_extension_str} file." - - else: - error_msg = f"File {fname} does not exist." 
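-    # Wrap the messages (and any error) produced above into a single Remediation for the caller to apply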
- - remediation = Remediation( - name="read_any_format", - necessary_msg=necessary_msg, - immediate_msg=immediate_msg, - error_msg=error_msg, - ) - return df, remediation - - -def format_inferrer_validator(df: pd.DataFrame) -> Remediation: - """ - This validator will infer the likely fine-tuning format of the data, and display it to the user if it is classification. - It will also suggest to use ada and explain train/validation split benefits. - """ - ft_type = infer_task_type(df) - immediate_msg = None - if ft_type == "classification": - immediate_msg = f"\n- Based on your data it seems like you're trying to fine-tune a model for {ft_type}\n- For classification, we recommend you try one of the faster and cheaper models, such as `ada`\n- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training" - return Remediation(name="num_examples", immediate_msg=immediate_msg) - - -def apply_necessary_remediation(df: OptionalDataFrameT, remediation: Remediation) -> OptionalDataFrameT: - """ - This function will apply a necessary remediation to a dataframe, or print an error message if one exists. - """ - if remediation.error_msg is not None: - sys.stderr.write(f"\n\nERROR in {remediation.name} validator: {remediation.error_msg}\n\nAborting...") - sys.exit(1) - if remediation.immediate_msg is not None: - sys.stdout.write(remediation.immediate_msg) - if remediation.necessary_fn is not None: - df = remediation.necessary_fn(df) - return df - - -def accept_suggestion(input_text: str, auto_accept: bool) -> bool: - sys.stdout.write(input_text) - if auto_accept: - sys.stdout.write("Y\n") - return True - return input().lower() != "n" - - -def apply_optional_remediation( - df: pd.DataFrame, remediation: Remediation, auto_accept: bool -) -> tuple[pd.DataFrame, bool]: - """ - This function will apply an optional remediation to a dataframe, based on the user input. - """ - optional_applied = False - input_text = f"- [Recommended] {remediation.optional_msg} [Y/n]: " - if remediation.optional_msg is not None: - if accept_suggestion(input_text, auto_accept): - assert remediation.optional_fn is not None - df = remediation.optional_fn(df) - optional_applied = True - if remediation.necessary_msg is not None: - sys.stdout.write(f"- [Necessary] {remediation.necessary_msg}\n") - return df, optional_applied - - -def estimate_fine_tuning_time(df: pd.DataFrame) -> None: - """ - Estimate the time it'll take to fine-tune the dataset - """ - ft_format = infer_task_type(df) - expected_time = 1.0 - if ft_format == "classification": - num_examples = len(df) - expected_time = num_examples * 1.44 - else: - size = df.memory_usage(index=True).sum() - expected_time = size * 0.0515 - - def format_time(time: float) -> str: - if time < 60: - return f"{round(time, 2)} seconds" - elif time < 3600: - return f"{round(time / 60, 2)} minutes" - elif time < 86400: - return f"{round(time / 3600, 2)} hours" - else: - return f"{round(time / 86400, 2)} days" - - time_string = format_time(expected_time + 140) - sys.stdout.write( - f"Once your model starts training, it'll approximately take {time_string} to train a `curie` model, and less for `ada` and `babbage`. 
Queue will approximately take half an hour per job ahead of you.\n" - ) - - -def get_outfnames(fname: str, split: bool) -> list[str]: - suffixes = ["_train", "_valid"] if split else [""] - i = 0 - while True: - index_suffix = f" ({i})" if i > 0 else "" - candidate_fnames = [f"{os.path.splitext(fname)[0]}_prepared{suffix}{index_suffix}.jsonl" for suffix in suffixes] - if not any(os.path.isfile(f) for f in candidate_fnames): - return candidate_fnames - i += 1 - - -def get_classification_hyperparams(df: pd.DataFrame) -> tuple[int, object]: - n_classes = df.completion.nunique() - pos_class = None - if n_classes == 2: - pos_class = df.completion.value_counts().index[0] - return n_classes, pos_class - - -def write_out_file(df: pd.DataFrame, fname: str, any_remediations: bool, auto_accept: bool) -> None: - """ - This function will write out a dataframe to a file, if the user would like to proceed, and also offer a fine-tuning command with the newly created file. - For classification it will optionally ask the user if they would like to split the data into train/valid files, and modify the suggested command to include the valid set. - """ - ft_format = infer_task_type(df) - common_prompt_suffix = get_common_xfix(df.prompt, xfix="suffix") - common_completion_suffix = get_common_xfix(df.completion, xfix="suffix") - - split = False - input_text = "- [Recommended] Would you like to split into training and validation set? [Y/n]: " - if ft_format == "classification": - if accept_suggestion(input_text, auto_accept): - split = True - - additional_params = "" - common_prompt_suffix_new_line_handled = common_prompt_suffix.replace("\n", "\\n") - common_completion_suffix_new_line_handled = common_completion_suffix.replace("\n", "\\n") - optional_ending_string = ( - f' Make sure to include `stop=["{common_completion_suffix_new_line_handled}"]` so that the generated texts ends at the expected place.' - if len(common_completion_suffix_new_line_handled) > 0 - else "" - ) - - input_text = "\n\nYour data will be written to a new JSONL file. 
Proceed [Y/n]: " - - if not any_remediations and not split: - sys.stdout.write( - f'\nYou can use your file for fine-tuning:\n> openai api fine_tunes.create -t "{fname}"{additional_params}\n\nAfter you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt.{optional_ending_string}\n' - ) - estimate_fine_tuning_time(df) - - elif accept_suggestion(input_text, auto_accept): - fnames = get_outfnames(fname, split) - if split: - assert len(fnames) == 2 and "train" in fnames[0] and "valid" in fnames[1] - MAX_VALID_EXAMPLES = 1000 - n_train = max(len(df) - MAX_VALID_EXAMPLES, int(len(df) * 0.8)) - df_train = df.sample(n=n_train, random_state=42) - df_valid = df.drop(df_train.index) - df_train[["prompt", "completion"]].to_json( # type: ignore - fnames[0], lines=True, orient="records", force_ascii=False, indent=None - ) - df_valid[["prompt", "completion"]].to_json( - fnames[1], lines=True, orient="records", force_ascii=False, indent=None - ) - - n_classes, pos_class = get_classification_hyperparams(df) - additional_params += " --compute_classification_metrics" - if n_classes == 2: - additional_params += f' --classification_positive_class "{pos_class}"' - else: - additional_params += f" --classification_n_classes {n_classes}" - else: - assert len(fnames) == 1 - df[["prompt", "completion"]].to_json( - fnames[0], lines=True, orient="records", force_ascii=False, indent=None - ) - - # Add -v VALID_FILE if we split the file into train / valid - files_string = ("s" if split else "") + " to `" + ("` and `".join(fnames)) - valid_string = f' -v "{fnames[1]}"' if split else "" - separator_reminder = ( - "" - if len(common_prompt_suffix_new_line_handled) == 0 - else f"After you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt." - ) - sys.stdout.write( - f'\nWrote modified file{files_string}`\nFeel free to take a look!\n\nNow use that file when fine-tuning:\n> openai api fine_tunes.create -t "{fnames[0]}"{valid_string}{additional_params}\n\n{separator_reminder}{optional_ending_string}\n' - ) - estimate_fine_tuning_time(df) - else: - sys.stdout.write("Aborting... 
did not write the file\n") - - -def infer_task_type(df: pd.DataFrame) -> str: - """ - Infer the likely fine-tuning task type from the data - """ - CLASSIFICATION_THRESHOLD = 3 # min_average instances of each class - if sum(df.prompt.str.len()) == 0: - return "open-ended generation" - - if len(df.completion.unique()) < len(df) / CLASSIFICATION_THRESHOLD: - return "classification" - - return "conditional generation" - - -def get_common_xfix(series: Any, xfix: str = "suffix") -> str: - """ - Finds the longest common suffix or prefix of all the values in a series - """ - common_xfix = "" - while True: - common_xfixes = ( - series.str[-(len(common_xfix) + 1) :] if xfix == "suffix" else series.str[: len(common_xfix) + 1] - ) # first few or last few characters - if common_xfixes.nunique() != 1: # we found the character at which we don't have a unique xfix anymore - break - elif common_xfix == common_xfixes.values[0]: # the entire first row is a prefix of every other row - break - else: # the first or last few characters are still common across all rows - let's try to add one more - common_xfix = common_xfixes.values[0] - return common_xfix - - -Validator: TypeAlias = "Callable[[pd.DataFrame], Remediation | None]" - - -def get_validators() -> list[Validator]: - return [ - num_examples_validator, - lambda x: necessary_column_validator(x, "prompt"), - lambda x: necessary_column_validator(x, "completion"), - additional_column_validator, - non_empty_field_validator, - format_inferrer_validator, - duplicated_rows_validator, - long_examples_validator, - lambda x: lower_case_validator(x, "prompt"), - lambda x: lower_case_validator(x, "completion"), - common_prompt_suffix_validator, - common_prompt_prefix_validator, - common_completion_prefix_validator, - common_completion_suffix_validator, - completions_space_start_validator, - ] - - -def apply_validators( - df: pd.DataFrame, - fname: str, - remediation: Remediation | None, - validators: list[Validator], - auto_accept: bool, - write_out_file_func: Callable[..., Any], -) -> None: - optional_remediations: list[Remediation] = [] - if remediation is not None: - optional_remediations.append(remediation) - for validator in validators: - remediation = validator(df) - if remediation is not None: - optional_remediations.append(remediation) - df = apply_necessary_remediation(df, remediation) - - any_optional_or_necessary_remediations = any( - [ - remediation - for remediation in optional_remediations - if remediation.optional_msg is not None or remediation.necessary_msg is not None - ] - ) - any_necessary_applied = any( - [remediation for remediation in optional_remediations if remediation.necessary_msg is not None] - ) - any_optional_applied = False - - if any_optional_or_necessary_remediations: - sys.stdout.write("\n\nBased on the analysis we will perform the following actions:\n") - for remediation in optional_remediations: - df, optional_applied = apply_optional_remediation(df, remediation, auto_accept) - any_optional_applied = any_optional_applied or optional_applied - else: - sys.stdout.write("\n\nNo remediations found.\n") - - any_optional_or_necessary_applied = any_optional_applied or any_necessary_applied - - write_out_file_func(df, fname, any_optional_or_necessary_applied, auto_accept) diff --git a/src/openai/lib/azure.py b/src/openai/lib/azure.py deleted file mode 100644 index b76b83c61c..0000000000 --- a/src/openai/lib/azure.py +++ /dev/null @@ -1,542 +0,0 @@ -from __future__ import annotations - -import os -import inspect -from typing import Any, 
Union, Mapping, TypeVar, Callable, Awaitable, overload -from typing_extensions import Self, override - -import httpx - -from .._types import NOT_GIVEN, Omit, Timeout, NotGiven -from .._utils import is_given, is_mapping -from .._client import OpenAI, AsyncOpenAI -from .._models import FinalRequestOptions -from .._streaming import Stream, AsyncStream -from .._exceptions import OpenAIError -from .._base_client import DEFAULT_MAX_RETRIES, BaseClient - -_deployments_endpoints = set( - [ - "/completions", - "/chat/completions", - "/embeddings", - "/audio/transcriptions", - "/audio/translations", - "/audio/speech", - "/images/generations", - ] -) - - -AzureADTokenProvider = Callable[[], str] -AsyncAzureADTokenProvider = Callable[[], "str | Awaitable[str]"] -_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient]) -_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]]) - - -# we need to use a sentinel API key value for Azure AD -# as we don't want to make the `api_key` in the main client Optional -# and Azure AD tokens may be retrieved on a per-request basis -API_KEY_SENTINEL = "".join(["<", "missing API key", ">"]) - - -class MutuallyExclusiveAuthError(OpenAIError): - def __init__(self) -> None: - super().__init__( - "The `api_key`, `azure_ad_token` and `azure_ad_token_provider` arguments are mutually exclusive; Only one can be passed at a time" - ) - - -class BaseAzureClient(BaseClient[_HttpxClientT, _DefaultStreamT]): - @override - def _build_request( - self, - options: FinalRequestOptions, - ) -> httpx.Request: - if options.url in _deployments_endpoints and is_mapping(options.json_data): - model = options.json_data.get("model") - if model is not None and not "/deployments" in str(self.base_url): - options.url = f"/deployments/{model}{options.url}" - - return super()._build_request(options) - - -class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI): - @overload - def __init__( - self, - *, - azure_endpoint: str, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - organization: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.Client | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... - - @overload - def __init__( - self, - *, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - organization: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.Client | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... 
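-    # Overload for constructing the client from an explicit `base_url` rather than an `azure_endpoint`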
- - @overload - def __init__( - self, - *, - base_url: str, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - organization: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.Client | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... - - def __init__( - self, - *, - api_version: str | None = None, - azure_endpoint: str | None = None, - azure_deployment: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - base_url: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.Client | None = None, - _strict_response_validation: bool = False, - ) -> None: - """Construct a new synchronous azure openai client instance. - - This automatically infers the following arguments from their corresponding environment variables if they are not provided: - - `api_key` from `AZURE_OPENAI_API_KEY` - - `organization` from `OPENAI_ORG_ID` - - `project` from `OPENAI_PROJECT_ID` - - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN` - - `api_version` from `OPENAI_API_VERSION` - - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT` - - Args: - azure_endpoint: Your Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/` - - azure_ad_token: Your Azure Active Directory token, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id - - azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request. - - azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`. - Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs. - """ - if api_key is None: - api_key = os.environ.get("AZURE_OPENAI_API_KEY") - - if azure_ad_token is None: - azure_ad_token = os.environ.get("AZURE_OPENAI_AD_TOKEN") - - if api_key is None and azure_ad_token is None and azure_ad_token_provider is None: - raise OpenAIError( - "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, `azure_ad_token_provider`, or the `AZURE_OPENAI_API_KEY` or `AZURE_OPENAI_AD_TOKEN` environment variables." 
- ) - - if api_version is None: - api_version = os.environ.get("OPENAI_API_VERSION") - - if api_version is None: - raise ValueError( - "Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable" - ) - - if default_query is None: - default_query = {"api-version": api_version} - else: - default_query = {**default_query, "api-version": api_version} - - if base_url is None: - if azure_endpoint is None: - azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT") - - if azure_endpoint is None: - raise ValueError( - "Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable" - ) - - if azure_deployment is not None: - base_url = f"{azure_endpoint}/openai/deployments/{azure_deployment}" - else: - base_url = f"{azure_endpoint}/openai" - else: - if azure_endpoint is not None: - raise ValueError("base_url and azure_endpoint are mutually exclusive") - - if api_key is None: - # define a sentinel value to avoid any typing issues - api_key = API_KEY_SENTINEL - - super().__init__( - api_key=api_key, - organization=organization, - project=project, - base_url=base_url, - timeout=timeout, - max_retries=max_retries, - default_headers=default_headers, - default_query=default_query, - http_client=http_client, - _strict_response_validation=_strict_response_validation, - ) - self._api_version = api_version - self._azure_ad_token = azure_ad_token - self._azure_ad_token_provider = azure_ad_token_provider - - @override - def copy( - self, - *, - api_key: str | None = None, - organization: str | None = None, - project: str | None = None, - api_version: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AzureADTokenProvider | None = None, - base_url: str | httpx.URL | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - http_client: httpx.Client | None = None, - max_retries: int | NotGiven = NOT_GIVEN, - default_headers: Mapping[str, str] | None = None, - set_default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - set_default_query: Mapping[str, object] | None = None, - _extra_kwargs: Mapping[str, Any] = {}, - ) -> Self: - """ - Create a new client instance re-using the same options given to the current client with optional overriding. 
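-        Azure-specific options (api_version, azure_ad_token, azure_ad_token_provider) fall back to the current client's values when they are not overridden.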
- """ - return super().copy( - api_key=api_key, - organization=organization, - project=project, - base_url=base_url, - timeout=timeout, - http_client=http_client, - max_retries=max_retries, - default_headers=default_headers, - set_default_headers=set_default_headers, - default_query=default_query, - set_default_query=set_default_query, - _extra_kwargs={ - "api_version": api_version or self._api_version, - "azure_ad_token": azure_ad_token or self._azure_ad_token, - "azure_ad_token_provider": azure_ad_token_provider or self._azure_ad_token_provider, - **_extra_kwargs, - }, - ) - - with_options = copy - - def _get_azure_ad_token(self) -> str | None: - if self._azure_ad_token is not None: - return self._azure_ad_token - - provider = self._azure_ad_token_provider - if provider is not None: - token = provider() - if not token or not isinstance(token, str): # pyright: ignore[reportUnnecessaryIsInstance] - raise ValueError( - f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}", - ) - return token - - return None - - @override - def _prepare_options(self, options: FinalRequestOptions) -> None: - headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {} - options.headers = headers - - azure_ad_token = self._get_azure_ad_token() - if azure_ad_token is not None: - if headers.get("Authorization") is None: - headers["Authorization"] = f"Bearer {azure_ad_token}" - elif self.api_key is not API_KEY_SENTINEL: - if headers.get("api-key") is None: - headers["api-key"] = self.api_key - else: - # should never be hit - raise ValueError("Unable to handle auth") - - return super()._prepare_options(options) - - -class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], AsyncOpenAI): - @overload - def __init__( - self, - *, - azure_endpoint: str, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.AsyncClient | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... - - @overload - def __init__( - self, - *, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.AsyncClient | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... 
- - @overload - def __init__( - self, - *, - base_url: str, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.AsyncClient | None = None, - _strict_response_validation: bool = False, - ) -> None: - ... - - def __init__( - self, - *, - azure_endpoint: str | None = None, - azure_deployment: str | None = None, - api_version: str | None = None, - api_key: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - organization: str | None = None, - project: str | None = None, - base_url: str | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - max_retries: int = DEFAULT_MAX_RETRIES, - default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - http_client: httpx.AsyncClient | None = None, - _strict_response_validation: bool = False, - ) -> None: - """Construct a new asynchronous azure openai client instance. - - This automatically infers the following arguments from their corresponding environment variables if they are not provided: - - `api_key` from `AZURE_OPENAI_API_KEY` - - `organization` from `OPENAI_ORG_ID` - - `project` from `OPENAI_PROJECT_ID` - - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN` - - `api_version` from `OPENAI_API_VERSION` - - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT` - - Args: - azure_endpoint: Your Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/` - - azure_ad_token: Your Azure Active Directory token, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id - - azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request. - - azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`. - Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs. - """ - if api_key is None: - api_key = os.environ.get("AZURE_OPENAI_API_KEY") - - if azure_ad_token is None: - azure_ad_token = os.environ.get("AZURE_OPENAI_AD_TOKEN") - - if api_key is None and azure_ad_token is None and azure_ad_token_provider is None: - raise OpenAIError( - "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, `azure_ad_token_provider`, or the `AZURE_OPENAI_API_KEY` or `AZURE_OPENAI_AD_TOKEN` environment variables." 
- ) - - if api_version is None: - api_version = os.environ.get("OPENAI_API_VERSION") - - if api_version is None: - raise ValueError( - "Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable" - ) - - if default_query is None: - default_query = {"api-version": api_version} - else: - default_query = {**default_query, "api-version": api_version} - - if base_url is None: - if azure_endpoint is None: - azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT") - - if azure_endpoint is None: - raise ValueError( - "Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable" - ) - - if azure_deployment is not None: - base_url = f"{azure_endpoint}/openai/deployments/{azure_deployment}" - else: - base_url = f"{azure_endpoint}/openai" - else: - if azure_endpoint is not None: - raise ValueError("base_url and azure_endpoint are mutually exclusive") - - if api_key is None: - # define a sentinel value to avoid any typing issues - api_key = API_KEY_SENTINEL - - super().__init__( - api_key=api_key, - organization=organization, - project=project, - base_url=base_url, - timeout=timeout, - max_retries=max_retries, - default_headers=default_headers, - default_query=default_query, - http_client=http_client, - _strict_response_validation=_strict_response_validation, - ) - self._api_version = api_version - self._azure_ad_token = azure_ad_token - self._azure_ad_token_provider = azure_ad_token_provider - - @override - def copy( - self, - *, - api_key: str | None = None, - organization: str | None = None, - project: str | None = None, - api_version: str | None = None, - azure_ad_token: str | None = None, - azure_ad_token_provider: AsyncAzureADTokenProvider | None = None, - base_url: str | httpx.URL | None = None, - timeout: float | Timeout | None | NotGiven = NOT_GIVEN, - http_client: httpx.AsyncClient | None = None, - max_retries: int | NotGiven = NOT_GIVEN, - default_headers: Mapping[str, str] | None = None, - set_default_headers: Mapping[str, str] | None = None, - default_query: Mapping[str, object] | None = None, - set_default_query: Mapping[str, object] | None = None, - _extra_kwargs: Mapping[str, Any] = {}, - ) -> Self: - """ - Create a new client instance re-using the same options given to the current client with optional overriding. 
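-        Azure-specific options (api_version, azure_ad_token, azure_ad_token_provider) fall back to the current client's values when they are not overridden.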
- """ - return super().copy( - api_key=api_key, - organization=organization, - project=project, - base_url=base_url, - timeout=timeout, - http_client=http_client, - max_retries=max_retries, - default_headers=default_headers, - set_default_headers=set_default_headers, - default_query=default_query, - set_default_query=set_default_query, - _extra_kwargs={ - "api_version": api_version or self._api_version, - "azure_ad_token": azure_ad_token or self._azure_ad_token, - "azure_ad_token_provider": azure_ad_token_provider or self._azure_ad_token_provider, - **_extra_kwargs, - }, - ) - - with_options = copy - - async def _get_azure_ad_token(self) -> str | None: - if self._azure_ad_token is not None: - return self._azure_ad_token - - provider = self._azure_ad_token_provider - if provider is not None: - token = provider() - if inspect.isawaitable(token): - token = await token - if not token or not isinstance(token, str): - raise ValueError( - f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}", - ) - return token - - return None - - @override - async def _prepare_options(self, options: FinalRequestOptions) -> None: - headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {} - options.headers = headers - - azure_ad_token = await self._get_azure_ad_token() - if azure_ad_token is not None: - if headers.get("Authorization") is None: - headers["Authorization"] = f"Bearer {azure_ad_token}" - elif self.api_key is not API_KEY_SENTINEL: - if headers.get("api-key") is None: - headers["api-key"] = self.api_key - else: - # should never be hit - raise ValueError("Unable to handle auth") - - return await super()._prepare_options(options) diff --git a/src/openai/lib/streaming/__init__.py b/src/openai/lib/streaming/__init__.py deleted file mode 100644 index eb378d2561..0000000000 --- a/src/openai/lib/streaming/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from ._assistants import ( - AssistantEventHandler as AssistantEventHandler, - AssistantEventHandlerT as AssistantEventHandlerT, - AssistantStreamManager as AssistantStreamManager, - AsyncAssistantEventHandler as AsyncAssistantEventHandler, - AsyncAssistantEventHandlerT as AsyncAssistantEventHandlerT, - AsyncAssistantStreamManager as AsyncAssistantStreamManager, -) diff --git a/src/openai/lib/streaming/_assistants.py b/src/openai/lib/streaming/_assistants.py deleted file mode 100644 index 03d97ec2eb..0000000000 --- a/src/openai/lib/streaming/_assistants.py +++ /dev/null @@ -1,1035 +0,0 @@ -from __future__ import annotations - -import asyncio -from types import TracebackType -from typing import TYPE_CHECKING, Any, Generic, TypeVar, Callable, Iterable, Iterator, cast -from typing_extensions import Awaitable, AsyncIterable, AsyncIterator, assert_never - -import httpx - -from ..._utils import is_dict, is_list, consume_sync_iterator, consume_async_iterator -from ..._models import construct_type -from ..._streaming import Stream, AsyncStream -from ...types.beta import AssistantStreamEvent -from ...types.beta.threads import ( - Run, - Text, - Message, - ImageFile, - TextDelta, - MessageDelta, - MessageContent, - MessageContentDelta, -) -from ...types.beta.threads.runs import RunStep, ToolCall, RunStepDelta, ToolCallDelta - - -class AssistantEventHandler: - text_deltas: Iterable[str] - """Iterator over just the text deltas in the stream. - - This corresponds to the `thread.message.delta` event - in the API. 
- - ```py - for text in stream.text_deltas: - print(text, end="", flush=True) - print() - ``` - """ - - def __init__(self) -> None: - self._current_event: AssistantStreamEvent | None = None - self._current_message_content_index: int | None = None - self._current_message_content: MessageContent | None = None - self._current_tool_call_index: int | None = None - self._current_tool_call: ToolCall | None = None - self.__current_run_step_id: str | None = None - self.__current_run: Run | None = None - self.__run_step_snapshots: dict[str, RunStep] = {} - self.__message_snapshots: dict[str, Message] = {} - self.__current_message_snapshot: Message | None = None - - self.text_deltas = self.__text_deltas__() - self._iterator = self.__stream__() - self.__stream: Stream[AssistantStreamEvent] | None = None - - def _init(self, stream: Stream[AssistantStreamEvent]) -> None: - if self.__stream: - raise RuntimeError( - "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance" - ) - - self.__stream = stream - - def __next__(self) -> AssistantStreamEvent: - return self._iterator.__next__() - - def __iter__(self) -> Iterator[AssistantStreamEvent]: - for item in self._iterator: - yield item - - @property - def current_event(self) -> AssistantStreamEvent | None: - return self._current_event - - @property - def current_run(self) -> Run | None: - return self.__current_run - - @property - def current_run_step_snapshot(self) -> RunStep | None: - if not self.__current_run_step_id: - return None - - return self.__run_step_snapshots[self.__current_run_step_id] - - @property - def current_message_snapshot(self) -> Message | None: - return self.__current_message_snapshot - - def close(self) -> None: - """ - Close the response and release the connection. - - Automatically called when the context manager exits. - """ - if self.__stream: - self.__stream.close() - - def until_done(self) -> None: - """Waits until the stream has been consumed""" - consume_sync_iterator(self) - - def get_final_run(self) -> Run: - """Wait for the stream to finish and returns the completed Run object""" - self.until_done() - - if not self.__current_run: - raise RuntimeError("No final run object found") - - return self.__current_run - - def get_final_run_steps(self) -> list[RunStep]: - """Wait for the stream to finish and returns the steps taken in this run""" - self.until_done() - - if not self.__run_step_snapshots: - raise RuntimeError("No run steps found") - - return [step for step in self.__run_step_snapshots.values()] - - def get_final_messages(self) -> list[Message]: - """Wait for the stream to finish and returns the messages emitted in this run""" - self.until_done() - - if not self.__message_snapshots: - raise RuntimeError("No messages found") - - return [message for message in self.__message_snapshots.values()] - - def __text_deltas__(self) -> Iterator[str]: - for event in self: - if event.event != "thread.message.delta": - continue - - for content_delta in event.data.delta.content or []: - if content_delta.type == "text" and content_delta.text and content_delta.text.value: - yield content_delta.text.value - - # event handlers - - def on_end(self) -> None: - """Fires when the stream has finished. - - This happens if the stream is read to completion - or if an exception occurs during iteration. 
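-        It is invoked from a finally block, so it also runs when iteration raised an exception.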
- """ - - def on_event(self, event: AssistantStreamEvent) -> None: - """Callback that is fired for every Server-Sent-Event""" - - def on_run_step_created(self, run_step: RunStep) -> None: - """Callback that is fired when a run step is created""" - - def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: - """Callback that is fired whenever a run step delta is returned from the API - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the run step. For example, a tool calls event may - look like this: - - # delta - tool_calls=[ - RunStepDeltaToolCallsCodeInterpreter( - index=0, - type='code_interpreter', - id=None, - code_interpreter=CodeInterpreter(input=' sympy', outputs=None) - ) - ] - # snapshot - tool_calls=[ - CodeToolCall( - id='call_wKayJlcYV12NiadiZuJXxcfx', - code_interpreter=CodeInterpreter(input='from sympy', outputs=[]), - type='code_interpreter', - index=0 - ) - ], - """ - - def on_run_step_done(self, run_step: RunStep) -> None: - """Callback that is fired when a run step is completed""" - - def on_tool_call_created(self, tool_call: ToolCall) -> None: - """Callback that is fired when a tool call is created""" - - def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None: - """Callback that is fired when a tool call delta is encountered""" - - def on_tool_call_done(self, tool_call: ToolCall) -> None: - """Callback that is fired when a tool call delta is encountered""" - - def on_exception(self, exception: Exception) -> None: - """Fired whenever an exception happens during streaming""" - - def on_timeout(self) -> None: - """Fires if the request times out""" - - def on_message_created(self, message: Message) -> None: - """Callback that is fired when a message is created""" - - def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None: - """Callback that is fired whenever a message delta is returned from the API - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the message. For example, a text content event may - look like this: - - # delta - MessageDeltaText( - index=0, - type='text', - text=Text( - value=' Jane' - ), - ) - # snapshot - MessageContentText( - index=0, - type='text', - text=Text( - value='Certainly, Jane' - ), - ) - """ - - def on_message_done(self, message: Message) -> None: - """Callback that is fired when a message is completed""" - - def on_text_created(self, text: Text) -> None: - """Callback that is fired when a text content block is created""" - - def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: - """Callback that is fired whenever a text content delta is returned - by the API. - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the text. 
For example: - - on_text_delta(TextDelta(value="The"), Text(value="The")), - on_text_delta(TextDelta(value=" solution"), Text(value="The solution")), - on_text_delta(TextDelta(value=" to"), Text(value="The solution to")), - on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")), - on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")), - """ - - def on_text_done(self, text: Text) -> None: - """Callback that is fired when a text content block is finished""" - - def on_image_file_done(self, image_file: ImageFile) -> None: - """Callback that is fired when an image file block is finished""" - - def _emit_sse_event(self, event: AssistantStreamEvent) -> None: - self._current_event = event - self.on_event(event) - - self.__current_message_snapshot, new_content = accumulate_event( - event=event, - current_message_snapshot=self.__current_message_snapshot, - ) - if self.__current_message_snapshot is not None: - self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot - - accumulate_run_step( - event=event, - run_step_snapshots=self.__run_step_snapshots, - ) - - for content_delta in new_content: - assert self.__current_message_snapshot is not None - - block = self.__current_message_snapshot.content[content_delta.index] - if block.type == "text": - self.on_text_created(block.text) - - if ( - event.event == "thread.run.completed" - or event.event == "thread.run.cancelled" - or event.event == "thread.run.expired" - or event.event == "thread.run.failed" - or event.event == "thread.run.requires_action" - ): - self.__current_run = event.data - if self._current_tool_call: - self.on_tool_call_done(self._current_tool_call) - elif ( - event.event == "thread.run.created" - or event.event == "thread.run.in_progress" - or event.event == "thread.run.cancelling" - or event.event == "thread.run.queued" - ): - self.__current_run = event.data - elif event.event == "thread.message.created": - self.on_message_created(event.data) - elif event.event == "thread.message.delta": - snapshot = self.__current_message_snapshot - assert snapshot is not None - - message_delta = event.data.delta - if message_delta.content is not None: - for content_delta in message_delta.content: - if content_delta.type == "text" and content_delta.text: - snapshot_content = snapshot.content[content_delta.index] - assert snapshot_content.type == "text" - self.on_text_delta(content_delta.text, snapshot_content.text) - - # If the delta is for a new message content: - # - emit on_text_done/on_image_file_done for the previous message content - # - emit on_text_created/on_image_created for the new message content - if content_delta.index != self._current_message_content_index: - if self._current_message_content is not None: - if self._current_message_content.type == "text": - self.on_text_done(self._current_message_content.text) - elif self._current_message_content.type == "image_file": - self.on_image_file_done(self._current_message_content.image_file) - - self._current_message_content_index = content_delta.index - self._current_message_content = snapshot.content[content_delta.index] - - # Update the current_message_content (delta event is correctly emitted already) - self._current_message_content = snapshot.content[content_delta.index] - - self.on_message_delta(event.data.delta, snapshot) - elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete": - self.__current_message_snapshot = event.data - 
self.__message_snapshots[event.data.id] = event.data - - if self._current_message_content_index is not None: - content = event.data.content[self._current_message_content_index] - if content.type == "text": - self.on_text_done(content.text) - elif content.type == "image_file": - self.on_image_file_done(content.image_file) - - self.on_message_done(event.data) - elif event.event == "thread.run.step.created": - self.__current_run_step_id = event.data.id - self.on_run_step_created(event.data) - elif event.event == "thread.run.step.in_progress": - self.__current_run_step_id = event.data.id - elif event.event == "thread.run.step.delta": - step_snapshot = self.__run_step_snapshots[event.data.id] - - run_step_delta = event.data.delta - if ( - run_step_delta.step_details - and run_step_delta.step_details.type == "tool_calls" - and run_step_delta.step_details.tool_calls is not None - ): - assert step_snapshot.step_details.type == "tool_calls" - for tool_call_delta in run_step_delta.step_details.tool_calls: - if tool_call_delta.index == self._current_tool_call_index: - self.on_tool_call_delta( - tool_call_delta, - step_snapshot.step_details.tool_calls[tool_call_delta.index], - ) - - # If the delta is for a new tool call: - # - emit on_tool_call_done for the previous tool_call - # - emit on_tool_call_created for the new tool_call - if tool_call_delta.index != self._current_tool_call_index: - if self._current_tool_call is not None: - self.on_tool_call_done(self._current_tool_call) - - self._current_tool_call_index = tool_call_delta.index - self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] - self.on_tool_call_created(self._current_tool_call) - - # Update the current_tool_call (delta event is correctly emitted already) - self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] - - self.on_run_step_delta( - event.data.delta, - step_snapshot, - ) - elif ( - event.event == "thread.run.step.completed" - or event.event == "thread.run.step.cancelled" - or event.event == "thread.run.step.expired" - or event.event == "thread.run.step.failed" - ): - if self._current_tool_call: - self.on_tool_call_done(self._current_tool_call) - - self.on_run_step_done(event.data) - self.__current_run_step_id = None - elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error": - # currently no special handling - ... - else: - # we only want to error at build-time - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(event) - - self._current_event = None - - def __stream__(self) -> Iterator[AssistantStreamEvent]: - stream = self.__stream - if not stream: - raise RuntimeError("Stream has not been started yet") - - try: - for event in stream: - self._emit_sse_event(event) - - yield event - except (httpx.TimeoutException, asyncio.TimeoutError) as exc: - self.on_timeout() - self.on_exception(exc) - raise - except Exception as exc: - self.on_exception(exc) - raise - finally: - self.on_end() - - -AssistantEventHandlerT = TypeVar("AssistantEventHandlerT", bound=AssistantEventHandler) - - -class AssistantStreamManager(Generic[AssistantEventHandlerT]): - """Wrapper over AssistantStreamEventHandler that is returned by `.stream()` - so that a context manager can be used. - - ```py - with client.threads.create_and_run_stream(...) as stream: - for event in stream: - ... 
- ``` - """ - - def __init__( - self, - api_request: Callable[[], Stream[AssistantStreamEvent]], - *, - event_handler: AssistantEventHandlerT, - ) -> None: - self.__stream: Stream[AssistantStreamEvent] | None = None - self.__event_handler = event_handler - self.__api_request = api_request - - def __enter__(self) -> AssistantEventHandlerT: - self.__stream = self.__api_request() - self.__event_handler._init(self.__stream) - return self.__event_handler - - def __exit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - exc_tb: TracebackType | None, - ) -> None: - if self.__stream is not None: - self.__stream.close() - - -class AsyncAssistantEventHandler: - text_deltas: AsyncIterable[str] - """Iterator over just the text deltas in the stream. - - This corresponds to the `thread.message.delta` event - in the API. - - ```py - async for text in stream.text_deltas: - print(text, end="", flush=True) - print() - ``` - """ - - def __init__(self) -> None: - self._current_event: AssistantStreamEvent | None = None - self._current_message_content_index: int | None = None - self._current_message_content: MessageContent | None = None - self._current_tool_call_index: int | None = None - self._current_tool_call: ToolCall | None = None - self.__current_run_step_id: str | None = None - self.__current_run: Run | None = None - self.__run_step_snapshots: dict[str, RunStep] = {} - self.__message_snapshots: dict[str, Message] = {} - self.__current_message_snapshot: Message | None = None - - self.text_deltas = self.__text_deltas__() - self._iterator = self.__stream__() - self.__stream: AsyncStream[AssistantStreamEvent] | None = None - - def _init(self, stream: AsyncStream[AssistantStreamEvent]) -> None: - if self.__stream: - raise RuntimeError( - "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance" - ) - - self.__stream = stream - - async def __anext__(self) -> AssistantStreamEvent: - return await self._iterator.__anext__() - - async def __aiter__(self) -> AsyncIterator[AssistantStreamEvent]: - async for item in self._iterator: - yield item - - async def close(self) -> None: - """ - Close the response and release the connection. - - Automatically called when the context manager exits. 
- """ - if self.__stream: - await self.__stream.close() - - @property - def current_event(self) -> AssistantStreamEvent | None: - return self._current_event - - @property - def current_run(self) -> Run | None: - return self.__current_run - - @property - def current_run_step_snapshot(self) -> RunStep | None: - if not self.__current_run_step_id: - return None - - return self.__run_step_snapshots[self.__current_run_step_id] - - @property - def current_message_snapshot(self) -> Message | None: - return self.__current_message_snapshot - - async def until_done(self) -> None: - """Waits until the stream has been consumed""" - await consume_async_iterator(self) - - async def get_final_run(self) -> Run: - """Wait for the stream to finish and returns the completed Run object""" - await self.until_done() - - if not self.__current_run: - raise RuntimeError("No final run object found") - - return self.__current_run - - async def get_final_run_steps(self) -> list[RunStep]: - """Wait for the stream to finish and returns the steps taken in this run""" - await self.until_done() - - if not self.__run_step_snapshots: - raise RuntimeError("No run steps found") - - return [step for step in self.__run_step_snapshots.values()] - - async def get_final_messages(self) -> list[Message]: - """Wait for the stream to finish and returns the messages emitted in this run""" - await self.until_done() - - if not self.__message_snapshots: - raise RuntimeError("No messages found") - - return [message for message in self.__message_snapshots.values()] - - async def __text_deltas__(self) -> AsyncIterator[str]: - async for event in self: - if event.event != "thread.message.delta": - continue - - for content_delta in event.data.delta.content or []: - if content_delta.type == "text" and content_delta.text and content_delta.text.value: - yield content_delta.text.value - - # event handlers - - async def on_end(self) -> None: - """Fires when the stream has finished. - - This happens if the stream is read to completion - or if an exception occurs during iteration. - """ - - async def on_event(self, event: AssistantStreamEvent) -> None: - """Callback that is fired for every Server-Sent-Event""" - - async def on_run_step_created(self, run_step: RunStep) -> None: - """Callback that is fired when a run step is created""" - - async def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None: - """Callback that is fired whenever a run step delta is returned from the API - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the run step. 
For example, a tool calls event may - look like this: - - # delta - tool_calls=[ - RunStepDeltaToolCallsCodeInterpreter( - index=0, - type='code_interpreter', - id=None, - code_interpreter=CodeInterpreter(input=' sympy', outputs=None) - ) - ] - # snapshot - tool_calls=[ - CodeToolCall( - id='call_wKayJlcYV12NiadiZuJXxcfx', - code_interpreter=CodeInterpreter(input='from sympy', outputs=[]), - type='code_interpreter', - index=0 - ) - ], - """ - - async def on_run_step_done(self, run_step: RunStep) -> None: - """Callback that is fired when a run step is completed""" - - async def on_tool_call_created(self, tool_call: ToolCall) -> None: - """Callback that is fired when a tool call is created""" - - async def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None: - """Callback that is fired when a tool call delta is encountered""" - - async def on_tool_call_done(self, tool_call: ToolCall) -> None: - """Callback that is fired when a tool call delta is encountered""" - - async def on_exception(self, exception: Exception) -> None: - """Fired whenever an exception happens during streaming""" - - async def on_timeout(self) -> None: - """Fires if the request times out""" - - async def on_message_created(self, message: Message) -> None: - """Callback that is fired when a message is created""" - - async def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None: - """Callback that is fired whenever a message delta is returned from the API - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the message. For example, a text content event may - look like this: - - # delta - MessageDeltaText( - index=0, - type='text', - text=Text( - value=' Jane' - ), - ) - # snapshot - MessageContentText( - index=0, - type='text', - text=Text( - value='Certainly, Jane' - ), - ) - """ - - async def on_message_done(self, message: Message) -> None: - """Callback that is fired when a message is completed""" - - async def on_text_created(self, text: Text) -> None: - """Callback that is fired when a text content block is created""" - - async def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None: - """Callback that is fired whenever a text content delta is returned - by the API. - - The first argument is just the delta as sent by the API and the second argument - is the accumulated snapshot of the text. 
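# Editor's note: a minimal sketch, not part of the diff, illustrating the delta/snapshot
# distinction described in the docstrings above for tool-call events. It assumes the same
# AsyncAssistantEventHandler base class; only code-interpreter tool calls are handled.
from openai.lib.streaming import AsyncAssistantEventHandler


class CodeInterpreterLogger(AsyncAssistantEventHandler):
    async def on_tool_call_delta(self, delta, snapshot) -> None:
        if delta.type != "code_interpreter" or delta.code_interpreter is None:
            return
        # `delta` carries only the newly streamed fragment of the tool input,
        # while `snapshot` holds everything accumulated for this tool call so far.
        fragment = delta.code_interpreter.input or ""
        accumulated = ""
        if snapshot.type == "code_interpreter":
            accumulated = snapshot.code_interpreter.input or ""
        print(f"delta={fragment!r} snapshot={accumulated!r}")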
For example: - - on_text_delta(TextDelta(value="The"), Text(value="The")), - on_text_delta(TextDelta(value=" solution"), Text(value="The solution")), - on_text_delta(TextDelta(value=" to"), Text(value="The solution to")), - on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")), - on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equation")), - """ - - async def on_text_done(self, text: Text) -> None: - """Callback that is fired when a text content block is finished""" - - async def on_image_file_done(self, image_file: ImageFile) -> None: - """Callback that is fired when an image file block is finished""" - - async def _emit_sse_event(self, event: AssistantStreamEvent) -> None: - self._current_event = event - await self.on_event(event) - - self.__current_message_snapshot, new_content = accumulate_event( - event=event, - current_message_snapshot=self.__current_message_snapshot, - ) - if self.__current_message_snapshot is not None: - self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot - - accumulate_run_step( - event=event, - run_step_snapshots=self.__run_step_snapshots, - ) - - for content_delta in new_content: - assert self.__current_message_snapshot is not None - - block = self.__current_message_snapshot.content[content_delta.index] - if block.type == "text": - await self.on_text_created(block.text) - - if ( - event.event == "thread.run.completed" - or event.event == "thread.run.cancelled" - or event.event == "thread.run.expired" - or event.event == "thread.run.failed" - or event.event == "thread.run.requires_action" - ): - self.__current_run = event.data - if self._current_tool_call: - await self.on_tool_call_done(self._current_tool_call) - elif ( - event.event == "thread.run.created" - or event.event == "thread.run.in_progress" - or event.event == "thread.run.cancelling" - or event.event == "thread.run.queued" - ): - self.__current_run = event.data - elif event.event == "thread.message.created": - await self.on_message_created(event.data) - elif event.event == "thread.message.delta": - snapshot = self.__current_message_snapshot - assert snapshot is not None - - message_delta = event.data.delta - if message_delta.content is not None: - for content_delta in message_delta.content: - if content_delta.type == "text" and content_delta.text: - snapshot_content = snapshot.content[content_delta.index] - assert snapshot_content.type == "text" - await self.on_text_delta(content_delta.text, snapshot_content.text) - - # If the delta is for a new message content: - # - emit on_text_done/on_image_file_done for the previous message content - # - emit on_text_created/on_image_created for the new message content - if content_delta.index != self._current_message_content_index: - if self._current_message_content is not None: - if self._current_message_content.type == "text": - await self.on_text_done(self._current_message_content.text) - elif self._current_message_content.type == "image_file": - await self.on_image_file_done(self._current_message_content.image_file) - - self._current_message_content_index = content_delta.index - self._current_message_content = snapshot.content[content_delta.index] - - # Update the current_message_content (delta event is correctly emitted already) - self._current_message_content = snapshot.content[content_delta.index] - - await self.on_message_delta(event.data.delta, snapshot) - elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete": -
self.__current_message_snapshot = event.data - self.__message_snapshots[event.data.id] = event.data - - if self._current_message_content_index is not None: - content = event.data.content[self._current_message_content_index] - if content.type == "text": - await self.on_text_done(content.text) - elif content.type == "image_file": - await self.on_image_file_done(content.image_file) - - await self.on_message_done(event.data) - elif event.event == "thread.run.step.created": - self.__current_run_step_id = event.data.id - await self.on_run_step_created(event.data) - elif event.event == "thread.run.step.in_progress": - self.__current_run_step_id = event.data.id - elif event.event == "thread.run.step.delta": - step_snapshot = self.__run_step_snapshots[event.data.id] - - run_step_delta = event.data.delta - if ( - run_step_delta.step_details - and run_step_delta.step_details.type == "tool_calls" - and run_step_delta.step_details.tool_calls is not None - ): - assert step_snapshot.step_details.type == "tool_calls" - for tool_call_delta in run_step_delta.step_details.tool_calls: - if tool_call_delta.index == self._current_tool_call_index: - await self.on_tool_call_delta( - tool_call_delta, - step_snapshot.step_details.tool_calls[tool_call_delta.index], - ) - - # If the delta is for a new tool call: - # - emit on_tool_call_done for the previous tool_call - # - emit on_tool_call_created for the new tool_call - if tool_call_delta.index != self._current_tool_call_index: - if self._current_tool_call is not None: - await self.on_tool_call_done(self._current_tool_call) - - self._current_tool_call_index = tool_call_delta.index - self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] - await self.on_tool_call_created(self._current_tool_call) - - # Update the current_tool_call (delta event is correctly emitted already) - self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index] - - await self.on_run_step_delta( - event.data.delta, - step_snapshot, - ) - elif ( - event.event == "thread.run.step.completed" - or event.event == "thread.run.step.cancelled" - or event.event == "thread.run.step.expired" - or event.event == "thread.run.step.failed" - ): - if self._current_tool_call: - await self.on_tool_call_done(self._current_tool_call) - - await self.on_run_step_done(event.data) - self.__current_run_step_id = None - elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error": - # currently no special handling - ... - else: - # we only want to error at build-time - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(event) - - self._current_event = None - - async def __stream__(self) -> AsyncIterator[AssistantStreamEvent]: - stream = self.__stream - if not stream: - raise RuntimeError("Stream has not been started yet") - - try: - async for event in stream: - await self._emit_sse_event(event) - - yield event - except (httpx.TimeoutException, asyncio.TimeoutError) as exc: - await self.on_timeout() - await self.on_exception(exc) - raise - except Exception as exc: - await self.on_exception(exc) - raise - finally: - await self.on_end() - - -AsyncAssistantEventHandlerT = TypeVar("AsyncAssistantEventHandlerT", bound=AsyncAssistantEventHandler) - - -class AsyncAssistantStreamManager(Generic[AsyncAssistantEventHandlerT]): - """Wrapper over AsyncAssistantStreamEventHandler that is returned by `.stream()` - so that an async context manager can be used without `await`ing the - original client call. 
- - ```py - async with client.threads.create_and_run_stream(...) as stream: - async for event in stream: - ... - ``` - """ - - def __init__( - self, - api_request: Awaitable[AsyncStream[AssistantStreamEvent]], - *, - event_handler: AsyncAssistantEventHandlerT, - ) -> None: - self.__stream: AsyncStream[AssistantStreamEvent] | None = None - self.__event_handler = event_handler - self.__api_request = api_request - - async def __aenter__(self) -> AsyncAssistantEventHandlerT: - self.__stream = await self.__api_request - self.__event_handler._init(self.__stream) - return self.__event_handler - - async def __aexit__( - self, - exc_type: type[BaseException] | None, - exc: BaseException | None, - exc_tb: TracebackType | None, - ) -> None: - if self.__stream is not None: - await self.__stream.close() - - -def accumulate_run_step( - *, - event: AssistantStreamEvent, - run_step_snapshots: dict[str, RunStep], -) -> None: - if event.event == "thread.run.step.created": - run_step_snapshots[event.data.id] = event.data - return - - if event.event == "thread.run.step.delta": - data = event.data - snapshot = run_step_snapshots[data.id] - - if data.delta: - merged = accumulate_delta( - cast( - "dict[object, object]", - snapshot.model_dump(exclude_unset=True), - ), - cast( - "dict[object, object]", - data.delta.model_dump(exclude_unset=True), - ), - ) - run_step_snapshots[snapshot.id] = cast(RunStep, construct_type(type_=RunStep, value=merged)) - - return None - - -def accumulate_event( - *, - event: AssistantStreamEvent, - current_message_snapshot: Message | None, -) -> tuple[Message | None, list[MessageContentDelta]]: - """Returns a tuple of message snapshot and newly created text message deltas""" - if event.event == "thread.message.created": - return event.data, [] - - new_content: list[MessageContentDelta] = [] - - if event.event != "thread.message.delta": - return current_message_snapshot, [] - - if not current_message_snapshot: - raise RuntimeError("Encountered a message delta with no previous snapshot") - - data = event.data - if data.delta.content: - for content_delta in data.delta.content: - try: - block = current_message_snapshot.content[content_delta.index] - except IndexError: - current_message_snapshot.content.insert( - content_delta.index, - cast( - MessageContent, - construct_type( - # mypy doesn't allow Content for some reason - type_=cast(Any, MessageContent), - value=content_delta.model_dump(exclude_unset=True), - ), - ), - ) - new_content.append(content_delta) - else: - merged = accumulate_delta( - cast( - "dict[object, object]", - block.model_dump(exclude_unset=True), - ), - cast( - "dict[object, object]", - content_delta.model_dump(exclude_unset=True), - ), - ) - current_message_snapshot.content[content_delta.index] = cast( - MessageContent, - construct_type( - # mypy doesn't allow Content for some reason - type_=cast(Any, MessageContent), - value=merged, - ), - ) - - return current_message_snapshot, new_content - - -def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]: - for key, delta_value in delta.items(): - if key not in acc: - acc[key] = delta_value - continue - - acc_value = acc[key] - if acc_value is None: - acc[key] = delta_value - continue - - # the `index` property is used in arrays of objects so it should - # not be accumulated like other values e.g. 
- # [{'foo': 'bar', 'index': 0}] - # - # the same applies to `type` properties as they're used for - # discriminated unions - if key == "index" or key == "type": - acc[key] = delta_value - continue - - if isinstance(acc_value, str) and isinstance(delta_value, str): - acc_value += delta_value - elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)): - acc_value += delta_value - elif is_dict(acc_value) and is_dict(delta_value): - acc_value = accumulate_delta(acc_value, delta_value) - elif is_list(acc_value) and is_list(delta_value): - # for lists of non-dictionary items we'll only ever get new entries - # in the array, existing entries will never be changed - if all(isinstance(x, (str, int, float)) for x in acc_value): - acc_value.extend(delta_value) - continue - - for delta_entry in delta_value: - if not is_dict(delta_entry): - raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}") - - try: - index = delta_entry["index"] - except KeyError as exc: - raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc - - if not isinstance(index, int): - raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}") - - try: - acc_entry = acc_value[index] - except IndexError: - acc_value.insert(index, delta_entry) - else: - if not is_dict(acc_entry): - raise TypeError("not handled yet") - - acc_value[index] = accumulate_delta(acc_entry, delta_entry) - - acc[key] = acc_value - - return acc diff --git a/src/openai/pagination.py b/src/openai/pagination.py index 8293638269..a59cced854 100644 --- a/src/openai/pagination.py +++ b/src/openai/pagination.py @@ -61,6 +61,7 @@ def next_page_info(self) -> None: class SyncCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]): data: List[_T] + has_more: Optional[bool] = None @override def _get_page_items(self) -> List[_T]: @@ -69,6 +70,14 @@ def _get_page_items(self) -> List[_T]: return [] return data + @override + def has_next_page(self) -> bool: + has_more = self.has_more + if has_more is not None and has_more is False: + return False + + return super().has_next_page() + @override def next_page_info(self) -> Optional[PageInfo]: data = self.data @@ -85,6 +94,7 @@ def next_page_info(self) -> Optional[PageInfo]: class AsyncCursorPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]): data: List[_T] + has_more: Optional[bool] = None @override def _get_page_items(self) -> List[_T]: @@ -93,6 +103,14 @@ def _get_page_items(self) -> List[_T]: return [] return data + @override + def has_next_page(self) -> bool: + has_more = self.has_more + if has_more is not None and has_more is False: + return False + + return super().has_next_page() + @override def next_page_info(self) -> Optional[PageInfo]: data = self.data diff --git a/src/openai/resources/__init__.py b/src/openai/resources/__init__.py index ecae4243fc..d3457cf319 100644 --- a/src/openai/resources/__init__.py +++ b/src/openai/resources/__init__.py @@ -56,6 +56,22 @@ BatchesWithStreamingResponse, AsyncBatchesWithStreamingResponse, ) +from .uploads import ( + Uploads, + AsyncUploads, + UploadsWithRawResponse, + AsyncUploadsWithRawResponse, + UploadsWithStreamingResponse, + AsyncUploadsWithStreamingResponse, +) +from .responses import ( + Responses, + AsyncResponses, + ResponsesWithRawResponse, + AsyncResponsesWithRawResponse, + ResponsesWithStreamingResponse, + AsyncResponsesWithStreamingResponse, +) from .embeddings import ( Embeddings, AsyncEmbeddings, @@ -88,6 +104,14 @@ 
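# Editor's note: illustrative sketch, not part of the diff, of the cursor-page change above.
# With the new `has_more` field, `has_next_page()` returns False as soon as the server
# reports has_more=False, so auto-pagination stops without an extra request.
# `fine_tuning.jobs.list` is used only as an example of a cursor-paginated endpoint.
from openai import OpenAI

client = OpenAI()

first_page = client.fine_tuning.jobs.list(limit=20)
print(first_page.has_more)         # Optional[bool] reported by the server
print(first_page.has_next_page())  # False once has_more is False

# Item-level auto-pagination relies on has_next_page() under the hood,
# so it now stops cleanly at the final page as well.
for job in client.fine_tuning.jobs.list(limit=20):
    print(job.id)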
ModerationsWithStreamingResponse, AsyncModerationsWithStreamingResponse, ) +from .vector_stores import ( + VectorStores, + AsyncVectorStores, + VectorStoresWithRawResponse, + AsyncVectorStoresWithRawResponse, + VectorStoresWithStreamingResponse, + AsyncVectorStoresWithStreamingResponse, +) __all__ = [ "Completions", @@ -144,6 +168,12 @@ "AsyncFineTuningWithRawResponse", "FineTuningWithStreamingResponse", "AsyncFineTuningWithStreamingResponse", + "VectorStores", + "AsyncVectorStores", + "VectorStoresWithRawResponse", + "AsyncVectorStoresWithRawResponse", + "VectorStoresWithStreamingResponse", + "AsyncVectorStoresWithStreamingResponse", "Beta", "AsyncBeta", "BetaWithRawResponse", @@ -156,4 +186,16 @@ "AsyncBatchesWithRawResponse", "BatchesWithStreamingResponse", "AsyncBatchesWithStreamingResponse", + "Uploads", + "AsyncUploads", + "UploadsWithRawResponse", + "AsyncUploadsWithRawResponse", + "UploadsWithStreamingResponse", + "AsyncUploadsWithStreamingResponse", + "Responses", + "AsyncResponses", + "ResponsesWithRawResponse", + "AsyncResponsesWithRawResponse", + "ResponsesWithStreamingResponse", + "AsyncResponsesWithStreamingResponse", ] diff --git a/src/openai/resources/audio/audio.py b/src/openai/resources/audio/audio.py index 537ad573d0..383b7073bf 100644 --- a/src/openai/resources/audio/audio.py +++ b/src/openai/resources/audio/audio.py @@ -47,10 +47,21 @@ def speech(self) -> Speech: @cached_property def with_raw_response(self) -> AudioWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AudioWithRawResponse(self) @cached_property def with_streaming_response(self) -> AudioWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AudioWithStreamingResponse(self) @@ -69,10 +80,21 @@ def speech(self) -> AsyncSpeech: @cached_property def with_raw_response(self) -> AsyncAudioWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncAudioWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncAudioWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
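# Editor's note: a minimal sketch, not part of the diff, of the `.with_raw_response`
# accessor documented in the new docstrings above. Any resource method can be prefixed
# this way; `models.list` is used purely as an example.
from openai import OpenAI

client = OpenAI()

raw = client.models.with_raw_response.list()
print(raw.headers.get("x-request-id"))  # raw HTTP details (headers, status) are exposed here
models = raw.parse()                    # then parse into the usual typed object
print([m.id for m in models.data])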
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncAudioWithStreamingResponse(self) diff --git a/src/openai/resources/audio/speech.py b/src/openai/resources/audio/speech.py index e26c58051e..ad01118161 100644 --- a/src/openai/resources/audio/speech.py +++ b/src/openai/resources/audio/speech.py @@ -22,9 +22,8 @@ async_to_custom_streamed_response_wrapper, ) from ...types.audio import speech_create_params -from ..._base_client import ( - make_request_options, -) +from ..._base_client import make_request_options +from ...types.audio.speech_model import SpeechModel __all__ = ["Speech", "AsyncSpeech"] @@ -32,18 +31,29 @@ class Speech(SyncAPIResource): @cached_property def with_raw_response(self) -> SpeechWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return SpeechWithRawResponse(self) @cached_property def with_streaming_response(self) -> SpeechWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return SpeechWithStreamingResponse(self) def create( self, *, input: str, - model: Union[str, Literal["tts-1", "tts-1-hd"]], - voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], + model: Union[str, SpeechModel], + voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"], response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, speed: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -60,13 +70,13 @@ def create( input: The text to generate audio for. The maximum length is 4096 characters. model: - One of the available [TTS models](https://platform.openai.com/docs/models/tts): + One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1` or `tts-1-hd` - voice: The voice to use when generating the audio. Supported voices are `alloy`, - `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are - available in the - [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the + voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. @@ -105,18 +115,29 @@ def create( class AsyncSpeech(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncSpeechWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncSpeechWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
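# Editor's note: illustrative sketch, not part of the diff, combining two changes visible
# above: the expanded voice list for `audio.speech.create` (e.g. `sage`) and the
# `.with_streaming_response` accessor documented in the new docstrings. The output file
# name is a placeholder.
from openai import OpenAI

client = OpenAI()

with client.audio.speech.with_streaming_response.create(
    model="tts-1",
    voice="sage",  # one of the newly documented voices
    input="Hello from the streaming speech example.",
) as response:
    response.stream_to_file("speech.mp3")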
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncSpeechWithStreamingResponse(self) async def create( self, *, input: str, - model: Union[str, Literal["tts-1", "tts-1-hd"]], - voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"], + model: Union[str, SpeechModel], + voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"], response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN, speed: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -133,13 +154,13 @@ async def create( input: The text to generate audio for. The maximum length is 4096 characters. model: - One of the available [TTS models](https://platform.openai.com/docs/models/tts): + One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1` or `tts-1-hd` - voice: The voice to use when generating the audio. Supported voices are `alloy`, - `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are - available in the - [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`, + `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the + voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`. diff --git a/src/openai/resources/audio/transcriptions.py b/src/openai/resources/audio/transcriptions.py index 995680186b..6cc3b9881c 100644 --- a/src/openai/resources/audio/transcriptions.py +++ b/src/openai/resources/audio/transcriptions.py @@ -2,12 +2,13 @@ from __future__ import annotations -from typing import List, Union, Mapping, cast +from typing import Any, List, Union, Mapping, cast from typing_extensions import Literal import httpx from ... import _legacy_response +from ...types import AudioResponseFormat from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes from ..._utils import ( extract_files, @@ -19,10 +20,10 @@ from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ...types.audio import transcription_create_params -from ..._base_client import ( - make_request_options, -) -from ...types.audio.transcription import Transcription +from ..._base_client import make_request_options +from ...types.audio_model import AudioModel +from ...types.audio_response_format import AudioResponseFormat +from ...types.audio.transcription_create_response import TranscriptionCreateResponse __all__ = ["Transcriptions", "AsyncTranscriptions"] @@ -30,20 +31,31 @@ class Transcriptions(SyncAPIResource): @cached_property def with_raw_response(self) -> TranscriptionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return TranscriptionsWithRawResponse(self) @cached_property def with_streaming_response(self) -> TranscriptionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return TranscriptionsWithStreamingResponse(self) def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], + model: Union[str, AudioModel], language: str | NotGiven = NOT_GIVEN, prompt: str | NotGiven = NOT_GIVEN, - response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, + response_format: AudioResponseFormat | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -52,7 +64,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Transcription: + ) -> TranscriptionCreateResponse: """ Transcribes audio into the input language. @@ -65,16 +77,16 @@ def create( Whisper V2 model) is currently available. language: The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will - improve accuracy and latency. + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio language. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -108,39 +120,54 @@ def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return self._post( - "/audio/transcriptions", - body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams), - files=files, - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return cast( + TranscriptionCreateResponse, + self._post( + "/audio/transcriptions", + body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast( + Any, TranscriptionCreateResponse + ), # Union types cannot be passed in as arguments in the type system ), - cast_to=Transcription, ) class AsyncTranscriptions(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncTranscriptionsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncTranscriptionsWithStreamingResponse(self) async def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], + model: Union[str, AudioModel], language: str | NotGiven = NOT_GIVEN, prompt: str | NotGiven = NOT_GIVEN, - response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN, + response_format: AudioResponseFormat | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -149,7 +176,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Transcription: + ) -> TranscriptionCreateResponse: """ Transcribes audio into the input language. @@ -162,16 +189,16 @@ async def create( Whisper V2 model) is currently available. language: The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will - improve accuracy and latency. + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio language. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. temperature: The sampling temperature, between 0 and 1. 
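# Editor's note: a minimal sketch, not part of the diff, of the transcription call whose
# signature is updated above (AudioModel / AudioResponseFormat parameters, an always-multipart
# body, and a TranscriptionCreateResponse return type). The audio file path is a placeholder.
from openai import OpenAI

client = OpenAI()

with open("meeting.mp3", "rb") as audio_file:  # placeholder file
    transcription = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
        language="en",                        # ISO-639-1 code, per the updated docstring
        response_format="verbose_json",       # required for timestamp_granularities
        timestamp_granularities=["segment"],
    )

print(transcription.text)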
Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -205,19 +232,23 @@ async def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return await self._post( - "/audio/transcriptions", - body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams), - files=files, - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return cast( + TranscriptionCreateResponse, + await self._post( + "/audio/transcriptions", + body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast( + Any, TranscriptionCreateResponse + ), # Union types cannot be passed in as arguments in the type system ), - cast_to=Transcription, ) diff --git a/src/openai/resources/audio/translations.py b/src/openai/resources/audio/translations.py index d711ee2fbd..77e5c2a543 100644 --- a/src/openai/resources/audio/translations.py +++ b/src/openai/resources/audio/translations.py @@ -2,12 +2,12 @@ from __future__ import annotations -from typing import Union, Mapping, cast -from typing_extensions import Literal +from typing import Any, Union, Mapping, cast import httpx from ... import _legacy_response +from ...types import AudioResponseFormat from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes from ..._utils import ( extract_files, @@ -19,10 +19,10 @@ from ..._resource import SyncAPIResource, AsyncAPIResource from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ...types.audio import translation_create_params -from ..._base_client import ( - make_request_options, -) -from ...types.audio.translation import Translation +from ..._base_client import make_request_options +from ...types.audio_model import AudioModel +from ...types.audio_response_format import AudioResponseFormat +from ...types.audio.translation_create_response import TranslationCreateResponse __all__ = ["Translations", "AsyncTranslations"] @@ -30,19 +30,30 @@ class Translations(SyncAPIResource): @cached_property def with_raw_response(self) -> TranslationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return TranslationsWithRawResponse(self) @cached_property def with_streaming_response(self) -> TranslationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return TranslationsWithStreamingResponse(self) def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], + model: Union[str, AudioModel], prompt: str | NotGiven = NOT_GIVEN, - response_format: str | NotGiven = NOT_GIVEN, + response_format: AudioResponseFormat | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -50,7 +61,7 @@ def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Translation: + ) -> TranslationCreateResponse: """ Translates audio into English. @@ -63,11 +74,11 @@ def create( prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -93,38 +104,53 @@ def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return self._post( - "/audio/translations", - body=maybe_transform(body, translation_create_params.TranslationCreateParams), - files=files, - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return cast( + TranslationCreateResponse, + self._post( + "/audio/translations", + body=maybe_transform(body, translation_create_params.TranslationCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast( + Any, TranslationCreateResponse + ), # Union types cannot be passed in as arguments in the type system ), - cast_to=Translation, ) class AsyncTranslations(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncTranslationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncTranslationsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncTranslationsWithStreamingResponse(self) async def create( self, *, file: FileTypes, - model: Union[str, Literal["whisper-1"]], + model: Union[str, AudioModel], prompt: str | NotGiven = NOT_GIVEN, - response_format: str | NotGiven = NOT_GIVEN, + response_format: AudioResponseFormat | NotGiven = NOT_GIVEN, temperature: float | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -132,7 +158,7 @@ async def create( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Translation: + ) -> TranslationCreateResponse: """ Translates audio into English. @@ -145,11 +171,11 @@ async def create( prompt: An optional text to guide the model's style or continue a previous audio segment. The - [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. - response_format: The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + response_format: The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and @@ -175,19 +201,23 @@ async def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} - return await self._post( - "/audio/translations", - body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams), - files=files, - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
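# Editor's note: illustrative sketch, not part of the diff, of the translation call updated
# above; it mirrors the transcription example but always produces English output. The input
# file name is a placeholder.
from openai import OpenAI

client = OpenAI()

with open("interview_de.mp3", "rb") as audio_file:  # placeholder file
    translation = client.audio.translations.create(
        model="whisper-1",
        file=audio_file,
        prompt="Prefer natural, conversational English.",  # prompts should be in English
        response_format="json",
    )

print(translation.text)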
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return cast( + TranslationCreateResponse, + await self._post( + "/audio/translations", + body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=cast( + Any, TranslationCreateResponse + ), # Union types cannot be passed in as arguments in the type system ), - cast_to=Translation, ) diff --git a/src/openai/resources/batches.py b/src/openai/resources/batches.py index db4c4da235..b7a299be12 100644 --- a/src/openai/resources/batches.py +++ b/src/openai/resources/batches.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Dict, Optional +from typing import Optional from typing_extensions import Literal import httpx @@ -19,10 +19,8 @@ from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ..pagination import SyncCursorPage, AsyncCursorPage from ..types.batch import Batch -from .._base_client import ( - AsyncPaginator, - make_request_options, -) +from .._base_client import AsyncPaginator, make_request_options +from ..types.shared_params.metadata import Metadata __all__ = ["Batches", "AsyncBatches"] @@ -30,19 +28,30 @@ class Batches(SyncAPIResource): @cached_property def with_raw_response(self) -> BatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return BatchesWithRawResponse(self) @cached_property def with_streaming_response(self) -> BatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return BatchesWithStreamingResponse(self) def create( self, *, completion_window: Literal["24h"], - endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + endpoint: Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"], input_file_id: str, - metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -58,9 +67,9 @@ def create( is supported. endpoint: The endpoint to be used for all requests in the batch. Currently - `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported. - Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 - embedding inputs across all requests in the batch. + `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` + are supported. Note that `/v1/embeddings` batches are also restricted to a + maximum of 50,000 embedding inputs across all requests in the batch. input_file_id: The ID of an uploaded file that contains requests for the new batch. @@ -68,11 +77,16 @@ def create( for how to upload a file. 
Your input file must be formatted as a - [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 - requests, and can be up to 100 MB in size. + requests, and can be up to 200 MB in size. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - metadata: Optional custom metadata for the batch. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -195,8 +209,11 @@ def cancel( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Batch: - """ - Cancels an in-progress batch. + """Cancels an in-progress batch. + + The batch will be in status `cancelling` for up to + 10 minutes, before changing to `cancelled`, where it will have partial results + (if any) available in the output file. Args: extra_headers: Send extra headers @@ -221,19 +238,30 @@ def cancel( class AsyncBatches(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncBatchesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncBatchesWithStreamingResponse(self) async def create( self, *, completion_window: Literal["24h"], - endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"], + endpoint: Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"], input_file_id: str, - metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -249,9 +277,9 @@ async def create( is supported. endpoint: The endpoint to be used for all requests in the batch. Currently - `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported. - Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 - embedding inputs across all requests in the batch. + `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` + are supported. Note that `/v1/embeddings` batches are also restricted to a + maximum of 50,000 embedding inputs across all requests in the batch. input_file_id: The ID of an uploaded file that contains requests for the new batch. @@ -259,11 +287,16 @@ async def create( for how to upload a file. 
Your input file must be formatted as a - [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 - requests, and can be up to 100 MB in size. + requests, and can be up to 200 MB in size. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - metadata: Optional custom metadata for the batch. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -386,8 +419,11 @@ async def cancel( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Batch: - """ - Cancels an in-progress batch. + """Cancels an in-progress batch. + + The batch will be in status `cancelling` for up to + 10 minutes, before changing to `cancelled`, where it will have partial results + (if any) available in the output file. Args: extra_headers: Send extra headers diff --git a/src/openai/resources/beta/__init__.py b/src/openai/resources/beta/__init__.py index 01f5338757..87fea25267 100644 --- a/src/openai/resources/beta/__init__.py +++ b/src/openai/resources/beta/__init__.py @@ -24,22 +24,8 @@ AssistantsWithStreamingResponse, AsyncAssistantsWithStreamingResponse, ) -from .vector_stores import ( - VectorStores, - AsyncVectorStores, - VectorStoresWithRawResponse, - AsyncVectorStoresWithRawResponse, - VectorStoresWithStreamingResponse, - AsyncVectorStoresWithStreamingResponse, -) __all__ = [ - "VectorStores", - "AsyncVectorStores", - "VectorStoresWithRawResponse", - "AsyncVectorStoresWithRawResponse", - "VectorStoresWithStreamingResponse", - "AsyncVectorStoresWithStreamingResponse", "Assistants", "AsyncAssistants", "AssistantsWithRawResponse", diff --git a/src/openai/resources/beta/assistants.py b/src/openai/resources/beta/assistants.py index 5912aff77a..1c7cbf3737 100644 --- a/src/openai/resources/beta/assistants.py +++ b/src/openai/resources/beta/assistants.py @@ -22,12 +22,12 @@ assistant_create_params, assistant_update_params, ) -from ..._base_client import ( - AsyncPaginator, - make_request_options, -) +from ..._base_client import AsyncPaginator, make_request_options from ...types.beta.assistant import Assistant +from ...types.shared.chat_model import ChatModel from ...types.beta.assistant_deleted import AssistantDeleted +from ...types.shared_params.metadata import Metadata +from ...types.shared.reasoning_effort import ReasoningEffort from ...types.beta.assistant_tool_param import AssistantToolParam from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam @@ -37,44 +37,32 @@ class Assistants(SyncAPIResource): @cached_property def with_raw_response(self) -> AssistantsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AssistantsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AssistantsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
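# Editor's note: illustrative sketch, not part of the diff, of creating a batch with the
# options documented above, including the newly supported `/v1/responses` endpoint and the
# structured `metadata` field. The JSONL file name and metadata values are placeholders.
from openai import OpenAI

client = OpenAI()

batch_input = client.files.create(
    file=open("requests.jsonl", "rb"),  # placeholder JSONL, uploaded with purpose="batch"
    purpose="batch",
)

batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/responses",
    completion_window="24h",
    metadata={"project": "docs-refresh"},  # up to 16 string key/value pairs
)
print(batch.id, batch.status)

# Cancelling moves the batch to `cancelling` and later `cancelled`, with any
# partial results available in the output file.
client.batches.cancel(batch.id)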
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AssistantsWithStreamingResponse(self) def create( self, *, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - ], + model: Union[str, ChatModel], description: Optional[str] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, @@ -94,8 +82,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. description: The description of the assistant. The maximum length is 512 characters. @@ -103,18 +91,32 @@ def create( characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the assistant. The maximum length is 256 characters. + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to @@ -162,6 +164,7 @@ def create( "instructions": instructions, "metadata": metadata, "name": name, + "reasoning_effort": reasoning_effort, "response_format": response_format, "temperature": temperature, "tool_resources": tool_resources, @@ -216,9 +219,45 @@ def update( *, description: Optional[str] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, @@ -242,24 +281,38 @@ def update( characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. name: The name of the assistant. The maximum length is 256 characters. + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -309,6 +362,7 @@ def update( "metadata": metadata, "model": model, "name": name, + "reasoning_effort": reasoning_effort, "response_format": response_format, "temperature": temperature, "tool_resources": tool_resources, @@ -349,8 +403,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -426,44 +480,32 @@ def delete( class AsyncAssistants(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncAssistantsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncAssistantsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncAssistantsWithStreamingResponse(self) async def create( self, *, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - ], + model: Union[str, ChatModel], description: Optional[str] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN, @@ -483,8 +525,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. 
+ [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. description: The description of the assistant. The maximum length is 512 characters. @@ -492,18 +534,32 @@ async def create( characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the assistant. The maximum length is 256 characters. + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to @@ -551,6 +607,7 @@ async def create( "instructions": instructions, "metadata": metadata, "name": name, + "reasoning_effort": reasoning_effort, "response_format": response_format, "temperature": temperature, "tool_resources": tool_resources, @@ -605,9 +662,45 @@ async def update( *, description: Optional[str] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: str | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[ + str, + Literal[ + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] + | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN, @@ -631,24 +724,38 @@ async def update( characters. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. name: The name of the assistant. The maximum length is 256 characters. + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. 
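The async `update()` above gains the same `reasoning_effort` parameter and an expanded `model` literal list; a sketch, assuming the usual positional assistant ID parameter (elided from the hunk) and using a placeholder ID:

```py
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # "asst_abc123" is a placeholder; `model` may be any of the literals listed in
    # the signature above (or another model string accepted by the API).
    assistant = await client.beta.assistants.update(
        "asst_abc123",
        model="o3-mini",
        reasoning_effort="high",
        metadata={"reviewed": "true"},
    )
    print(assistant.model)


asyncio.run(main())
```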
- Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -698,6 +805,7 @@ async def update( "metadata": metadata, "model": model, "name": name, + "reasoning_effort": reasoning_effort, "response_format": response_format, "temperature": temperature, "tool_resources": tool_resources, @@ -738,8 +846,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. diff --git a/src/openai/resources/beta/beta.py b/src/openai/resources/beta/beta.py index 0d9806678f..46c100d3f9 100644 --- a/src/openai/resources/beta/beta.py +++ b/src/openai/resources/beta/beta.py @@ -2,14 +2,6 @@ from __future__ import annotations -from .threads import ( - Threads, - AsyncThreads, - ThreadsWithRawResponse, - AsyncThreadsWithRawResponse, - ThreadsWithStreamingResponse, - AsyncThreadsWithStreamingResponse, -) from ..._compat import cached_property from .assistants import ( Assistants, @@ -20,24 +12,30 @@ AsyncAssistantsWithStreamingResponse, ) from ..._resource import SyncAPIResource, AsyncAPIResource -from .vector_stores import ( - VectorStores, - AsyncVectorStores, - VectorStoresWithRawResponse, - AsyncVectorStoresWithRawResponse, - VectorStoresWithStreamingResponse, - AsyncVectorStoresWithStreamingResponse, +from .threads.threads import ( + Threads, + AsyncThreads, + ThreadsWithRawResponse, + AsyncThreadsWithRawResponse, + ThreadsWithStreamingResponse, + AsyncThreadsWithStreamingResponse, +) +from .realtime.realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, ) -from .threads.threads import Threads, AsyncThreads -from .vector_stores.vector_stores import VectorStores, AsyncVectorStores __all__ = ["Beta", "AsyncBeta"] class Beta(SyncAPIResource): @cached_property - def vector_stores(self) -> VectorStores: - return VectorStores(self._client) + def realtime(self) -> Realtime: + return Realtime(self._client) @cached_property def assistants(self) -> Assistants: @@ -49,17 +47,28 @@ def threads(self) -> Threads: @cached_property def with_raw_response(self) -> BetaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return BetaWithRawResponse(self) @cached_property def with_streaming_response(self) -> BetaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
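The `with_raw_response` / `with_streaming_response` docstrings added throughout this diff all point at the same README sections; a short sketch of the pattern they describe (the header names are only examples):

```py
from openai import OpenAI

client = OpenAI()

# `.with_raw_response` returns the raw HTTP response up front; `.parse()` then
# yields the typed object you would normally get back.
raw = client.beta.assistants.with_raw_response.list(limit=5)
print(raw.headers.get("x-request-id"))
assistants = raw.parse()

# `.with_streaming_response` defers reading the body until you consume it.
with client.beta.assistants.with_streaming_response.list(limit=5) as response:
    print(response.headers.get("content-type"))
    page = response.parse()
```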
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return BetaWithStreamingResponse(self) class AsyncBeta(AsyncAPIResource): @cached_property - def vector_stores(self) -> AsyncVectorStores: - return AsyncVectorStores(self._client) + def realtime(self) -> AsyncRealtime: + return AsyncRealtime(self._client) @cached_property def assistants(self) -> AsyncAssistants: @@ -71,10 +80,21 @@ def threads(self) -> AsyncThreads: @cached_property def with_raw_response(self) -> AsyncBetaWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncBetaWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncBetaWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncBetaWithStreamingResponse(self) @@ -83,8 +103,8 @@ def __init__(self, beta: Beta) -> None: self._beta = beta @cached_property - def vector_stores(self) -> VectorStoresWithRawResponse: - return VectorStoresWithRawResponse(self._beta.vector_stores) + def realtime(self) -> RealtimeWithRawResponse: + return RealtimeWithRawResponse(self._beta.realtime) @cached_property def assistants(self) -> AssistantsWithRawResponse: @@ -100,8 +120,8 @@ def __init__(self, beta: AsyncBeta) -> None: self._beta = beta @cached_property - def vector_stores(self) -> AsyncVectorStoresWithRawResponse: - return AsyncVectorStoresWithRawResponse(self._beta.vector_stores) + def realtime(self) -> AsyncRealtimeWithRawResponse: + return AsyncRealtimeWithRawResponse(self._beta.realtime) @cached_property def assistants(self) -> AsyncAssistantsWithRawResponse: @@ -117,8 +137,8 @@ def __init__(self, beta: Beta) -> None: self._beta = beta @cached_property - def vector_stores(self) -> VectorStoresWithStreamingResponse: - return VectorStoresWithStreamingResponse(self._beta.vector_stores) + def realtime(self) -> RealtimeWithStreamingResponse: + return RealtimeWithStreamingResponse(self._beta.realtime) @cached_property def assistants(self) -> AssistantsWithStreamingResponse: @@ -134,8 +154,8 @@ def __init__(self, beta: AsyncBeta) -> None: self._beta = beta @cached_property - def vector_stores(self) -> AsyncVectorStoresWithStreamingResponse: - return AsyncVectorStoresWithStreamingResponse(self._beta.vector_stores) + def realtime(self) -> AsyncRealtimeWithStreamingResponse: + return AsyncRealtimeWithStreamingResponse(self._beta.realtime) @cached_property def assistants(self) -> AsyncAssistantsWithStreamingResponse: diff --git a/src/openai/resources/beta/realtime/__init__.py b/src/openai/resources/beta/realtime/__init__.py new file mode 100644 index 0000000000..474434e6e1 --- /dev/null +++ b/src/openai/resources/beta/realtime/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .realtime import ( + Realtime, + AsyncRealtime, + RealtimeWithRawResponse, + AsyncRealtimeWithRawResponse, + RealtimeWithStreamingResponse, + AsyncRealtimeWithStreamingResponse, +) +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) + +__all__ = [ + "Sessions", + "AsyncSessions", + "SessionsWithRawResponse", + "AsyncSessionsWithRawResponse", + "SessionsWithStreamingResponse", + "AsyncSessionsWithStreamingResponse", + "Realtime", + "AsyncRealtime", + "RealtimeWithRawResponse", + "AsyncRealtimeWithRawResponse", + "RealtimeWithStreamingResponse", + "AsyncRealtimeWithStreamingResponse", +] diff --git a/src/openai/resources/beta/realtime/realtime.py b/src/openai/resources/beta/realtime/realtime.py new file mode 100644 index 0000000000..a2dd143bfc --- /dev/null +++ b/src/openai/resources/beta/realtime/realtime.py @@ -0,0 +1,960 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import json +import logging +from types import TracebackType +from typing import TYPE_CHECKING, Any, Iterator, cast +from typing_extensions import AsyncIterator + +import httpx +from pydantic import BaseModel + +from .sessions import ( + Sessions, + AsyncSessions, + SessionsWithRawResponse, + AsyncSessionsWithRawResponse, + SessionsWithStreamingResponse, + AsyncSessionsWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + strip_not_given, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._models import construct_type_unchecked +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._exceptions import OpenAIError +from ...._base_client import _merge_mappings +from ....types.beta.realtime import session_update_event_param, response_create_event_param +from ....types.websocket_connection_options import WebsocketConnectionOptions +from ....types.beta.realtime.realtime_client_event import RealtimeClientEvent +from ....types.beta.realtime.realtime_server_event import RealtimeServerEvent +from ....types.beta.realtime.conversation_item_param import ConversationItemParam +from ....types.beta.realtime.realtime_client_event_param import RealtimeClientEventParam + +if TYPE_CHECKING: + from websockets.sync.client import ClientConnection as WebsocketConnection + from websockets.asyncio.client import ClientConnection as AsyncWebsocketConnection + + from ...._client import OpenAI, AsyncOpenAI + +__all__ = ["Realtime", "AsyncRealtime"] + +log: logging.Logger = logging.getLogger(__name__) + + +class Realtime(SyncAPIResource): + @cached_property + def sessions(self) -> Sessions: + return Sessions(self._client) + + @cached_property + def with_raw_response(self) -> RealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return RealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> RealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return RealtimeWithStreamingResponse(self) + + def connect( + self, + *, + model: str, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> RealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. + """ + return RealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + model=model, + ) + + +class AsyncRealtime(AsyncAPIResource): + @cached_property + def sessions(self) -> AsyncSessions: + return AsyncSessions(self._client) + + @cached_property + def with_raw_response(self) -> AsyncRealtimeWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRealtimeWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncRealtimeWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRealtimeWithStreamingResponse(self) + + def connect( + self, + *, + model: str, + extra_query: Query = {}, + extra_headers: Headers = {}, + websocket_connection_options: WebsocketConnectionOptions = {}, + ) -> AsyncRealtimeConnectionManager: + """ + The Realtime API enables you to build low-latency, multi-modal conversational experiences. It currently supports text and audio as both input and output, as well as function calling. + + Some notable benefits of the API include: + + - Native speech-to-speech: Skipping an intermediate text format means low latency and nuanced output. + - Natural, steerable voices: The models have natural inflection and can laugh, whisper, and adhere to tone direction. + - Simultaneous multimodal output: Text is useful for moderation; faster-than-realtime audio ensures stable playback. + + The Realtime API is a stateful, event-based API that communicates over a WebSocket. 
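A sketch of the new `connect()` entry point with the async client. It assumes the optional websocket dependency is installed (`pip install "openai[realtime]"`, per the import error raised further down); the session payload, conversation item shape, and event types checked in the loop are illustrative:

```py
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    async with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
        await connection.session.update(session={"modalities": ["text"]})
        await connection.conversation.item.create(
            item={
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": "Say hello!"}],
            }
        )
        await connection.response.create()

        # The iterator yields parsed `RealtimeServerEvent`s until the socket closes.
        async for event in connection:
            if event.type == "response.text.delta":
                print(event.delta, end="")
            elif event.type == "response.done":
                break


asyncio.run(main())
```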
+ """ + return AsyncRealtimeConnectionManager( + client=self._client, + extra_query=extra_query, + extra_headers=extra_headers, + websocket_connection_options=websocket_connection_options, + model=model, + ) + + +class RealtimeWithRawResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithRawResponse: + return SessionsWithRawResponse(self._realtime.sessions) + + +class AsyncRealtimeWithRawResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithRawResponse: + return AsyncSessionsWithRawResponse(self._realtime.sessions) + + +class RealtimeWithStreamingResponse: + def __init__(self, realtime: Realtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> SessionsWithStreamingResponse: + return SessionsWithStreamingResponse(self._realtime.sessions) + + +class AsyncRealtimeWithStreamingResponse: + def __init__(self, realtime: AsyncRealtime) -> None: + self._realtime = realtime + + @cached_property + def sessions(self) -> AsyncSessionsWithStreamingResponse: + return AsyncSessionsWithStreamingResponse(self._realtime.sessions) + + +class AsyncRealtimeConnection: + """Represents a live websocket connection to the Realtime API""" + + session: AsyncRealtimeSessionResource + response: AsyncRealtimeResponseResource + conversation: AsyncRealtimeConversationResource + input_audio_buffer: AsyncRealtimeInputAudioBufferResource + + _connection: AsyncWebsocketConnection + + def __init__(self, connection: AsyncWebsocketConnection) -> None: + self._connection = connection + + self.session = AsyncRealtimeSessionResource(self) + self.response = AsyncRealtimeResponseResource(self) + self.conversation = AsyncRealtimeConversationResource(self) + self.input_audio_buffer = AsyncRealtimeInputAudioBufferResource(self) + + async def __aiter__(self) -> AsyncIterator[RealtimeServerEvent]: + """ + An infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield await self.recv() + except ConnectionClosedOK: + return + + async def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. + """ + return self.parse_event(await self.recv_bytes()) + + async def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. + + Canceling this method is safe. There's no risk of losing data. + + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. 
+ """ + message = await self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + if not isinstance(message, bytes): + # passing `decode=False` should always result in us getting `bytes` back + raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}") + + return message + + async def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(await async_maybe_transform(event, RealtimeClientEventParam)) + ) + await self._connection.send(data) + + async def close(self, *, code: int = 1000, reason: str = "") -> None: + await self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class AsyncRealtimeConnectionManager: + """ + Context manager over a `AsyncRealtimeConnection` that is returned by `beta.realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.beta.realtime.connect(...).enter() + # ... + await connection.close() + ``` + """ + + def __init__( + self, + *, + client: AsyncOpenAI, + model: str, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__model = model + self.__connection: AsyncRealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + async def __aenter__(self) -> AsyncRealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = await client.beta.realtime.connect(...).enter() + # ... 
+ await connection.close() + ``` + """ + try: + from websockets.asyncio.client import connect + except ImportError as exc: + raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc + + url = self._prepare_url().copy_with( + params={ + **self.__client.base_url.params, + "model": self.__model, + **self.__extra_query, + }, + ) + log.debug("Connecting to %s", url) + if self.__websocket_connection_options: + log.debug("Connection options: %s", self.__websocket_connection_options) + + self.__connection = AsyncRealtimeConnection( + await connect( + str(url), + user_agent_header=self.__client.user_agent, + additional_headers=_merge_mappings( + { + **self.__client.auth_headers, + "OpenAI-Beta": "realtime=v1", + }, + self.__extra_headers, + ), + **self.__websocket_connection_options, + ) + ) + + return self.__connection + + enter = __aenter__ + + def _prepare_url(https://github.com/self) -> httpx.URL: + if self.__client.websocket_base_url is not None: + base_url = httpx.URL(self.__client.websocket_base_url) + else: + base_url = self.__client._base_url.copy_with(scheme="wss") + + merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime" + return base_url.copy_with(raw_path=merge_raw_path) + + async def __aexit__( + self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None + ) -> None: + if self.__connection is not None: + await self.__connection.close() + + +class RealtimeConnection: + """Represents a live websocket connection to the Realtime API""" + + session: RealtimeSessionResource + response: RealtimeResponseResource + conversation: RealtimeConversationResource + input_audio_buffer: RealtimeInputAudioBufferResource + + _connection: WebsocketConnection + + def __init__(self, connection: WebsocketConnection) -> None: + self._connection = connection + + self.session = RealtimeSessionResource(self) + self.response = RealtimeResponseResource(self) + self.conversation = RealtimeConversationResource(self) + self.input_audio_buffer = RealtimeInputAudioBufferResource(self) + + def __iter__(self) -> Iterator[RealtimeServerEvent]: + """ + An infinite-iterator that will continue to yield events until + the connection is closed. + """ + from websockets.exceptions import ConnectionClosedOK + + try: + while True: + yield self.recv() + except ConnectionClosedOK: + return + + def recv(self) -> RealtimeServerEvent: + """ + Receive the next message from the connection and parses it into a `RealtimeServerEvent` object. + + Canceling this method is safe. There's no risk of losing data. + """ + return self.parse_event(self.recv_bytes()) + + def recv_bytes(self) -> bytes: + """Receive the next message from the connection as raw bytes. + + Canceling this method is safe. There's no risk of losing data. + + If you want to parse the message into a `RealtimeServerEvent` object like `.recv()` does, + then you can call `.parse_event(data)`. 
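A small sketch of the manual receive path described above, i.e. `recv_bytes()` followed by `parse_event()` instead of calling `recv()` directly; the model name is illustrative and the realtime extra is assumed to be installed:

```py
from openai import OpenAI

client = OpenAI()

with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    # Equivalent to `connection.recv()`, split into its two steps.
    raw = connection.recv_bytes()
    event = connection.parse_event(raw)
    print(event.type)
```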
+ """ + message = self._connection.recv(decode=False) + log.debug(f"Received websocket message: %s", message) + if not isinstance(message, bytes): + # passing `decode=False` should always result in us getting `bytes` back + raise TypeError(f"Expected `.recv(decode=False)` to return `bytes` but got {type(message)}") + + return message + + def send(self, event: RealtimeClientEvent | RealtimeClientEventParam) -> None: + data = ( + event.to_json(use_api_names=True, exclude_defaults=True, exclude_unset=True) + if isinstance(event, BaseModel) + else json.dumps(maybe_transform(event, RealtimeClientEventParam)) + ) + self._connection.send(data) + + def close(self, *, code: int = 1000, reason: str = "") -> None: + self._connection.close(code=code, reason=reason) + + def parse_event(self, data: str | bytes) -> RealtimeServerEvent: + """ + Converts a raw `str` or `bytes` message into a `RealtimeServerEvent` object. + + This is helpful if you're using `.recv_bytes()`. + """ + return cast( + RealtimeServerEvent, construct_type_unchecked(value=json.loads(data), type_=cast(Any, RealtimeServerEvent)) + ) + + +class RealtimeConnectionManager: + """ + Context manager over a `RealtimeConnection` that is returned by `beta.realtime.connect()` + + This context manager ensures that the connection will be closed when it exits. + + --- + + Note that if your application doesn't work well with the context manager approach then you + can call the `.enter()` method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.beta.realtime.connect(...).enter() + # ... + connection.close() + ``` + """ + + def __init__( + self, + *, + client: OpenAI, + model: str, + extra_query: Query, + extra_headers: Headers, + websocket_connection_options: WebsocketConnectionOptions, + ) -> None: + self.__client = client + self.__model = model + self.__connection: RealtimeConnection | None = None + self.__extra_query = extra_query + self.__extra_headers = extra_headers + self.__websocket_connection_options = websocket_connection_options + + def __enter__(self) -> RealtimeConnection: + """ + 👋 If your application doesn't work well with the context manager approach then you + can call this method directly to initiate a connection. + + **Warning**: You must remember to close the connection with `.close()`. + + ```py + connection = client.beta.realtime.connect(...).enter() + # ... 
+ connection.close() + ``` + """ + try: + from websockets.sync.client import connect + except ImportError as exc: + raise OpenAIError("You need to install `openai[realtime]` to use this method") from exc + + url = self._prepare_url().copy_with( + params={ + **self.__client.base_url.params, + "model": self.__model, + **self.__extra_query, + }, + ) + log.debug("Connecting to %s", url) + if self.__websocket_connection_options: + log.debug("Connection options: %s", self.__websocket_connection_options) + + self.__connection = RealtimeConnection( + connect( + str(url), + user_agent_header=self.__client.user_agent, + additional_headers=_merge_mappings( + { + **self.__client.auth_headers, + "OpenAI-Beta": "realtime=v1", + }, + self.__extra_headers, + ), + **self.__websocket_connection_options, + ) + ) + + return self.__connection + + enter = __enter__ + + def _prepare_url(https://github.com/self) -> httpx.URL: + if self.__client.websocket_base_url is not None: + base_url = httpx.URL(self.__client.websocket_base_url) + else: + base_url = self.__client._base_url.copy_with(scheme="wss") + + merge_raw_path = base_url.raw_path.rstrip(b"/") + b"/realtime" + return base_url.copy_with(raw_path=merge_raw_path) + + def __exit__( + self, exc_type: type[BaseException] | None, exc: BaseException | None, exc_tb: TracebackType | None + ) -> None: + if self.__connection is not None: + self.__connection.close() + + +class BaseRealtimeConnectionResource: + def __init__(self, connection: RealtimeConnection) -> None: + self._connection = connection + + +class RealtimeSessionResource(BaseRealtimeConnectionResource): + def update(self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to update the session’s default configuration. + The client may send this event at any time to update any field, + except for `voice`. However, note that once a session has been + initialized with a particular `model`, it can’t be changed to + another model using `session.update`. + + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present are updated. To clear a field like + `instructions`, pass an empty string. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class RealtimeResponseResource(BaseRealtimeConnectionResource): + def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.cancelled` event or an error if there is no response to + cancel. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + def create( + self, + *, + event_id: str | NotGiven = NOT_GIVEN, + response: response_create_event_param.Response | NotGiven = NOT_GIVEN, + ) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history. 
+ + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions`, and `temperature`. These fields will override the Session's + configuration for this Response only. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + +class RealtimeConversationResource(BaseRealtimeConnectionResource): + @cached_property + def item(self) -> RealtimeConversationItemResource: + return RealtimeConversationItemResource(self._connection) + + +class RealtimeConversationItemResource(BaseRealtimeConnectionResource): + def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + def create( + self, + *, + item: ConversationItemParam, + event_id: str | NotGiven = NOT_GIVEN, + previous_item_id: str | NotGiven = NOT_GIVEN, + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. + + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + +class RealtimeInputAudioBufferResource(BaseRealtimeConnectionResource): + def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. 
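A sketch tying together the sync helpers above: add a conversation item, then request a response with per-response overrides. The item payload and the `response` dict are illustrative shapes, not taken verbatim from the diff:

```py
from openai import OpenAI

client = OpenAI()

with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    connection.conversation.item.create(
        item={
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "What's the weather like?"}],
        }
    )
    # Fields such as `instructions` here override the session defaults for this response only.
    connection.response.create(response={"instructions": "Reply in one sentence."})

    for event in connection:
        if event.type == "response.done":
            break
```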
+ """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to commit the user input audio buffer, which will create a + new user message item in the conversation. This event will produce an error + if the input audio buffer is empty. When in Server VAD mode, the client does + not need to send this event, the server will commit the audio buffer + automatically. + + Committing the input audio buffer will trigger input audio transcription + (if enabled in session configuration), but it will not create a response + from the model. The server will respond with an `input_audio_buffer.committed` + event. + """ + self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike made other client events, the server will + not send a confirmation response to this event. + """ + self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) + + +class BaseAsyncRealtimeConnectionResource: + def __init__(self, connection: AsyncRealtimeConnection) -> None: + self._connection = connection + + +class AsyncRealtimeSessionResource(BaseAsyncRealtimeConnectionResource): + async def update( + self, *, session: session_update_event_param.Session, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """ + Send this event to update the session’s default configuration. + The client may send this event at any time to update any field, + except for `voice`. However, note that once a session has been + initialized with a particular `model`, it can’t be changed to + another model using `session.update`. + + When the server receives a `session.update`, it will respond + with a `session.updated` event showing the full, effective configuration. + Only the fields that are present are updated. To clear a field like + `instructions`, pass an empty string. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "session.update", "session": session, "event_id": event_id}), + ) + ) + + +class AsyncRealtimeResponseResource(BaseAsyncRealtimeConnectionResource): + async def cancel(self, *, event_id: str | NotGiven = NOT_GIVEN, response_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to cancel an in-progress response. + + The server will respond + with a `response.cancelled` event or an error if there is no response to + cancel. 
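A sketch of the input-audio-buffer flow (`append` then `commit`) when server VAD is disabled. `load_pcm_chunks()` is a hypothetical helper, and base64-encoding the payload passed to `audio` is an assumption rather than something stated in the hunk above:

```py
import base64

from openai import OpenAI

client = OpenAI()

with client.beta.realtime.connect(model="gpt-4o-realtime-preview") as connection:
    # `load_pcm_chunks()` is hypothetical: assumed to yield raw 16-bit PCM chunks (bytes).
    for chunk in load_pcm_chunks():
        connection.input_audio_buffer.append(audio=base64.b64encode(chunk).decode("ascii"))

    # Committing turns the buffered audio into a user message (and triggers transcription
    # if enabled); it does not by itself create a model response.
    connection.input_audio_buffer.commit()
```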
+ """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.cancel", "event_id": event_id, "response_id": response_id}), + ) + ) + + async def create( + self, + *, + event_id: str | NotGiven = NOT_GIVEN, + response: response_create_event_param.Response | NotGiven = NOT_GIVEN, + ) -> None: + """ + This event instructs the server to create a Response, which means triggering + model inference. When in Server VAD mode, the server will create Responses + automatically. + + A Response will include at least one Item, and may have two, in which case + the second will be a function call. These Items will be appended to the + conversation history. + + The server will respond with a `response.created` event, events for Items + and content created, and finally a `response.done` event to indicate the + Response is complete. + + The `response.create` event includes inference configuration like + `instructions`, and `temperature`. These fields will override the Session's + configuration for this Response only. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "response.create", "event_id": event_id, "response": response}), + ) + ) + + +class AsyncRealtimeConversationResource(BaseAsyncRealtimeConnectionResource): + @cached_property + def item(self) -> AsyncRealtimeConversationItemResource: + return AsyncRealtimeConversationItemResource(self._connection) + + +class AsyncRealtimeConversationItemResource(BaseAsyncRealtimeConnectionResource): + async def delete(self, *, item_id: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event when you want to remove any item from the conversation + history. + + The server will respond with a `conversation.item.deleted` event, + unless the item does not exist in the conversation history, in which case the + server will respond with an error. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "conversation.item.delete", "item_id": item_id, "event_id": event_id}), + ) + ) + + async def create( + self, + *, + item: ConversationItemParam, + event_id: str | NotGiven = NOT_GIVEN, + previous_item_id: str | NotGiven = NOT_GIVEN, + ) -> None: + """ + Add a new Item to the Conversation's context, including messages, function + calls, and function call responses. This event can be used both to populate a + "history" of the conversation and to add new items mid-stream, but has the + current limitation that it cannot populate assistant audio messages. + + If successful, the server will respond with a `conversation.item.created` + event, otherwise an `error` event will be sent. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.create", + "item": item, + "event_id": event_id, + "previous_item_id": previous_item_id, + } + ), + ) + ) + + async def truncate( + self, *, audio_end_ms: int, content_index: int, item_id: str, event_id: str | NotGiven = NOT_GIVEN + ) -> None: + """Send this event to truncate a previous assistant message’s audio. + + The server + will produce audio faster than realtime, so this event is useful when the user + interrupts to truncate audio that has already been sent to the client but not + yet played. This will synchronize the server's understanding of the audio with + the client's playback. 
+ + Truncating audio will delete the server-side text transcript to ensure there + is not text in the context that hasn't been heard by the user. + + If successful, the server will respond with a `conversation.item.truncated` + event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given( + { + "type": "conversation.item.truncate", + "audio_end_ms": audio_end_ms, + "content_index": content_index, + "item_id": item_id, + "event_id": event_id, + } + ), + ) + ) + + +class AsyncRealtimeInputAudioBufferResource(BaseAsyncRealtimeConnectionResource): + async def clear(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to clear the audio bytes in the buffer. + + The server will + respond with an `input_audio_buffer.cleared` event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.clear", "event_id": event_id})) + ) + + async def commit(self, *, event_id: str | NotGiven = NOT_GIVEN) -> None: + """ + Send this event to commit the user input audio buffer, which will create a + new user message item in the conversation. This event will produce an error + if the input audio buffer is empty. When in Server VAD mode, the client does + not need to send this event, the server will commit the audio buffer + automatically. + + Committing the input audio buffer will trigger input audio transcription + (if enabled in session configuration), but it will not create a response + from the model. The server will respond with an `input_audio_buffer.committed` + event. + """ + await self._connection.send( + cast(RealtimeClientEventParam, strip_not_given({"type": "input_audio_buffer.commit", "event_id": event_id})) + ) + + async def append(self, *, audio: str, event_id: str | NotGiven = NOT_GIVEN) -> None: + """Send this event to append audio bytes to the input audio buffer. + + The audio + buffer is temporary storage you can write to and later commit. In Server VAD + mode, the audio buffer is used to detect speech and the server will decide + when to commit. When Server VAD is disabled, you must commit the audio buffer + manually. + + The client may choose how much audio to place in each event up to a maximum + of 15 MiB, for example streaming smaller chunks from the client may allow the + VAD to be more responsive. Unlike made other client events, the server will + not send a confirmation response to this event. + """ + await self._connection.send( + cast( + RealtimeClientEventParam, + strip_not_given({"type": "input_audio_buffer.append", "audio": audio, "event_id": event_id}), + ) + ) diff --git a/src/openai/resources/beta/realtime/sessions.py b/src/openai/resources/beta/realtime/sessions.py new file mode 100644 index 0000000000..4b337b7c19 --- /dev/null +++ b/src/openai/resources/beta/realtime/sessions.py @@ -0,0 +1,351 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal + +import httpx + +from .... 
import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._base_client import make_request_options +from ....types.beta.realtime import session_create_params +from ....types.beta.realtime.session_create_response import SessionCreateResponse + +__all__ = ["Sessions", "AsyncSessions"] + + +class Sessions(SyncAPIResource): + @cached_property + def with_raw_response(self) -> SessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return SessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> SessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return SessionsWithStreamingResponse(self) + + def create( + self, + *, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. + + It responds with a session object, plus a `client_secret` key which contains a + usable ephemeral API token that can be used to authenticate browser clients for + the Realtime API. + + Args: + input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For + `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel + (mono), and little-endian byte order. 
+ + input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as rough guidance rather than the representation + understood by the model. The client can optionally set the language and prompt + for transcription, these fields will be passed to the Whisper API. + + instructions: The default system instructions (i.e. system message) prepended to model calls. + This field allows the client to guide the model on desired responses. The model + can be instructed on response content and format, (e.g. "be extremely succinct", + "act friendly", "here are examples of good responses") and on audio behavior + (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The + instructions are not guaranteed to be followed by the model, but they provide + guidance to the model on the desired behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + + max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + + modalities: The set of modalities the model can respond with. To disable audio, set this to + ["text"]. + + model: The Realtime model used for this session. + + output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. + For `pcm16`, output audio is sampled at a rate of 24kHz. + + temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8. + + tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify + a function. + + tools: Tools (functions) available to the model. + + turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD + means that the model will detect the start and end of speech based on audio + volume and respond at the end of user speech. + + voice: The voice the model uses to respond. Voice cannot be changed during the session + once the model has responded with audio at least once. Current voice options are + `alloy`, `ash`, `ballad`, `coral`, `echo` `sage`, `shimmer` and `verse`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + "/realtime/sessions", + body=maybe_transform( + { + "input_audio_format": input_audio_format, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "model": model, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class AsyncSessions(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncSessionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncSessionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncSessionsWithStreamingResponse(self) + + async def create( + self, + *, + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN, + instructions: str | NotGiven = NOT_GIVEN, + max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN, + modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN, + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + | NotGiven = NOT_GIVEN, + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN, + temperature: float | NotGiven = NOT_GIVEN, + tool_choice: str | NotGiven = NOT_GIVEN, + tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN, + turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN, + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SessionCreateResponse: + """ + Create an ephemeral API token for use in client-side applications with the + Realtime API. Can be configured with the same session parameters as the + `session.update` client event. 
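A sketch of minting an ephemeral token with the new `sessions.create()`. The parameters mirror the `session.update` event; the `turn_detection` payload shape is assumed here, and `client_secret` is the field the docstring says the response carries:

```py
from openai import OpenAI

client = OpenAI()

session = client.beta.realtime.sessions.create(
    model="gpt-4o-realtime-preview",
    modalities=["text", "audio"],
    voice="alloy",
    input_audio_format="pcm16",
    turn_detection={"type": "server_vad"},  # assumed shape for server VAD
)

# Hand this to a browser client to authenticate directly against the Realtime API.
print(session.client_secret)
```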
+
+        It responds with a session object, plus a `client_secret` key which contains a
+        usable ephemeral API token that can be used to authenticate browser clients for
+        the Realtime API.
+
+        Args:
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
+
+          input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
+              `null` to turn off once on. Input audio transcription is not native to the
+              model, since the model consumes audio directly. Transcription runs
+              asynchronously through
+              [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription)
+              and should be treated as rough guidance rather than the representation
+              understood by the model. The client can optionally set the language and prompt
+              for transcription; these fields will be passed to the Whisper API.
+
+          instructions: The default system instructions (i.e. system message) prepended to model calls.
+              This field allows the client to guide the model on desired responses. The model
+              can be instructed on response content and format (e.g. "be extremely succinct",
+              "act friendly", "here are examples of good responses") and on audio behavior
+              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
+              instructions are not guaranteed to be followed by the model, but they provide
+              guidance to the model on the desired behavior.
+
+              Note that the server sets default instructions which will be used if this field
+              is not set and are visible in the `session.created` event at the start of the
+              session.
+
+          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
+              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
+              `inf` for the maximum available tokens for a given model. Defaults to `inf`.
+
+          modalities: The set of modalities the model can respond with. To disable audio, set this to
+              ["text"].
+
+          model: The Realtime model used for this session.
+
+          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
+
+          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
+
+          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
+              a function.
+
+          tools: Tools (functions) available to the model.
+
+          turn_detection: Configuration for turn detection. Can be set to `null` to turn off. Server VAD
+              means that the model will detect the start and end of speech based on audio
+              volume and respond at the end of user speech.
+
+          voice: The voice the model uses to respond. Voice cannot be changed during the session
+              once the model has responded with audio at least once. Current voice options are
+              `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, `shimmer` and `verse`.
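Editor's note: as an illustration of the ephemeral-token endpoint documented above, here is a minimal sketch of calling it from a trusted backend. The `client.beta.realtime.sessions` accessor path, the chosen model name, and all argument values are assumptions for the example, not taken from this diff.

```python
from openai import OpenAI

client = OpenAI()  # server-side; reads OPENAI_API_KEY from the environment

# Assumed accessor path for the Sessions resource added in this diff.
session = client.beta.realtime.sessions.create(
    model="gpt-4o-realtime-preview",   # one of the Literal model names above
    modalities=["text", "audio"],
    voice="verse",
    instructions="You are a concise voice assistant.",
)

# The response carries a `client_secret` that a browser client can use to
# authenticate directly against the Realtime API.
print(session.client_secret)
```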
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + "/realtime/sessions", + body=await async_maybe_transform( + { + "input_audio_format": input_audio_format, + "input_audio_transcription": input_audio_transcription, + "instructions": instructions, + "max_response_output_tokens": max_response_output_tokens, + "modalities": modalities, + "model": model, + "output_audio_format": output_audio_format, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "turn_detection": turn_detection, + "voice": voice, + }, + session_create_params.SessionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=SessionCreateResponse, + ) + + +class SessionsWithRawResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.to_raw_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithRawResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = _legacy_response.async_to_raw_response_wrapper( + sessions.create, + ) + + +class SessionsWithStreamingResponse: + def __init__(self, sessions: Sessions) -> None: + self._sessions = sessions + + self.create = to_streamed_response_wrapper( + sessions.create, + ) + + +class AsyncSessionsWithStreamingResponse: + def __init__(self, sessions: AsyncSessions) -> None: + self._sessions = sessions + + self.create = async_to_streamed_response_wrapper( + sessions.create, + ) diff --git a/src/openai/resources/beta/threads/messages.py b/src/openai/resources/beta/threads/messages.py index f0832515ce..403f95443f 100644 --- a/src/openai/resources/beta/threads/messages.py +++ b/src/openai/resources/beta/threads/messages.py @@ -17,12 +17,10 @@ from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) +from ...._base_client import AsyncPaginator, make_request_options from ....types.beta.threads import message_list_params, message_create_params, message_update_params from ....types.beta.threads.message import Message +from ....types.shared_params.metadata import Metadata from ....types.beta.threads.message_deleted import MessageDeleted from ....types.beta.threads.message_content_part_param import MessageContentPartParam @@ -32,10 +30,21 @@ class Messages(SyncAPIResource): @cached_property def with_raw_response(self) -> MessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return MessagesWithRawResponse(self) @cached_property def with_streaming_response(self) -> MessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return MessagesWithStreamingResponse(self) def create( @@ -45,7 +54,7 @@ def create( content: Union[str, Iterable[MessageContentPartParam]], role: Literal["user", "assistant"], attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -70,9 +79,11 @@ def create( attachments: A list of files attached to the message, and the tools they should be added to. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -144,7 +155,7 @@ def update( message_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -157,9 +168,11 @@ def update( Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -210,8 +223,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -295,10 +308,21 @@ def delete( class AsyncMessages(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncMessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
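Editor's note: the `.with_raw_response` docstrings being added here describe a wrapper available on every method; a short usage sketch follows. The thread ID is a placeholder and the inspected header name is only an example.

```python
from openai import OpenAI

client = OpenAI()

# Same arguments as a normal call, but the HTTP response object is returned.
raw = client.beta.threads.messages.with_raw_response.create(
    thread_id="thread_abc123",  # placeholder ID
    role="user",
    content="Hello!",
)
print(raw.headers.get("x-request-id"))  # inspect response headers
message = raw.parse()                   # parse into the usual Message object
```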
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncMessagesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncMessagesWithStreamingResponse(self) async def create( @@ -308,7 +332,7 @@ async def create( content: Union[str, Iterable[MessageContentPartParam]], role: Literal["user", "assistant"], attachments: Optional[Iterable[message_create_params.Attachment]] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -333,9 +357,11 @@ async def create( attachments: A list of files attached to the message, and the tools they should be added to. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -407,7 +433,7 @@ async def update( message_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -420,9 +446,11 @@ async def update( Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -473,8 +501,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. 
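Editor's note: the messages changes above retype `metadata` from `Optional[object]` to the shared `Metadata` mapping (up to 16 pairs, keys of at most 64 characters, string values of at most 512 characters). A brief sketch of how a caller might pass it; the IDs and key names are placeholders.

```python
from openai import OpenAI

client = OpenAI()

message = client.beta.threads.messages.create(
    thread_id="thread_abc123",  # placeholder ID
    role="user",
    content="Summarize the attached report.",
    metadata={"source": "docs-example", "priority": "low"},  # <= 16 pairs
)

client.beta.threads.messages.update(
    message_id=message.id,
    thread_id="thread_abc123",
    metadata={"reviewed": "true"},  # string values, <= 512 characters each
)
```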
diff --git a/src/openai/resources/beta/threads/runs/runs.py b/src/openai/resources/beta/threads/runs/runs.py index c37071529c..8f6eed0cad 100644 --- a/src/openai/resources/beta/threads/runs/runs.py +++ b/src/openai/resources/beta/threads/runs/runs.py @@ -2,10 +2,8 @@ from __future__ import annotations -import typing_extensions -from typing import Union, Iterable, Optional, overload -from functools import partial -from typing_extensions import Literal +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, overload import httpx @@ -20,7 +18,6 @@ ) from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven from ....._utils import ( - is_given, required_args, maybe_transform, async_maybe_transform, @@ -30,18 +27,7 @@ from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....._streaming import Stream, AsyncStream from .....pagination import SyncCursorPage, AsyncCursorPage -from ....._base_client import ( - AsyncPaginator, - make_request_options, -) -from .....lib.streaming import ( - AssistantEventHandler, - AssistantEventHandlerT, - AssistantStreamManager, - AsyncAssistantEventHandler, - AsyncAssistantEventHandlerT, - AsyncAssistantStreamManager, -) +from ....._base_client import AsyncPaginator, make_request_options from .....types.beta.threads import ( run_list_params, run_create_params, @@ -49,8 +35,12 @@ run_submit_tool_outputs_params, ) from .....types.beta.threads.run import Run +from .....types.shared.chat_model import ChatModel +from .....types.shared_params.metadata import Metadata +from .....types.shared.reasoning_effort import ReasoningEffort from .....types.beta.assistant_tool_param import AssistantToolParam from .....types.beta.assistant_stream_event import AssistantStreamEvent +from .....types.beta.threads.runs.run_step_include import RunStepInclude from .....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam from .....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam @@ -64,10 +54,21 @@ def steps(self) -> Steps: @cached_property def with_raw_response(self) -> RunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return RunsWithRawResponse(self) @cached_property def with_streaming_response(self) -> RunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return RunsWithStreamingResponse(self) @overload @@ -76,39 +77,16 @@ def create( thread_id: str, *, assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -131,6 +109,14 @@ def create( [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to execute this run. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -154,21 +140,39 @@ def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). 
Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -224,39 +228,16 @@ def create( *, assistant_id: str, stream: Literal[True], + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, @@ -282,6 +263,14 @@ def create( events, terminating when the Run enters a terminal state with a `data: [DONE]` message. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -305,21 +294,39 @@ def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. 
Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
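Editor's note: to tie together the new run-creation parameters documented above (`include`, `parallel_tool_calls`, `reasoning_effort`, and the `json_schema` response format), here is a hedged sketch of a call. The assistant/thread IDs and the schema contents are invented for illustration, and `reasoning_effort` applies only to o-series models.

```python
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.create(
    thread_id="thread_abc123",   # placeholder
    assistant_id="asst_abc123",  # placeholder
    include=["step_details.tool_calls[*].file_search.results[*].content"],
    parallel_tool_calls=True,
    reasoning_effort="medium",   # o-series models only
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "answer",  # hypothetical schema for illustration
            "schema": {
                "type": "object",
                "properties": {"summary": {"type": "string"}},
                "required": ["summary"],
                "additionalProperties": False,
            },
        },
    },
)
```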
**Important:** when using JSON mode, you **must** also instruct the model to @@ -371,39 +378,16 @@ def create( *, assistant_id: str, stream: bool, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, @@ -429,6 +413,14 @@ def create( events, terminating when the Run enters a terminal state with a `data: [DONE]` message. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + additional_instructions: Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions. @@ -452,21 +444,39 @@ def create( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. 
Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -517,39 +527,16 @@ def create( thread_id: str, *, assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -579,6 +566,8 @@ def create( "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, "response_format": response_format, "stream": stream, "temperature": temperature, @@ -590,7 +579,11 @@ def create( run_create_params.RunCreateParams, ), options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, run_create_params.RunCreateParams), ), cast_to=Run, stream=stream or False, @@ -639,7 +632,7 @@ def update( run_id: str, *, thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -652,9 +645,11 @@ def update( Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. extra_headers: Send extra headers @@ -704,8 +699,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -782,51 +777,14 @@ def cancel( cast_to=Run, ) - def create_and_poll( + @overload + def submit_tool_outputs( self, + run_id: str, *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, thread_id: str, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -835,1489 +793,306 @@ def create_and_poll( timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> Run: """ - A helper to create a run an poll for a terminal state. 
More information on Run - lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + tool_outputs: A list of tools for which the outputs are being submitted. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds """ - run = self.create( - thread_id=thread_id, - assistant_id=assistant_id, - additional_instructions=additional_instructions, - additional_messages=additional_messages, - instructions=instructions, - max_completion_tokens=max_completion_tokens, - max_prompt_tokens=max_prompt_tokens, - metadata=metadata, - model=model, - response_format=response_format, - temperature=temperature, - tool_choice=tool_choice, - # We assume we are not streaming when polling - stream=False, - tools=tools, - truncation_strategy=truncation_strategy, - top_p=top_p, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return self.poll( - run.id, - thread_id=thread_id, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - poll_interval_ms=poll_interval_ms, - timeout=timeout, - ) + ... @overload - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( + def submit_tool_outputs( self, + run_id: str, *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, thread_id: str, + stream: Literal[True], + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler]: - """Create a Run stream""" + ) -> Stream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ ... @overload - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( + def submit_tool_outputs( self, + run_id: str, *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, thread_id: str, - event_handler: AssistantEventHandlerT, + stream: bool, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandlerT]: - """Create a Run stream""" + ) -> Run | Stream[AssistantStreamEvent]: + """ + When a run has the `status: "requires_action"` and `required_action.type` is + `submit_tool_outputs`, this endpoint can be used to submit the outputs from the + tool calls once they're all completed. 
All outputs must be submitted in a single + request. + + Args: + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + tool_outputs: A list of tools for which the outputs are being submitted. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ ... - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( + @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) + def submit_tool_outputs( self, + run_id: str, *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, thread_id: str, - event_handler: AssistantEventHandlerT | None = None, + tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: - """Create a Run stream""" + ) -> Run | Stream[AssistantStreamEvent]: if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - make_request = partial( - self._post, - f"/threads/{thread_id}/runs", + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", body=maybe_transform( { - "assistant_id": assistant_id, - "additional_instructions": additional_instructions, - "additional_messages": additional_messages, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "tools": tools, - "truncation_strategy": truncation_strategy, - "top_p": top_p, + "tool_outputs": tool_outputs, + "stream": stream, }, - run_create_params.RunCreateParams, + run_submit_tool_outputs_params.RunSubmitToolOutputsParams, ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, - stream=True, + stream=stream or False, stream_cls=Stream[AssistantStreamEvent], ) - return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) - def poll( - self, - run_id: str, - thread_id: str, - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to poll a run status until it reaches a terminal state. More - information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} - if is_given(poll_interval_ms): - extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) +class AsyncRuns(AsyncAPIResource): + @cached_property + def steps(self) -> AsyncSteps: + return AsyncSteps(self._client) - terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} - while True: - response = self.with_raw_response.retrieve( - thread_id=thread_id, - run_id=run_id, - extra_headers=extra_headers, - extra_body=extra_body, - extra_query=extra_query, - timeout=timeout, - ) + @cached_property + def with_raw_response(self) -> AsyncRunsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
- run = response.parse() - # Return if we reached a terminal state - if run.status in terminal_states: - return run + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncRunsWithRawResponse(self) - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 + @cached_property + def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. - self._sleep(poll_interval_ms / 1000) + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncRunsWithStreamingResponse(self) @overload - def stream( + async def create( self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler]: - """Create a Run stream""" - ... 
- - @overload - def stream( - self, *, assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AssistantEventHandlerT, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandlerT]: - """Create a Run stream""" - ... + ) -> Run: + """ + Create a run. 
- def stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: - """Create a Run stream""" - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - make_request = partial( - self._post, - f"/threads/{thread_id}/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "additional_instructions": additional_instructions, - "additional_messages": additional_messages, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "tools": tools, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - run_create_params.RunCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=Stream[AssistantStreamEvent], - ) - return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) - - @overload - def submit_tool_outputs( - self, - run_id: str, - *, - thread_id: str, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass 
additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - When a run has the `status: "requires_action"` and `required_action.type` is - `submit_tool_outputs`, this endpoint can be used to submit the outputs from the - tool calls once they're all completed. All outputs must be submitted in a single - request. - - Args: - tool_outputs: A list of tools for which the outputs are being submitted. - - stream: If `true`, returns a stream of events that happen during the Run as server-sent - events, terminating when the Run enters a terminal state with a `data: [DONE]` - message. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def submit_tool_outputs( - self, - run_id: str, - *, - thread_id: str, - stream: Literal[True], - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[AssistantStreamEvent]: - """ - When a run has the `status: "requires_action"` and `required_action.type` is - `submit_tool_outputs`, this endpoint can be used to submit the outputs from the - tool calls once they're all completed. All outputs must be submitted in a single - request. - - Args: - stream: If `true`, returns a stream of events that happen during the Run as server-sent - events, terminating when the Run enters a terminal state with a `data: [DONE]` - message. - - tool_outputs: A list of tools for which the outputs are being submitted. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def submit_tool_outputs( - self, - run_id: str, - *, - thread_id: str, - stream: bool, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run | Stream[AssistantStreamEvent]: - """ - When a run has the `status: "requires_action"` and `required_action.type` is - `submit_tool_outputs`, this endpoint can be used to submit the outputs from the - tool calls once they're all completed. All outputs must be submitted in a single - request. 
- - Args: - stream: If `true`, returns a stream of events that happen during the Run as server-sent - events, terminating when the Run enters a terminal state with a `data: [DONE]` - message. - - tool_outputs: A list of tools for which the outputs are being submitted. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"]) - def submit_tool_outputs( - self, - run_id: str, - *, - thread_id: str, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run | Stream[AssistantStreamEvent]: - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} - return self._post( - f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", - body=maybe_transform( - { - "tool_outputs": tool_outputs, - "stream": stream, - }, - run_submit_tool_outputs_params.RunSubmitToolOutputsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=stream or False, - stream_cls=Stream[AssistantStreamEvent], - ) - - def submit_tool_outputs_and_poll( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to submit a tool output to a run and poll for a terminal run state. 
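As an illustration of the `requires_action` flow described above, a sketch of submitting tool outputs for a run; the IDs and the `"57"` output are placeholders standing in for real tool results:

from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.retrieve(
    "run_abc123",               # placeholder run ID
    thread_id="thread_abc123",  # placeholder thread ID
)

if run.status == "requires_action" and run.required_action is not None:
    tool_outputs = [
        {"tool_call_id": tool_call.id, "output": "57"}  # substitute real tool results
        for tool_call in run.required_action.submit_tool_outputs.tool_calls
    ]
    run = client.beta.threads.runs.submit_tool_outputs(
        run.id,
        thread_id="thread_abc123",
        tool_outputs=tool_outputs,
    )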
- More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = self.submit_tool_outputs( - run_id=run_id, - thread_id=thread_id, - tool_outputs=tool_outputs, - stream=False, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return self.poll( - run_id=run.id, - thread_id=thread_id, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - poll_interval_ms=poll_interval_ms, - ) - - @overload - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - ... - - @overload - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - event_handler: AssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandlerT]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - ... - - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - event_handler: AssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. 
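The submit-and-poll helper can be exercised the same way; a sketch, again with placeholder IDs and a placeholder output value:

from openai import OpenAI

client = OpenAI()

run = client.beta.threads.runs.submit_tool_outputs_and_poll(
    run_id="run_abc123",        # placeholder
    thread_id="thread_abc123",  # placeholder
    tool_outputs=[{"tool_call_id": "call_abc123", "output": "57"}],
    poll_interval_ms=1000,
)
print(run.status)  # a terminal state such as "completed"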
More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = partial( - self._post, - f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", - body=maybe_transform( - { - "tool_outputs": tool_outputs, - "stream": True, - }, - run_submit_tool_outputs_params.RunSubmitToolOutputsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=Stream[AssistantStreamEvent], - ) - return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler()) - - -class AsyncRuns(AsyncAPIResource): - @cached_property - def steps(self) -> AsyncSteps: - return AsyncSteps(self._client) - - @cached_property - def with_raw_response(self) -> AsyncRunsWithRawResponse: - return AsyncRunsWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncRunsWithStreamingResponse: - return AsyncRunsWithStreamingResponse(self) - - @overload - async def create( - self, - thread_id: str, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - Create a run. 
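And a sketch of the streaming variant in its context-manager form; the IDs and output value are placeholders:

from openai import OpenAI

client = OpenAI()

with client.beta.threads.runs.submit_tool_outputs_stream(
    run_id="run_abc123",        # placeholder
    thread_id="thread_abc123",  # placeholder
    tool_outputs=[{"tool_call_id": "call_abc123", "output": "57"}],
) as stream:
    for text in stream.text_deltas:
        print(text, end="", flush=True)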
- - Args: - assistant_id: The ID of the - [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to - execute this run. - - additional_instructions: Appends additional instructions at the end of the instructions for the run. This - is useful for modifying the behavior on a per-run basis without overriding other - instructions. - - additional_messages: Adds additional messages to the thread before creating the run. - - instructions: Overrides the - [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) - of the assistant. This is useful for modifying the behavior on a per-run basis. - - max_completion_tokens: The maximum number of completion tokens that may be used over the course of the - run. The run will make a best effort to use only the number of completion tokens - specified, across multiple turns of the run. If the run exceeds the number of - completion tokens specified, the run will end with status `incomplete`. See - `incomplete_details` for more info. - - max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. - The run will make a best effort to use only the number of prompt tokens - specified, across multiple turns of the run. If the run exceeds the number of - prompt tokens specified, the run will end with status `incomplete`. See - `incomplete_details` for more info. - - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. - - model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to - be used to execute this run. If a value is provided here, it will override the - model associated with the assistant. If not, the model associated with the - assistant will be used. - - response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), - and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - stream: If `true`, returns a stream of events that happen during the Run as server-sent - events, terminating when the Run enters a terminal state with a `data: [DONE]` - message. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tools and instead generates a message. `auto` is the default value - and means the model can pick between generating a message or calling one or more - tools. 
`required` means the model must call one or more tools before responding - to the user. Specifying a particular tool like `{"type": "file_search"}` or - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - tools: Override the tools the assistant can use for this run. This is useful for - modifying the behavior on a per-run basis. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or temperature but not both. - - truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - thread_id: str, - *, - assistant_id: str, - stream: Literal[True], - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[AssistantStreamEvent]: - """ - Create a run. - - Args: - assistant_id: The ID of the - [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to - execute this run. - - stream: If `true`, returns a stream of events that happen during the Run as server-sent - events, terminating when the Run enters a terminal state with a `data: [DONE]` - message. - - additional_instructions: Appends additional instructions at the end of the instructions for the run. This - is useful for modifying the behavior on a per-run basis without overriding other - instructions. 
- - additional_messages: Adds additional messages to the thread before creating the run. - - instructions: Overrides the - [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) - of the assistant. This is useful for modifying the behavior on a per-run basis. - - max_completion_tokens: The maximum number of completion tokens that may be used over the course of the - run. The run will make a best effort to use only the number of completion tokens - specified, across multiple turns of the run. If the run exceeds the number of - completion tokens specified, the run will end with status `incomplete`. See - `incomplete_details` for more info. - - max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. - The run will make a best effort to use only the number of prompt tokens - specified, across multiple turns of the run. If the run exceeds the number of - prompt tokens specified, the run will end with status `incomplete`. See - `incomplete_details` for more info. - - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. - - model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to - be used to execute this run. If a value is provided here, it will override the - model associated with the assistant. If not, the model associated with the - assistant will be used. - - response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), - and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tools and instead generates a message. `auto` is the default value - and means the model can pick between generating a message or calling one or more - tools. `required` means the model must call one or more tools before responding - to the user. Specifying a particular tool like `{"type": "file_search"}` or - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - tools: Override the tools the assistant can use for this run. This is useful for - modifying the behavior on a per-run basis. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. 
So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or temperature but not both. - - truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - thread_id: str, - *, - assistant_id: str, - stream: bool, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run | AsyncStream[AssistantStreamEvent]: - """ - Create a run. - - Args: - assistant_id: The ID of the - [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to - execute this run. - - stream: If `true`, returns a stream of events that happen during the Run as server-sent - events, terminating when the Run enters a terminal state with a `data: [DONE]` - message. - - additional_instructions: Appends additional instructions at the end of the instructions for the run. This - is useful for modifying the behavior on a per-run basis without overriding other - instructions. - - additional_messages: Adds additional messages to the thread before creating the run. - - instructions: Overrides the - [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) - of the assistant. This is useful for modifying the behavior on a per-run basis. - - max_completion_tokens: The maximum number of completion tokens that may be used over the course of the - run. The run will make a best effort to use only the number of completion tokens - specified, across multiple turns of the run. 
If the run exceeds the number of - completion tokens specified, the run will end with status `incomplete`. See - `incomplete_details` for more info. - - max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. - The run will make a best effort to use only the number of prompt tokens - specified, across multiple turns of the run. If the run exceeds the number of - prompt tokens specified, the run will end with status `incomplete`. See - `incomplete_details` for more info. - - metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. - - model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to - be used to execute this run. If a value is provided here, it will override the - model associated with the assistant. If not, the model associated with the - assistant will be used. - - response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), - and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tools and instead generates a message. `auto` is the default value - and means the model can pick between generating a message or calling one or more - tools. `required` means the model must call one or more tools before responding - to the user. Specifying a particular tool like `{"type": "file_search"}` or - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - tools: Override the tools the assistant can use for this run. This is useful for - modifying the behavior on a per-run basis. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or temperature but not both. - - truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to - control the intial context window of the run. 
- - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["assistant_id"], ["assistant_id", "stream"]) - async def create( - self, - thread_id: str, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run | AsyncStream[AssistantStreamEvent]: - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} - return await self._post( - f"/threads/{thread_id}/runs", - body=await async_maybe_transform( - { - "assistant_id": assistant_id, - "additional_instructions": additional_instructions, - "additional_messages": additional_messages, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "stream": stream, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_p": top_p, - "truncation_strategy": truncation_strategy, - }, - run_create_params.RunCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=stream or False, - stream_cls=AsyncStream[AssistantStreamEvent], - ) + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. 
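A minimal sketch of calling the async `create()` method on the `AsyncRuns` resource; the IDs are placeholders and assume an existing thread and assistant:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    run = await client.beta.threads.runs.create(
        thread_id="thread_abc123",   # placeholder
        assistant_id="asst_abc123",  # placeholder
        instructions="Answer as briefly as possible.",
    )
    print(run.id, run.status)


asyncio.run(main())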
- async def retrieve( - self, - run_id: str, - *, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - Retrieves a run. + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. - Args: - extra_headers: Send extra headers + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. - extra_query: Add additional query parameters to the request + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. - extra_body: Add additional JSON properties to the request + additional_messages: Adds additional messages to the thread before creating the run. - timeout: Override the client-level default timeout for this request, in seconds - """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} - return await self._get( - f"/threads/{thread_id}/runs/{run_id}", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - ) + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. - async def update( - self, - run_id: str, - *, - thread_id: str, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - Modifies a run. + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. - Args: metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - extra_headers: Send extra headers + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. - extra_query: Add additional query parameters to the request + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. - extra_body: Add additional JSON properties to the request + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. - timeout: Override the client-level default timeout for this request, in seconds - """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} - return await self._post( - f"/threads/{thread_id}/runs/{run_id}", - body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - ) + reasoning_effort: **o-series models only** - def list( - self, - thread_id: str, - *, - after: str | NotGiven = NOT_GIVEN, - before: str | NotGiven = NOT_GIVEN, - limit: int | NotGiven = NOT_GIVEN, - order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]: - """ - Returns a list of runs belonging to a thread. + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. - Args: - after: A cursor for use in pagination. `after` is an object ID that defines your place - in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include after=obj_foo in order to - fetch the next page of the list. + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - before: A cursor for use in pagination. `before` is an object ID that defines your place - in the list. 
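For the cursor-based pagination described here, a sketch of iterating runs with the async client, which follows the `after` cursor automatically; the thread ID is a placeholder:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # The paginator fetches subsequent pages via the `after` cursor as needed.
    async for run in client.beta.threads.runs.list(
        thread_id="thread_abc123",  # placeholder
        limit=20,
        order="desc",
    ):
        print(run.id, run.status)


asyncio.run(main())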
For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - limit: A limit on the number of objects to be returned. Limit can range between 1 and - 100, and the default is 20. + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. - order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending - order and `desc` for descending order. + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. - extra_headers: Send extra headers + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. - extra_query: Add additional query parameters to the request + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. - extra_body: Add additional JSON properties to the request + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. - timeout: Override the client-level default timeout for this request, in seconds - """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} - return self._get_api_list( - f"/threads/{thread_id}/runs", - page=AsyncCursorPage[Run], - options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - query=maybe_transform( - { - "after": after, - "before": before, - "limit": limit, - "order": order, - }, - run_list_params.RunListParams, - ), - ), - model=Run, - ) + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. - async def cancel( - self, - run_id: str, - *, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
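As a sketch of the `response_format` behaviour documented above, JSON mode paired with an explicit instruction to emit JSON; the IDs and instruction text are placeholders:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    run = await client.beta.threads.runs.create(
        thread_id="thread_abc123",   # placeholder
        assistant_id="asst_abc123",  # placeholder
        # Per the note above, JSON mode requires telling the model to produce JSON.
        instructions="Reply with a single JSON object.",
        response_format={"type": "json_object"},
    )
    print(run.status)


asyncio.run(main())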
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - Cancels a run that is `in_progress`. + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. - Args: extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -2326,291 +1101,344 @@ async def cancel( timeout: Override the client-level default timeout for this request, in seconds """ - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} - return await self._post( - f"/threads/{thread_id}/runs/{run_id}/cancel", - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - ) - - async def create_and_poll( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to create a run an poll for a terminal state. 
More information on Run - lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = await self.create( - thread_id=thread_id, - assistant_id=assistant_id, - additional_instructions=additional_instructions, - additional_messages=additional_messages, - instructions=instructions, - max_completion_tokens=max_completion_tokens, - max_prompt_tokens=max_prompt_tokens, - metadata=metadata, - model=model, - response_format=response_format, - temperature=temperature, - tool_choice=tool_choice, - # We assume we are not streaming when polling - stream=False, - tools=tools, - truncation_strategy=truncation_strategy, - top_p=top_p, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return await self.poll( - run.id, - thread_id=thread_id, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - poll_interval_ms=poll_interval_ms, - timeout=timeout, - ) + ... @overload - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( + async def create( self, + thread_id: str, *, assistant_id: str, + stream: Literal[True], + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: - """Create a Run stream""" + ) -> AsyncStream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. 
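A sketch of the create-and-poll helper referenced above, which creates a run and blocks until it reaches a terminal state; the IDs are placeholders:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    run = await client.beta.threads.runs.create_and_poll(
        thread_id="thread_abc123",   # placeholder
        assistant_id="asst_abc123",  # placeholder
        poll_interval_ms=1000,
    )
    if run.status == "completed":
        print("run finished")


asyncio.run(main())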
+ + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). 
+ + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. `auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ ... 
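The `stream: Literal[True]` overload above returns a raw event stream rather than a `Run`; a sketch of consuming it, with placeholder IDs:

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    stream = await client.beta.threads.runs.create(
        thread_id="thread_abc123",   # placeholder
        assistant_id="asst_abc123",  # placeholder
        stream=True,
    )
    # The stream terminates when the run reaches a terminal state.
    async for event in stream:
        print(event.event)


asyncio.run(main())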
@overload - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( + async def create( self, + thread_id: str, *, assistant_id: str, + stream: bool, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: - """Create a Run stream""" + ) -> Run | AsyncStream[AssistantStreamEvent]: + """ + Create a run. + + Args: + assistant_id: The ID of the + [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to + execute this run. + + stream: If `true`, returns a stream of events that happen during the Run as server-sent + events, terminating when the Run enters a terminal state with a `data: [DONE]` + message. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + + additional_instructions: Appends additional instructions at the end of the instructions for the run. This + is useful for modifying the behavior on a per-run basis without overriding other + instructions. + + additional_messages: Adds additional messages to the thread before creating the run. + + instructions: Overrides the + [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) + of the assistant. 
This is useful for modifying the behavior on a per-run basis. + + max_completion_tokens: The maximum number of completion tokens that may be used over the course of the + run. The run will make a best effort to use only the number of completion tokens + specified, across multiple turns of the run. If the run exceeds the number of + completion tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + max_prompt_tokens: The maximum number of prompt tokens that may be used over the course of the run. + The run will make a best effort to use only the number of prompt tokens + specified, across multiple turns of the run. If the run exceeds the number of + prompt tokens specified, the run will end with status `incomplete`. See + `incomplete_details` for more info. + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to + be used to execute this run. If a value is provided here, it will override the + model associated with the assistant. If not, the model associated with the + assistant will be used. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: Specifies the format that the model must output. Compatible with + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), + and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the + message the model generates is valid JSON. + + **Important:** when using JSON mode, you **must** also instruct the model to + produce JSON yourself via a system or user message. Without this, the model may + generate an unending stream of whitespace until the generation reaches the token + limit, resulting in a long-running and seemingly "stuck" request. Also note that + the message content may be partially cut off if `finish_reason="length"`, which + indicates the generation exceeded `max_tokens` or the conversation exceeded the + max context length. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tools and instead generates a message. 
`auto` is the default value + and means the model can pick between generating a message or calling one or more + tools. `required` means the model must call one or more tools before responding + to the user. Specifying a particular tool like `{"type": "file_search"}` or + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + tools: Override the tools the assistant can use for this run. This is useful for + modifying the behavior on a per-run basis. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or temperature but not both. + + truncation_strategy: Controls for how a thread will be truncated prior to the run. Use this to + control the intial context window of the run. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ ... - @typing_extensions.deprecated("use `stream` instead") - def create_and_stream( + @required_args(["assistant_id"], ["assistant_id", "stream"]) + async def create( self, + thread_id: str, *, assistant_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, top_p: Optional[float] | NotGiven = NOT_GIVEN, truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT | None = None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ( - AsyncAssistantStreamManager[AsyncAssistantEventHandler] - | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] - ): - """Create a Run stream""" + ) -> Run | AsyncStream[AssistantStreamEvent]: if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = self._post( + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( f"/threads/{thread_id}/runs", - body=maybe_transform( + body=await async_maybe_transform( { "assistant_id": assistant_id, "additional_instructions": additional_instructions, @@ -2620,276 +1448,216 @@ def create_and_stream( "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, + "reasoning_effort": reasoning_effort, "response_format": response_format, + "stream": stream, "temperature": temperature, "tool_choice": tool_choice, - "stream": True, "tools": tools, - "truncation_strategy": truncation_strategy, "top_p": top_p, + "truncation_strategy": truncation_strategy, }, run_create_params.RunCreateParams, ), options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, run_create_params.RunCreateParams), ), cast_to=Run, - stream=True, + stream=stream or False, stream_cls=AsyncStream[AssistantStreamEvent], ) - return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - async def poll( + async def retrieve( + self, + run_id: str, + *, + thread_id: str, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Run: + """ + Retrieves a run. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._get( + f"/threads/{thread_id}/runs/{run_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + ) + + async def update( self, run_id: str, + *, thread_id: str, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, ) -> Run: """ - A helper to poll a run status until it reaches a terminal state. More - information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})} + Modifies a run. - if is_given(poll_interval_ms): - extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. - terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired", "incomplete"} - while True: - response = await self.with_raw_response.retrieve( - thread_id=thread_id, - run_id=run_id, - extra_headers=extra_headers, - extra_body=extra_body, - extra_query=extra_query, - timeout=timeout, - ) + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
- run = response.parse() - # Return if we reached a terminal state - if run.status in terminal_states: - return run + extra_headers: Send extra headers - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 + extra_query: Add additional query parameters to the request - await self._sleep(poll_interval_ms / 1000) + extra_body: Add additional JSON properties to the request - @overload - def stream( - self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: - """Create a Run stream""" - ... 
+ timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}", + body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Run, + ) - @overload - def stream( + def list( self, - *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, thread_id: str, - event_handler: AsyncAssistantEventHandlerT, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: - """Create a Run stream""" - ... + ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]: + """ + Returns a list of runs belonging to a thread. + + Args: + after: A cursor for use in pagination. `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + before: A cursor for use in pagination. `before` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. 
+ + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not thread_id: + raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/threads/{thread_id}/runs", + page=AsyncCursorPage[Run], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + run_list_params.RunListParams, + ), + ), + model=Run, + ) - def stream( + async def cancel( self, + run_id: str, *, - assistant_id: str, - additional_instructions: Optional[str] | NotGiven = NOT_GIVEN, - additional_messages: Optional[Iterable[run_create_params.AdditionalMessage]] | NotGiven = NOT_GIVEN, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[run_create_params.TruncationStrategy] | NotGiven = NOT_GIVEN, thread_id: str, - event_handler: AsyncAssistantEventHandlerT | None = None, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ( - AsyncAssistantStreamManager[AsyncAssistantEventHandler] - | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] - ): - """Create a Run stream""" + ) -> Run: + """ + Cancels a run that is `in_progress`. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ if not thread_id: raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.create_and_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = self._post( - f"/threads/{thread_id}/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "additional_instructions": additional_instructions, - "additional_messages": additional_messages, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "tools": tools, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - run_create_params.RunCreateParams, - ), + if not run_id: + raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/threads/{thread_id}/runs/{run_id}/cancel", options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=Run, - stream=True, - stream_cls=AsyncStream[AssistantStreamEvent], ) - return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) @overload async def submit_tool_outputs( @@ -3042,140 +1810,6 @@ async def submit_tool_outputs( stream_cls=AsyncStream[AssistantStreamEvent], ) - async def submit_tool_outputs_and_poll( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to submit a tool output to a run and poll for a terminal run state. - More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = await self.submit_tool_outputs( - run_id=run_id, - thread_id=thread_id, - tool_outputs=tool_outputs, - stream=False, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return await self.poll( - run_id=run.id, - thread_id=thread_id, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - poll_interval_ms=poll_interval_ms, - ) - - @overload - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
- # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - ... - - @overload - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - ... - - def submit_tool_outputs_stream( - self, - *, - tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput], - run_id: str, - thread_id: str, - event_handler: AsyncAssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ( - AsyncAssistantStreamManager[AsyncAssistantEventHandler] - | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] - ): - """ - Submit the tool outputs from a previous run and stream the run to a terminal - state. 
More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - if not run_id: - raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}") - - if not thread_id: - raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}") - - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = self._post( - f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs", - body=maybe_transform( - { - "tool_outputs": tool_outputs, - "stream": True, - }, - run_submit_tool_outputs_params.RunSubmitToolOutputsParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=AsyncStream[AssistantStreamEvent], - ) - return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - class RunsWithRawResponse: def __init__(self, runs: Runs) -> None: diff --git a/src/openai/resources/beta/threads/runs/steps.py b/src/openai/resources/beta/threads/runs/steps.py index 512008939c..709c729d45 100644 --- a/src/openai/resources/beta/threads/runs/steps.py +++ b/src/openai/resources/beta/threads/runs/steps.py @@ -2,23 +2,25 @@ from __future__ import annotations +from typing import List from typing_extensions import Literal import httpx from ..... import _legacy_response from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ....._utils import maybe_transform +from ....._utils import ( + maybe_transform, + async_maybe_transform, +) from ....._compat import cached_property from ....._resource import SyncAPIResource, AsyncAPIResource from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .....pagination import SyncCursorPage, AsyncCursorPage -from ....._base_client import ( - AsyncPaginator, - make_request_options, -) -from .....types.beta.threads.runs import step_list_params +from ....._base_client import AsyncPaginator, make_request_options +from .....types.beta.threads.runs import step_list_params, step_retrieve_params from .....types.beta.threads.runs.run_step import RunStep +from .....types.beta.threads.runs.run_step_include import RunStepInclude __all__ = ["Steps", "AsyncSteps"] @@ -26,10 +28,21 @@ class Steps(SyncAPIResource): @cached_property def with_raw_response(self) -> StepsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return StepsWithRawResponse(self) @cached_property def with_streaming_response(self) -> StepsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return StepsWithStreamingResponse(self) def retrieve( @@ -38,6 +51,7 @@ def retrieve( *, thread_id: str, run_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. 
# The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -49,6 +63,14 @@ def retrieve( Retrieves a run step. Args: + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -67,7 +89,11 @@ def retrieve( return self._get( f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams), ), cast_to=RunStep, ) @@ -79,6 +105,7 @@ def list( thread_id: str, after: str | NotGiven = NOT_GIVEN, before: str | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -99,8 +126,16 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -133,6 +168,7 @@ def list( { "after": after, "before": before, + "include": include, "limit": limit, "order": order, }, @@ -146,10 +182,21 @@ def list( class AsyncSteps(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncStepsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncStepsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncStepsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncStepsWithStreamingResponse(self) async def retrieve( @@ -158,6 +205,7 @@ async def retrieve( *, thread_id: str, run_id: str, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -169,6 +217,14 @@ async def retrieve( Retrieves a run step. Args: + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -187,7 +243,11 @@ async def retrieve( return await self._get( f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}", options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform({"include": include}, step_retrieve_params.StepRetrieveParams), ), cast_to=RunStep, ) @@ -199,6 +259,7 @@ def list( thread_id: str, after: str | NotGiven = NOT_GIVEN, before: str | NotGiven = NOT_GIVEN, + include: List[RunStepInclude] | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. @@ -219,8 +280,16 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. + + include: A list of additional fields to include in the response. Currently the only + supported value is `step_details.tool_calls[*].file_search.results[*].content` + to fetch the file search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -253,6 +322,7 @@ def list( { "after": after, "before": before, + "include": include, "limit": limit, "order": order, }, diff --git a/src/openai/resources/beta/threads/threads.py b/src/openai/resources/beta/threads/threads.py index 36cdd03f91..299b23f375 100644 --- a/src/openai/resources/beta/threads/threads.py +++ b/src/openai/resources/beta/threads/threads.py @@ -2,21 +2,12 @@ from __future__ import annotations -from typing import Union, Iterable, Optional, overload -from functools import partial -from typing_extensions import Literal +from typing import Union, Iterable, Optional +from typing_extensions import Literal, overload import httpx from .... 
import _legacy_response -from .runs import ( - Runs, - AsyncRuns, - RunsWithRawResponse, - AsyncRunsWithRawResponse, - RunsWithStreamingResponse, - AsyncRunsWithStreamingResponse, -) from .messages import ( Messages, AsyncMessages, @@ -31,7 +22,14 @@ maybe_transform, async_maybe_transform, ) -from .runs.runs import Runs, AsyncRuns +from .runs.runs import ( + Runs, + AsyncRuns, + RunsWithRawResponse, + AsyncRunsWithRawResponse, + RunsWithStreamingResponse, + AsyncRunsWithStreamingResponse, +) from ...._compat import cached_property from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper @@ -41,20 +39,12 @@ thread_update_params, thread_create_and_run_params, ) -from ...._base_client import ( - make_request_options, -) -from ....lib.streaming import ( - AssistantEventHandler, - AssistantEventHandlerT, - AssistantStreamManager, - AsyncAssistantEventHandler, - AsyncAssistantEventHandlerT, - AsyncAssistantStreamManager, -) +from ...._base_client import make_request_options from ....types.beta.thread import Thread from ....types.beta.threads.run import Run +from ....types.shared.chat_model import ChatModel from ....types.beta.thread_deleted import ThreadDeleted +from ....types.shared_params.metadata import Metadata from ....types.beta.assistant_stream_event import AssistantStreamEvent from ....types.beta.assistant_tool_choice_option_param import AssistantToolChoiceOptionParam from ....types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam @@ -73,17 +63,28 @@ def messages(self) -> Messages: @cached_property def with_raw_response(self) -> ThreadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ThreadsWithRawResponse(self) @cached_property def with_streaming_response(self) -> ThreadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ThreadsWithStreamingResponse(self) def create( self, *, messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -100,9 +101,11 @@ def create( start the thread with. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. tool_resources: A set of resources that are made available to the assistant's tools in this thread. 
The resources are specific to the type of tool. For example, the @@ -172,7 +175,7 @@ def update( self, thread_id: str, *, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -186,9 +189,11 @@ def update( Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. tool_resources: A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the @@ -263,34 +268,9 @@ def create_and_run( instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -331,21 +311,32 @@ def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. 
Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -364,7 +355,8 @@ def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -410,34 +402,9 @@ def create_and_run( instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, @@ -481,21 +448,32 @@ def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. 
+ parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -510,7 +488,8 @@ def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -556,34 +535,9 @@ def create_and_run( instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, @@ -627,21 +581,32 @@ def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. 
If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -656,7 +621,8 @@ def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. 
`auto` is the default value @@ -701,34 +667,9 @@ def create_and_run( instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -756,6 +697,7 @@ def create_and_run( "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, "response_format": response_format, "stream": stream, "temperature": temperature, @@ -776,284 +718,6 @@ def create_and_run( stream_cls=Stream[AssistantStreamEvent], ) - def create_and_run_poll( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to create a thread, start a run and then poll for a terminal state. 
- More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = self.create_and_run( - assistant_id=assistant_id, - instructions=instructions, - max_completion_tokens=max_completion_tokens, - max_prompt_tokens=max_prompt_tokens, - metadata=metadata, - model=model, - response_format=response_format, - temperature=temperature, - stream=False, - thread=thread, - tool_resources=tool_resources, - tool_choice=tool_choice, - truncation_strategy=truncation_strategy, - top_p=top_p, - tools=tools, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms) - - @overload - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler]: - """Create a thread and stream the run back""" - ... 
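For orientation while reading the removed helpers, a rough sketch (assuming a synchronous `OpenAI` client and placeholder IDs, not part of this patch) of calling the retained `create_and_run` method documented earlier in this file, then inspecting the run with `runs.retrieve`:

# Illustrative sketch only; IDs and message content are placeholders.
from openai import OpenAI

client = OpenAI()

run = client.beta.threads.create_and_run(
    assistant_id="asst_abc123",
    thread={"messages": [{"role": "user", "content": "Summarize the attached notes."}]},
)

# Runs execute asynchronously server-side; fetch the current state as needed.
run = client.beta.threads.runs.retrieve(run_id=run.id, thread_id=run.thread_id)
print(run.status)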
- - @overload - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - event_handler: AssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandlerT]: - """Create a thread and stream the run back""" - ... 
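For context on the streaming overloads removed here: on releases that still ship the helper, `create_and_run_stream` is typically driven with a custom event handler roughly as below. The assistant ID and prompt are placeholders, and the top-level `AssistantEventHandler` import and `until_done()` call follow the library's documented streaming helpers rather than anything shown in this hunk.

```python
from typing_extensions import override

from openai import OpenAI, AssistantEventHandler

client = OpenAI()


class PrintHandler(AssistantEventHandler):
    """Prints text deltas as the assistant streams them back."""

    @override
    def on_text_delta(self, delta, snapshot) -> None:
        print(delta.value or "", end="", flush=True)


# Usage sketch for versions that still provide the helper removed in this hunk.
with client.beta.threads.create_and_run_stream(
    assistant_id="asst_123",  # hypothetical assistant ID
    thread={"messages": [{"role": "user", "content": "Write a haiku about diffs."}]},
    event_handler=PrintHandler(),
) as stream:
    stream.until_done()
```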
- - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - event_handler: AssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]: - """Create a thread and stream the run back""" - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.create_and_run_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - make_request = partial( - self._post, - "/threads/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "thread": thread, - "tools": tools, - "tool": tool_resources, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - thread_create_and_run_params.ThreadCreateAndRunParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=Stream[AssistantStreamEvent], - ) - return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler()) - class AsyncThreads(AsyncAPIResource): @cached_property @@ -1066,17 +730,28 @@ def messages(self) -> AsyncMessages: @cached_property def with_raw_response(self) -> AsyncThreadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncThreadsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncThreadsWithStreamingResponse(self) async def create( self, *, messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, tool_resources: Optional[thread_create_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -1093,9 +768,11 @@ async def create( start the thread with. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. tool_resources: A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. For example, the @@ -1165,7 +842,7 @@ async def update( self, thread_id: str, *, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, tool_resources: Optional[thread_update_params.ToolResources] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -1179,9 +856,11 @@ async def update( Args: metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. tool_resources: A set of resources that are made available to the assistant's tools in this thread. The resources are specific to the type of tool. 
For example, the @@ -1256,34 +935,9 @@ async def create_and_run( instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -1324,21 +978,32 @@ async def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1357,7 +1022,8 @@ async def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. 
tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -1403,34 +1069,9 @@ async def create_and_run( instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, @@ -1474,21 +1115,32 @@ async def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -1503,7 +1155,8 @@ async def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. 
- thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -1549,34 +1202,9 @@ async def create_and_run( instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, @@ -1620,21 +1248,32 @@ async def create_and_run( `incomplete_details` for more info. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used. + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + response_format: Specifies the format that the model must output. Compatible with - [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to @@ -1649,7 +1288,8 @@ async def create_and_run( make the output more random, while lower values like 0.2 will make it more focused and deterministic. - thread: If no thread is provided, an empty thread will be created. + thread: Options to create a new thread. If no thread is provided when running a request, + an empty thread will be created. tool_choice: Controls which (if any) tool is called by the model. `none` means the model will not call any tools and instead generates a message. `auto` is the default value @@ -1694,34 +1334,9 @@ async def create_and_run( instructions: Optional[str] | NotGiven = NOT_GIVEN, max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: Union[str, ChatModel, None] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, temperature: Optional[float] | NotGiven = NOT_GIVEN, @@ -1749,6 +1364,7 @@ async def create_and_run( "max_prompt_tokens": max_prompt_tokens, "metadata": metadata, "model": model, + "parallel_tool_calls": parallel_tool_calls, "response_format": response_format, "stream": stream, "temperature": temperature, @@ -1769,288 +1385,6 @@ async def create_and_run( stream_cls=AsyncStream[AssistantStreamEvent], ) - async def create_and_run_poll( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - poll_interval_ms: int | NotGiven = 
NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Run: - """ - A helper to create a thread, start a run and then poll for a terminal state. - More information on Run lifecycles can be found here: - https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps - """ - run = await self.create_and_run( - assistant_id=assistant_id, - instructions=instructions, - max_completion_tokens=max_completion_tokens, - max_prompt_tokens=max_prompt_tokens, - metadata=metadata, - model=model, - response_format=response_format, - temperature=temperature, - stream=False, - thread=thread, - tool_resources=tool_resources, - tool_choice=tool_choice, - truncation_strategy=truncation_strategy, - top_p=top_p, - tools=tools, - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - ) - return await self.runs.poll( - run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms - ) - - @overload - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]: - """Create a thread and stream the run back""" - ... 
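Again purely illustrative, not part of the diff: an async sketch of what the removed `create_and_run_poll` helper did, using `AsyncOpenAI` with a placeholder assistant ID and a simple fixed-interval retrieve loop (the helper itself honored `poll_interval_ms`, and `runs.retrieve` taking `thread_id` as a keyword is assumed):

```python
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def run_to_completion() -> None:
    # Start a run on a new thread, then retrieve it until it reaches a terminal state.
    run = await client.beta.threads.create_and_run(
        assistant_id="asst_123",  # hypothetical assistant ID
        thread={"messages": [{"role": "user", "content": "List three test ideas."}]},
    )
    while run.status in ("queued", "in_progress", "cancelling"):
        await asyncio.sleep(1)  # stand-in for the helper's poll_interval_ms
        run = await client.beta.threads.runs.retrieve(run.id, thread_id=run.thread_id)
    print(run.status)


asyncio.run(run_to_completion())
```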
- - @overload - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - event_handler: AsyncAssistantEventHandlerT, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]: - """Create a thread and stream the run back""" - ... 
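For reference only: driving the async `create_and_run_stream` helper with a custom `AsyncAssistantEventHandler`, on versions that still provide it. The assistant ID and prompt are placeholders, and the top-level `AsyncAssistantEventHandler` import and `await stream.until_done()` call are assumed from the library's documented helpers.

```python
import asyncio

from typing_extensions import override

from openai import AsyncOpenAI, AsyncAssistantEventHandler

client = AsyncOpenAI()


class PrintHandler(AsyncAssistantEventHandler):
    """Prints text deltas as they arrive."""

    @override
    async def on_text_delta(self, delta, snapshot) -> None:
        print(delta.value or "", end="", flush=True)


async def main() -> None:
    # Usage sketch for versions that still ship the async helper removed in this hunk.
    async with client.beta.threads.create_and_run_stream(
        assistant_id="asst_123",  # hypothetical assistant ID
        thread={"messages": [{"role": "user", "content": "Explain SSE in one line."}]},
        event_handler=PrintHandler(),
    ) as stream:
        await stream.until_done()


asyncio.run(main())
```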
- - def create_and_run_stream( - self, - *, - assistant_id: str, - instructions: Optional[str] | NotGiven = NOT_GIVEN, - max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, - max_prompt_tokens: Optional[int] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] - | NotGiven = NOT_GIVEN, - response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN, - tool_choice: Optional[AssistantToolChoiceOptionParam] | NotGiven = NOT_GIVEN, - tool_resources: Optional[thread_create_and_run_params.ToolResources] | NotGiven = NOT_GIVEN, - tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - truncation_strategy: Optional[thread_create_and_run_params.TruncationStrategy] | NotGiven = NOT_GIVEN, - event_handler: AsyncAssistantEventHandlerT | None = None, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ( - AsyncAssistantStreamManager[AsyncAssistantEventHandler] - | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT] - ): - """Create a thread and stream the run back""" - extra_headers = { - "OpenAI-Beta": "assistants=v2", - "X-Stainless-Stream-Helper": "threads.create_and_run_stream", - "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false", - **(extra_headers or {}), - } - request = self._post( - "/threads/runs", - body=maybe_transform( - { - "assistant_id": assistant_id, - "instructions": instructions, - "max_completion_tokens": max_completion_tokens, - "max_prompt_tokens": max_prompt_tokens, - "metadata": metadata, - "model": model, - "response_format": response_format, - "temperature": temperature, - "tool_choice": tool_choice, - "stream": True, - "thread": thread, - "tools": tools, - "tool": tool_resources, - "truncation_strategy": truncation_strategy, - "top_p": top_p, - }, - thread_create_and_run_params.ThreadCreateAndRunParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=Run, - stream=True, - stream_cls=AsyncStream[AssistantStreamEvent], - ) - return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler()) - class ThreadsWithRawResponse: def __init__(self, threads: Threads) -> None: diff --git a/src/openai/resources/chat/chat.py b/src/openai/resources/chat/chat.py index d14d055506..14f9224b41 100644 --- a/src/openai/resources/chat/chat.py +++ b/src/openai/resources/chat/chat.py @@ -4,7 +4,7 @@ from ..._compat import cached_property from ..._resource import 
SyncAPIResource, AsyncAPIResource -from .completions import ( +from .completions.completions import ( Completions, AsyncCompletions, CompletionsWithRawResponse, @@ -23,10 +23,21 @@ def completions(self) -> Completions: @cached_property def with_raw_response(self) -> ChatWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ChatWithRawResponse(self) @cached_property def with_streaming_response(self) -> ChatWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ChatWithStreamingResponse(self) @@ -37,10 +48,21 @@ def completions(self) -> AsyncCompletions: @cached_property def with_raw_response(self) -> AsyncChatWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncChatWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncChatWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncChatWithStreamingResponse(self) diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py deleted file mode 100644 index aa25bc1858..0000000000 --- a/src/openai/resources/chat/completions.py +++ /dev/null @@ -1,1251 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import Dict, List, Union, Iterable, Optional, overload -from typing_extensions import Literal - -import httpx - -from ... 
import _legacy_response -from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ..._utils import ( - required_args, - maybe_transform, - async_maybe_transform, -) -from ..._compat import cached_property -from ..._resource import SyncAPIResource, AsyncAPIResource -from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ..._streaming import Stream, AsyncStream -from ...types.chat import completion_create_params -from ..._base_client import ( - make_request_options, -) -from ...types.chat_model import ChatModel -from ...types.chat.chat_completion import ChatCompletion -from ...types.chat.chat_completion_chunk import ChatCompletionChunk -from ...types.chat.chat_completion_tool_param import ChatCompletionToolParam -from ...types.chat.chat_completion_message_param import ChatCompletionMessageParam -from ...types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam -from ...types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam - -__all__ = ["Completions", "AsyncCompletions"] - - -class Completions(SyncAPIResource): - @cached_property - def with_raw_response(self) -> CompletionsWithRawResponse: - return CompletionsWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> CompletionsWithStreamingResponse: - return CompletionsWithStreamingResponse(self) - - @overload - def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. 
See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. 
Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
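The overload above documents JSON mode and its requirement that the prompt itself ask for JSON. A minimal non-streaming call illustrating that warning, not part of the diff; the model name and prompts are placeholders:

```python
from openai import OpenAI

client = OpenAI()

# With response_format={"type": "json_object"} the prompt must also instruct the
# model to produce JSON, per the docstring's warning about "stuck" requests.
completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are a helpful assistant. Reply in JSON."},
        {"role": "user", "content": "Give me a color and its hex code."},
    ],
    response_format={"type": "json_object"},
)
print(completion.choices[0].message.content)
```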
- - @overload - def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - stream: Literal[True], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> Stream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. 
- - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. 
`required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - stream: bool, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. 
See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. 
- - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
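# A minimal usage sketch of the two call shapes documented in the overloads
# above: a plain request that returns a ChatCompletion, and a streaming request
# that returns a Stream[ChatCompletionChunk]. The model name and prompts are
# placeholders, and the client is assumed to pick up OPENAI_API_KEY from the
# environment.
from openai import OpenAI

client = OpenAI()

# Non-streaming: the full ChatCompletion arrives once generation finishes.
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(completion.choices[0].message.content)

# Streaming: partial deltas arrive as server-sent events; concatenating the
# `delta.content` pieces rebuilds the assistant message.
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Count from 1 to 5."}],
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")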
- - @required_args(["messages", "model"], ["messages", "model", "stream"]) - def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | Stream[ChatCompletionChunk]: - return self._post( - "/chat/completions", - body=maybe_transform( - { - "messages": messages, - "model": model, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_logprobs": top_logprobs, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ChatCompletion, - stream=stream or False, - stream_cls=Stream[ChatCompletionChunk], - ) - - -class AsyncCompletions(AsyncAPIResource): - @cached_property - def with_raw_response(self) -> AsyncCompletionsWithRawResponse: - return AsyncCompletionsWithRawResponse(self) - - @cached_property - def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: - return AsyncCompletionsWithStreamingResponse(self) - - @overload - async def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | 
NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. 
- - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. 
Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @overload - async def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - stream: Literal[True], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncStream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. 
Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. - - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. 
Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. `required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... 
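# The async overloads mirror the sync ones; this sketch (same placeholder model
# name) uses AsyncOpenAI and iterates the AsyncStream[ChatCompletionChunk] that
# `stream=True` returns with `async for`.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    stream = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Name three prime numbers."}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")


asyncio.run(main())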
- - @overload - async def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - stream: bool, - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. - extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - """ - Creates a model response for the given chat conversation. - - Args: - messages: A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). - - model: ID of the model to use. See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. - - stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be - sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). - - frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their - existing frequency in the text so far, decreasing the model's likelihood to - repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - function_call: Deprecated in favor of `tool_choice`. - - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that - function. - - `none` is the default when no functions are present. `auto` is the default if - functions are present. - - functions: Deprecated in favor of `tools`. - - A list of functions the model may generate JSON inputs for. 
- - logit_bias: Modify the likelihood of specified tokens appearing in the completion. - - Accepts a JSON object that maps tokens (specified by their token ID in the - tokenizer) to an associated bias value from -100 to 100. Mathematically, the - bias is added to the logits generated by the model prior to sampling. The exact - effect will vary per model, but values between -1 and 1 should decrease or - increase likelihood of selection; values like -100 or 100 should result in a ban - or exclusive selection of the relevant token. - - logprobs: Whether to return log probabilities of the output tokens or not. If true, - returns the log probabilities of each output token returned in the `content` of - `message`. - - max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. - - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. - - n: How many chat completion choices to generate for each input message. Note that - you will be charged based on the number of generated tokens across all of the - choices. Keep `n` as `1` to minimize costs. - - presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on - whether they appear in the text so far, increasing the model's likelihood to - talk about new topics. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) - - response_format: An object specifying the format that the model must output. Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. - - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. - - seed: This feature is in Beta. If specified, our system will make a best effort to - sample deterministically, such that repeated requests with the same `seed` and - parameters should return the same result. Determinism is not guaranteed, and you - should refer to the `system_fingerprint` response parameter to monitor changes - in the backend. - - stop: Up to 4 sequences where the API will stop generating further tokens. - - stream_options: Options for streaming response. Only set this when you set `stream: true`. - - temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will - make the output more random, while lower values like 0.2 will make it more - focused and deterministic. - - We generally recommend altering this or `top_p` but not both. - - tool_choice: Controls which (if any) tool is called by the model. `none` means the model will - not call any tool and instead generates a message. `auto` means the model can - pick between generating a message or calling one or more tools. 
`required` means - the model must call one or more tools. Specifying a particular tool via - `{"type": "function", "function": {"name": "my_function"}}` forces the model to - call that tool. - - `none` is the default when no tools are present. `auto` is the default if tools - are present. - - tools: A list of tools the model may call. Currently, only functions are supported as a - tool. Use this to provide a list of functions the model may generate JSON inputs - for. A max of 128 functions are supported. - - top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to - return at each token position, each with an associated log probability. - `logprobs` must be set to `true` if this parameter is used. - - top_p: An alternative to sampling with temperature, called nucleus sampling, where the - model considers the results of the tokens with top_p probability mass. So 0.1 - means only the tokens comprising the top 10% probability mass are considered. - - We generally recommend altering this or `temperature` but not both. - - user: A unique identifier representing your end-user, which can help OpenAI to monitor - and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). - - extra_headers: Send extra headers - - extra_query: Add additional query parameters to the request - - extra_body: Add additional JSON properties to the request - - timeout: Override the client-level default timeout for this request, in seconds - """ - ... - - @required_args(["messages", "model"], ["messages", "model", "stream"]) - async def create( - self, - *, - messages: Iterable[ChatCompletionMessageParam], - model: Union[str, ChatModel], - frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, - function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, - functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, - logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, - logprobs: Optional[bool] | NotGiven = NOT_GIVEN, - max_tokens: Optional[int] | NotGiven = NOT_GIVEN, - n: Optional[int] | NotGiven = NOT_GIVEN, - presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, - response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, - seed: Optional[int] | NotGiven = NOT_GIVEN, - stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN, - stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, - stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, - temperature: Optional[float] | NotGiven = NOT_GIVEN, - tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, - tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, - top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, - top_p: Optional[float] | NotGiven = NOT_GIVEN, - user: str | NotGiven = NOT_GIVEN, - # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. - # The extra values given here take precedence over values defined on the client or passed to this method. 
- extra_headers: Headers | None = None, - extra_query: Query | None = None, - extra_body: Body | None = None, - timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: - return await self._post( - "/chat/completions", - body=await async_maybe_transform( - { - "messages": messages, - "model": model, - "frequency_penalty": frequency_penalty, - "function_call": function_call, - "functions": functions, - "logit_bias": logit_bias, - "logprobs": logprobs, - "max_tokens": max_tokens, - "n": n, - "presence_penalty": presence_penalty, - "response_format": response_format, - "seed": seed, - "stop": stop, - "stream": stream, - "stream_options": stream_options, - "temperature": temperature, - "tool_choice": tool_choice, - "tools": tools, - "top_logprobs": top_logprobs, - "top_p": top_p, - "user": user, - }, - completion_create_params.CompletionCreateParams, - ), - options=make_request_options( - extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout - ), - cast_to=ChatCompletion, - stream=stream or False, - stream_cls=AsyncStream[ChatCompletionChunk], - ) - - -class CompletionsWithRawResponse: - def __init__(self, completions: Completions) -> None: - self._completions = completions - - self.create = _legacy_response.to_raw_response_wrapper( - completions.create, - ) - - -class AsyncCompletionsWithRawResponse: - def __init__(self, completions: AsyncCompletions) -> None: - self._completions = completions - - self.create = _legacy_response.async_to_raw_response_wrapper( - completions.create, - ) - - -class CompletionsWithStreamingResponse: - def __init__(self, completions: Completions) -> None: - self._completions = completions - - self.create = to_streamed_response_wrapper( - completions.create, - ) - - -class AsyncCompletionsWithStreamingResponse: - def __init__(self, completions: AsyncCompletions) -> None: - self._completions = completions - - self.create = async_to_streamed_response_wrapper( - completions.create, - ) diff --git a/src/openai/resources/chat/completions/__init__.py b/src/openai/resources/chat/completions/__init__.py new file mode 100644 index 0000000000..12d3b3aa28 --- /dev/null +++ b/src/openai/resources/chat/completions/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from .completions import ( + Completions, + AsyncCompletions, + CompletionsWithRawResponse, + AsyncCompletionsWithRawResponse, + CompletionsWithStreamingResponse, + AsyncCompletionsWithStreamingResponse, +) + +__all__ = [ + "Messages", + "AsyncMessages", + "MessagesWithRawResponse", + "AsyncMessagesWithRawResponse", + "MessagesWithStreamingResponse", + "AsyncMessagesWithStreamingResponse", + "Completions", + "AsyncCompletions", + "CompletionsWithRawResponse", + "AsyncCompletionsWithRawResponse", + "CompletionsWithStreamingResponse", + "AsyncCompletionsWithStreamingResponse", +] diff --git a/src/openai/resources/chat/completions/completions.py b/src/openai/resources/chat/completions/completions.py new file mode 100644 index 0000000000..b3e4666fc1 --- /dev/null +++ b/src/openai/resources/chat/completions/completions.py @@ -0,0 +1,2320 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, overload + +import httpx + +from .... import _legacy_response +from .messages import ( + Messages, + AsyncMessages, + MessagesWithRawResponse, + AsyncMessagesWithRawResponse, + MessagesWithStreamingResponse, + AsyncMessagesWithStreamingResponse, +) +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...._streaming import Stream, AsyncStream +from ....pagination import SyncCursorPage, AsyncCursorPage +from ....types.chat import ( + ChatCompletionAudioParam, + completion_list_params, + completion_create_params, + completion_update_params, +) +from ...._base_client import AsyncPaginator, make_request_options +from ....types.shared.chat_model import ChatModel +from ....types.chat.chat_completion import ChatCompletion +from ....types.shared_params.metadata import Metadata +from ....types.shared.reasoning_effort import ReasoningEffort +from ....types.chat.chat_completion_chunk import ChatCompletionChunk +from ....types.chat.chat_completion_deleted import ChatCompletionDeleted +from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam +from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam +from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam +from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam +from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam + +__all__ = ["Completions", "AsyncCompletions"] + + +class Completions(SyncAPIResource): + @cached_property + def messages(self) -> Messages: + return Messages(self._client) + + @cached_property + def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return CompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return CompletionsWithStreamingResponse(self) + + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). 
+ + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. 
Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarentee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarentee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. 
+ + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
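# A sketch of the `tools` / `tool_choice` behaviour described above: one
# function tool is declared, the specific-tool form of `tool_choice` forces the
# model to call it, and the generated arguments are read back from
# `message.tool_calls`. The weather function and its JSON Schema are invented
# placeholders for illustration.
import json

from openai import OpenAI

client = OpenAI()

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Look up the current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }
]

completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=tools,
    # Forces the named tool; omit this (or pass "auto") to let the model decide.
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)
tool_call = completion.choices[0].message.tool_calls[0]
print(tool_call.function.name, json.loads(tool_call.function.arguments))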
+ + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. 
Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). 
+ + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens.
The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
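# --- Illustrative usage sketch (editor's addition, not part of this patch or of the generated resource code) ---
# The overload above types `create(..., stream=True)` as returning Stream[ChatCompletionChunk],
# so chunks can be iterated as they arrive. Client construction via the top-level `OpenAI`
# class and the OPENAI_API_KEY environment variable is an assumption for the example.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,
)
for chunk in stream:
    # each item is a ChatCompletionChunk; the text delta may be None on some events
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")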
+ + @overload + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. 
Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). 
+ + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens.
The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
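# --- Illustrative usage sketch (editor's addition, not part of this patch or of the generated resource code) ---
# The `stream: bool` overload above is the one selected when streaming is decided at
# runtime; its result is `ChatCompletion | Stream[ChatCompletionChunk]` and must be
# narrowed before use. The `should_stream` flag and the client setup are assumptions;
# `Stream` is re-exported from the package root in current versions of the library.
from openai import OpenAI, Stream

client = OpenAI()
should_stream = False  # e.g. derived from a request flag

result = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Summarize HTTP in one sentence."}],
    stream=should_stream,
)
if isinstance(result, Stream):
    for chunk in result:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")
else:
    # non-streaming path: a fully-formed ChatCompletion
    print(result.choices[0].message.content)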
+ + @required_args(["messages", "model"], ["messages", "model", "stream"]) + def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | Stream[ChatCompletionChunk]: + return self._post( + "/chat/completions", + body=maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=Stream[ChatCompletionChunk], + ) + + def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Get a stored chat completion. + + Only Chat Completions that have been created with + the `store` parameter set to `true` will be returned. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def update( + self, + completion_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Modify a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be modified. Currently, the only + supported modification is to update the `metadata` field. + + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. 
This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._post( + f"/chat/completions/{completion_id}", + body=maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[ChatCompletion]: + """List stored Chat Completions. + + Only Chat Completions that have been stored with + the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last chat completion from the previous pagination request. + + limit: Number of Chat Completions to retrieve. + + metadata: + A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + + model: The model used to generate the Chat Completions. + + order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/chat/completions", + page=SyncCursorPage[ChatCompletion], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + model=ChatCompletion, + ) + + def delete( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletionDeleted: + """Delete a stored chat completion. 
+ + Only Chat Completions that have been created + with the `store` parameter set to `true` can be deleted. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._delete( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletionDeleted, + ) + + +class AsyncCompletions(AsyncAPIResource): + @cached_property + def messages(self) -> AsyncMessages: + return AsyncMessages(self._client) + + @cached_property + def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncCompletionsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncCompletionsWithStreamingResponse(self) + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: 
completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. 
The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. 
Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both.
+ + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: Literal[True], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. 
Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. 
If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. 
This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens. The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ...
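# --- Illustrative usage sketch (editor's addition, not part of this patch or of the generated resource code) ---
# Async counterpart of the streaming overload above: awaiting `create(..., stream=True)` on
# the async client resolves to AsyncStream[ChatCompletionChunk], consumed with `async for`.
# Construction via the top-level `AsyncOpenAI` class is assumed for the example.
import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # reads OPENAI_API_KEY from the environment
    stream = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Say hello"}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="")


asyncio.run(main())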
+ + @overload + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + stream: bool, + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + """ + **Starting a new project?** We recommend trying + [Responses](https://platform.openai.com/docs/api-reference/responses) to take + advantage of the latest OpenAI platform features. Compare + [Chat Completions with Responses](https://platform.openai.com/docs/guides/responses-vs-chat-completions?api-mode=responses). + + --- + + Creates a model response for the given chat conversation. Learn more in the + [text generation](https://platform.openai.com/docs/guides/text-generation), + [vision](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio) guides. + + Parameter support can differ depending on the model used to generate the + response, particularly for newer reasoning models. Parameters that are only + supported for reasoning models are noted below. For the current state of + unsupported parameters in reasoning models, + [refer to the reasoning guide](https://platform.openai.com/docs/guides/reasoning). + + Args: + messages: A list of messages comprising the conversation so far. 
Depending on the + [model](https://platform.openai.com/docs/models) you use, different message + types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. + + audio: Parameters for audio output. Required when audio output is requested with + `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). + + frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their + existing frequency in the text so far, decreasing the model's likelihood to + repeat the same line verbatim. + + function_call: Deprecated in favor of `tool_choice`. + + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a + function. + + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + + `none` is the default when no functions are present. `auto` is the default if + functions are present. + + functions: Deprecated in favor of `tools`. + + A list of functions the model may generate JSON inputs for. + + logit_bias: Modify the likelihood of specified tokens appearing in the completion. + + Accepts a JSON object that maps tokens (specified by their token ID in the + tokenizer) to an associated bias value from -100 to 100. Mathematically, the + bias is added to the logits generated by the model prior to sampling. The exact + effect will vary per model, but values between -1 and 1 should decrease or + increase likelihood of selection; values like -100 or 100 should result in a ban + or exclusive selection of the relevant token. + + logprobs: Whether to return log probabilities of the output tokens or not. If true, + returns the log probabilities of each output token returned in the `content` of + `message`. + + max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). 
+ + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + modalities: Output types that you would like the model to generate. Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` + + n: How many chat completion choices to generate for each input message. Note that + you will be charged based on the number of generated tokens across all of the + choices. Keep `n` as `1` to minimize costs. + + parallel_tool_calls: Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + + prediction: Static predicted output content, such as the content of a text file that is + being regenerated. + + presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on + whether they appear in the text so far, increasing the model's likelihood to + talk about new topics. + + reasoning_effort: **o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + + response_format: An object specifying the format that the model must output. + + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + + seed: This feature is in Beta. If specified, our system will make a best effort to + sample deterministically, such that repeated requests with the same `seed` and + parameters should return the same result. Determinism is not guaranteed, and you + should refer to the `system_fingerprint` response parameter to monitor changes + in the backend. + + service_tier: Specifies the latency tier to use for processing the request. This parameter is + relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. + - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no + latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + + stop: Up to 4 sequences where the API will stop generating further tokens.
The + returned text will not contain the stop sequence. + + store: Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + + stream_options: Options for streaming response. Only set this when you set `stream: true`. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + tool_choice: Controls which (if any) tool is called by the model. `none` means the model will + not call any tool and instead generates a message. `auto` means the model can + pick between generating a message or calling one or more tools. `required` means + the model must call one or more tools. Specifying a particular tool via + `{"type": "function", "function": {"name": "my_function"}}` forces the model to + call that tool. + + `none` is the default when no tools are present. `auto` is the default if tools + are present. + + tools: A list of tools the model may call. Currently, only functions are supported as a + tool. Use this to provide a list of functions the model may generate JSON inputs + for. A max of 128 functions are supported. + + top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to + return at each token position, each with an associated log probability. + `logprobs` must be set to `true` if this parameter is used. + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + web_search_options: This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
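The `stream: bool` overload above can resolve to either a `ChatCompletion` or an `AsyncStream[ChatCompletionChunk]`. A minimal sketch of consuming the streaming case follows; the model name and prompt are placeholders, and the snippet assumes an `AsyncOpenAI` client with credentials available in the environment.

import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    # With stream=True the awaited call returns an async iterator of
    # ChatCompletionChunk objects, each carrying an incremental delta.
    stream = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Write a haiku about the sea."}],
        stream=True,
    )
    async for chunk in stream:
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end="", flush=True)
    print()


asyncio.run(main())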
+ + @required_args(["messages", "model"], ["messages", "model", "stream"]) + async def create( + self, + *, + messages: Iterable[ChatCompletionMessageParam], + model: Union[str, ChatModel], + audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN, + frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN, + function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN, + functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN, + logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN, + logprobs: Optional[bool] | NotGiven = NOT_GIVEN, + max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN, + max_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN, + n: Optional[int] | NotGiven = NOT_GIVEN, + parallel_tool_calls: bool | NotGiven = NOT_GIVEN, + prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN, + presence_penalty: Optional[float] | NotGiven = NOT_GIVEN, + reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN, + response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN, + seed: Optional[int] | NotGiven = NOT_GIVEN, + service_tier: Optional[Literal["auto", "default"]] | NotGiven = NOT_GIVEN, + stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN, + tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN, + top_logprobs: Optional[int] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]: + return await self._post( + "/chat/completions", + body=await async_maybe_transform( + { + "messages": messages, + "model": model, + "audio": audio, + "frequency_penalty": frequency_penalty, + "function_call": function_call, + "functions": functions, + "logit_bias": logit_bias, + "logprobs": logprobs, + "max_completion_tokens": max_completion_tokens, + "max_tokens": max_tokens, + "metadata": metadata, + "modalities": modalities, + "n": n, + "parallel_tool_calls": parallel_tool_calls, + "prediction": prediction, + "presence_penalty": presence_penalty, + "reasoning_effort": reasoning_effort, + "response_format": response_format, + "seed": seed, + "service_tier": service_tier, + "stop": stop, + "store": store, + "stream": stream, + "stream_options": stream_options, + "temperature": temperature, + "tool_choice": tool_choice, + "tools": tools, + "top_logprobs": top_logprobs, + "top_p": top_p, + "user": user, + "web_search_options": web_search_options, + }, + completion_create_params.CompletionCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + stream=stream or False, + stream_cls=AsyncStream[ChatCompletionChunk], + ) + + async def retrieve( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Get a stored chat completion. + + Only Chat Completions that have been created with + the `store` parameter set to `true` will be returned. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._get( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + async def update( + self, + completion_id: str, + *, + metadata: Optional[Metadata], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletion: + """Modify a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be modified. Currently, the only + supported modification is to update the `metadata` field. 
+ + Args: + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._post( + f"/chat/completions/{completion_id}", + body=await async_maybe_transform({"metadata": metadata}, completion_update_params.CompletionUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletion, + ) + + def list( + self, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + model: str | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ChatCompletion, AsyncCursorPage[ChatCompletion]]: + """List stored Chat Completions. + + Only Chat Completions that have been stored with + the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last chat completion from the previous pagination request. + + limit: Number of Chat Completions to retrieve. + + metadata: + A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + + model: The model used to generate the Chat Completions. + + order: Sort order for Chat Completions by timestamp. Use `asc` for ascending order or + `desc` for descending order. Defaults to `asc`. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._get_api_list( + "/chat/completions", + page=AsyncCursorPage[ChatCompletion], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "metadata": metadata, + "model": model, + "order": order, + }, + completion_list_params.CompletionListParams, + ), + ), + model=ChatCompletion, + ) + + async def delete( + self, + completion_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> ChatCompletionDeleted: + """Delete a stored chat completion. + + Only Chat Completions that have been created + with the `store` parameter set to `true` can be deleted. + + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return await self._delete( + f"/chat/completions/{completion_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=ChatCompletionDeleted, + ) + + +class CompletionsWithRawResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = _legacy_response.to_raw_response_wrapper( + completions.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + completions.retrieve, + ) + self.update = _legacy_response.to_raw_response_wrapper( + completions.update, + ) + self.list = _legacy_response.to_raw_response_wrapper( + completions.list, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> MessagesWithRawResponse: + return MessagesWithRawResponse(self._completions.messages) + + +class AsyncCompletionsWithRawResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = _legacy_response.async_to_raw_response_wrapper( + completions.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + completions.retrieve, + ) + self.update = _legacy_response.async_to_raw_response_wrapper( + completions.update, + ) + self.list = _legacy_response.async_to_raw_response_wrapper( + completions.list, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> AsyncMessagesWithRawResponse: + return AsyncMessagesWithRawResponse(self._completions.messages) + + +class CompletionsWithStreamingResponse: + def __init__(self, completions: Completions) -> None: + self._completions = completions + + self.create = to_streamed_response_wrapper( + completions.create, + ) + self.retrieve = to_streamed_response_wrapper( + completions.retrieve, + ) + self.update = to_streamed_response_wrapper( + completions.update, + ) + self.list = to_streamed_response_wrapper( + completions.list, + ) + self.delete = to_streamed_response_wrapper( + completions.delete, + ) + + @cached_property + def messages(self) -> MessagesWithStreamingResponse: + return MessagesWithStreamingResponse(self._completions.messages) + + +class AsyncCompletionsWithStreamingResponse: + def __init__(self, completions: AsyncCompletions) -> None: + self._completions = completions + + self.create = async_to_streamed_response_wrapper( + completions.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + completions.retrieve, + ) + self.update = async_to_streamed_response_wrapper( + completions.update, + ) + self.list = async_to_streamed_response_wrapper( + completions.list, + ) + self.delete = async_to_streamed_response_wrapper( + 
completions.delete, + ) + + @cached_property + def messages(self) -> AsyncMessagesWithStreamingResponse: + return AsyncMessagesWithStreamingResponse(self._completions.messages) diff --git a/src/openai/resources/chat/completions/messages.py b/src/openai/resources/chat/completions/messages.py new file mode 100644 index 0000000000..fac15fba8b --- /dev/null +++ b/src/openai/resources/chat/completions/messages.py @@ -0,0 +1,212 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal + +import httpx + +from .... import _legacy_response +from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ...._utils import maybe_transform +from ...._compat import cached_property +from ...._resource import SyncAPIResource, AsyncAPIResource +from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ....pagination import SyncCursorPage, AsyncCursorPage +from ...._base_client import AsyncPaginator, make_request_options +from ....types.chat.completions import message_list_params +from ....types.chat.chat_completion_store_message import ChatCompletionStoreMessage + +__all__ = ["Messages", "AsyncMessages"] + + +class Messages(SyncAPIResource): + @cached_property + def with_raw_response(self) -> MessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return MessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> MessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return MessagesWithStreamingResponse(self) + + def list( + self, + completion_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[ChatCompletionStoreMessage]: + """Get the messages in a stored chat completion. + + Only Chat Completions that have + been created with the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last message from the previous pagination request. + + limit: Number of messages to retrieve. + + order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc` + for descending order. Defaults to `asc`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get_api_list( + f"/chat/completions/{completion_id}/messages", + page=SyncCursorPage[ChatCompletionStoreMessage], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + message_list_params.MessageListParams, + ), + ), + model=ChatCompletionStoreMessage, + ) + + +class AsyncMessages(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncMessagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncMessagesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncMessagesWithStreamingResponse(self) + + def list( + self, + completion_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[ChatCompletionStoreMessage, AsyncCursorPage[ChatCompletionStoreMessage]]: + """Get the messages in a stored chat completion. + + Only Chat Completions that have + been created with the `store` parameter set to `true` will be returned. + + Args: + after: Identifier for the last message from the previous pagination request. + + limit: Number of messages to retrieve. + + order: Sort order for messages by timestamp. Use `asc` for ascending order or `desc` + for descending order. Defaults to `asc`. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not completion_id: + raise ValueError(f"Expected a non-empty value for `completion_id` but received {completion_id!r}") + return self._get_api_list( + f"/chat/completions/{completion_id}/messages", + page=AsyncCursorPage[ChatCompletionStoreMessage], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + }, + message_list_params.MessageListParams, + ), + ), + model=ChatCompletionStoreMessage, + ) + + +class MessagesWithRawResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.list = _legacy_response.to_raw_response_wrapper( + messages.list, + ) + + +class AsyncMessagesWithRawResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.list = _legacy_response.async_to_raw_response_wrapper( + messages.list, + ) + + +class MessagesWithStreamingResponse: + def __init__(self, messages: Messages) -> None: + self._messages = messages + + self.list = to_streamed_response_wrapper( + messages.list, + ) + + +class AsyncMessagesWithStreamingResponse: + def __init__(self, messages: AsyncMessages) -> None: + self._messages = messages + + self.list = async_to_streamed_response_wrapper( + messages.list, + ) diff --git a/src/openai/resources/completions.py b/src/openai/resources/completions.py index 0812000f78..46ed113ec9 100644 --- a/src/openai/resources/completions.py +++ b/src/openai/resources/completions.py @@ -2,8 +2,8 @@ from __future__ import annotations -from typing import Dict, List, Union, Iterable, Optional, overload -from typing_extensions import Literal +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, overload import httpx @@ -19,9 +19,7 @@ from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from .._streaming import Stream, AsyncStream -from .._base_client import ( - make_request_options, -) +from .._base_client import make_request_options from ..types.completion import Completion from ..types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam @@ -31,10 +29,21 @@ class Completions(SyncAPIResource): @cached_property def with_raw_response(self) -> CompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return CompletionsWithRawResponse(self) @cached_property def with_streaming_response(self) -> CompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return CompletionsWithStreamingResponse(self) @overload @@ -73,8 +82,8 @@ def create( model: ID of the model to use. 
You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -99,7 +108,7 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -139,7 +148,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -178,7 +187,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -226,8 +235,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -259,7 +268,7 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -299,7 +308,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -331,7 +340,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. 
- [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -379,8 +388,8 @@ def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -412,7 +421,7 @@ def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -452,7 +461,7 @@ def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -484,7 +493,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -562,10 +571,21 @@ def create( class AsyncCompletions(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncCompletionsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncCompletionsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncCompletionsWithStreamingResponse(self) @overload @@ -604,8 +624,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. 
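The hunks above only retouch documentation links in the legacy text-completions resource; for orientation, a minimal call sketch for that endpoint follows. The model name and prompt are illustrative, and the snippet assumes a synchronous `OpenAI` client with an API key in the environment.

from openai import OpenAI

client = OpenAI()

# Legacy /v1/completions request; `seed` asks for best-effort determinism and
# `frequency_penalty` discourages verbatim repetition, per the docstrings above.
completion = client.completions.create(
    model="gpt-3.5-turbo-instruct",  # illustrative legacy completions model
    prompt="Write a one-line tagline for a coffee shop.",
    max_tokens=32,
    seed=42,
    frequency_penalty=0.5,
)
print(completion.choices[0].text)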
@@ -630,7 +650,7 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -670,7 +690,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -709,7 +729,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -757,8 +777,8 @@ async def create( model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -790,7 +810,7 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -830,7 +850,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -862,7 +882,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -910,8 +930,8 @@ async def create( model: ID of the model to use. 
You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. prompt: The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays. @@ -943,7 +963,7 @@ async def create( existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) logit_bias: Modify the likelihood of specified tokens appearing in the completion. @@ -983,7 +1003,7 @@ async def create( whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) seed: If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return @@ -1015,7 +1035,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py index 773b6f0968..e6c09f1374 100644 --- a/src/openai/resources/embeddings.py +++ b/src/openai/resources/embeddings.py @@ -2,8 +2,7 @@ from __future__ import annotations -import base64 -from typing import List, Union, Iterable, cast +from typing import List, Union, Iterable from typing_extensions import Literal import httpx @@ -11,14 +10,15 @@ from .. import _legacy_response from ..types import embedding_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from .._utils import is_given, maybe_transform +from .._utils import ( + maybe_transform, + async_maybe_transform, +) from .._compat import cached_property -from .._extras import numpy as np, has_numpy from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .._base_client import ( - make_request_options, -) +from .._base_client import make_request_options +from ..types.embedding_model import EmbeddingModel from ..types.create_embedding_response import CreateEmbeddingResponse __all__ = ["Embeddings", "AsyncEmbeddings"] @@ -27,17 +27,28 @@ class Embeddings(SyncAPIResource): @cached_property def with_raw_response(self) -> EmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return EmbeddingsWithRawResponse(self) @cached_property def with_streaming_response(self) -> EmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return EmbeddingsWithStreamingResponse(self) def create( self, *, input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - model: Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]], + model: Union[str, EmbeddingModel], dimensions: int | NotGiven = NOT_GIVEN, encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -58,13 +69,14 @@ def create( `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. dimensions: The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. @@ -74,7 +86,7 @@ def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). 
extra_headers: Send extra headers @@ -84,42 +96,20 @@ def create( timeout: Override the client-level default timeout for this request, in seconds """ - params = { - "input": input, - "model": model, - "user": user, - "dimensions": dimensions, - "encoding_format": encoding_format, - } - if not is_given(encoding_format) and has_numpy(): - params["encoding_format"] = "base64" - - def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: - if is_given(encoding_format): - # don't modify the response object if a user explicitly asked for a format - return obj - - for embedding in obj.data: - data = cast(object, embedding.embedding) - if not isinstance(data, str): - # numpy is not installed / base64 optimisation isn't enabled for this model yet - continue - - embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] - base64.b64decode(data), dtype="float32" - ).tolist() - - return obj - return self._post( "/embeddings", - body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + body=maybe_transform( + { + "input": input, + "model": model, + "dimensions": dimensions, + "encoding_format": encoding_format, + "user": user, + }, + embedding_create_params.EmbeddingCreateParams, + ), options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=parser, + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=CreateEmbeddingResponse, ) @@ -128,17 +118,28 @@ def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: class AsyncEmbeddings(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncEmbeddingsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncEmbeddingsWithStreamingResponse(self) async def create( self, *, input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]], - model: Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]], + model: Union[str, EmbeddingModel], dimensions: int | NotGiven = NOT_GIVEN, encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN, user: str | NotGiven = NOT_GIVEN, @@ -159,13 +160,14 @@ async def create( `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. model: ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. 
dimensions: The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models. @@ -175,7 +177,7 @@ async def create( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -185,42 +187,20 @@ async def create( timeout: Override the client-level default timeout for this request, in seconds """ - params = { - "input": input, - "model": model, - "user": user, - "dimensions": dimensions, - "encoding_format": encoding_format, - } - if not is_given(encoding_format) and has_numpy(): - params["encoding_format"] = "base64" - - def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse: - if is_given(encoding_format): - # don't modify the response object if a user explicitly asked for a format - return obj - - for embedding in obj.data: - data = cast(object, embedding.embedding) - if not isinstance(data, str): - # numpy is not installed / base64 optimisation isn't enabled for this model yet - continue - - embedding.embedding = np.frombuffer( # type: ignore[no-untyped-call] - base64.b64decode(data), dtype="float32" - ).tolist() - - return obj - return await self._post( "/embeddings", - body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams), + body=await async_maybe_transform( + { + "input": input, + "model": model, + "dimensions": dimensions, + "encoding_format": encoding_format, + "user": user, + }, + embedding_create_params.EmbeddingCreateParams, + ), options=make_request_options( - extra_headers=extra_headers, - extra_query=extra_query, - extra_body=extra_body, - timeout=timeout, - post_parser=parser, + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), cast_to=CreateEmbeddingResponse, ) diff --git a/src/openai/resources/files.py b/src/openai/resources/files.py index aed0829dfe..4bc263511e 100644 --- a/src/openai/resources/files.py +++ b/src/openai/resources/files.py @@ -2,7 +2,6 @@ from __future__ import annotations -import time import typing_extensions from typing import Mapping, cast from typing_extensions import Literal @@ -10,7 +9,7 @@ import httpx from .. import _legacy_response -from ..types import file_list_params, file_create_params +from ..types import FilePurpose, file_list_params, file_create_params from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes from .._utils import ( extract_files, @@ -28,13 +27,11 @@ to_custom_streamed_response_wrapper, async_to_custom_streamed_response_wrapper, ) -from ..pagination import SyncPage, AsyncPage -from .._base_client import ( - AsyncPaginator, - make_request_options, -) +from ..pagination import SyncCursorPage, AsyncCursorPage +from .._base_client import AsyncPaginator, make_request_options from ..types.file_object import FileObject from ..types.file_deleted import FileDeleted +from ..types.file_purpose import FilePurpose __all__ = ["Files", "AsyncFiles"] @@ -42,17 +39,28 @@ class Files(SyncAPIResource): @cached_property def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return FilesWithRawResponse(self) @cached_property def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return FilesWithStreamingResponse(self) def create( self, *, file: FileTypes, - purpose: Literal["assistants", "batch", "fine-tune"], + purpose: FilePurpose, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -71,9 +79,15 @@ def create( [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for details. - The Fine-tuning API only supports `.jsonl` files. + The Fine-tuning API only supports `.jsonl` files. The input also has certain + required formats for fine-tuning + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + models. - The Batch API only supports `.jsonl` files up to 100 MB in size. + The Batch API only supports `.jsonl` files up to 200 MB in size. The input also + has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). Please [contact us](https://help.openai.com/) if you need to increase these storage limits. @@ -81,14 +95,10 @@ def create( Args: file: The File object (not file name) to be uploaded. - purpose: The intended purpose of the uploaded file. - - Use "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Message](https://platform.openai.com/docs/api-reference/messages) files, - "vision" for Assistants image file inputs, "batch" for - [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). + purpose: The intended purpose of the uploaded file. One of: - `assistants`: Used in the + Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for + fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`: + Flexible file type for any purpose - `evals`: Used for eval data sets extra_headers: Send extra headers @@ -105,11 +115,10 @@ def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/files", body=maybe_transform(body, file_create_params.FileCreateParams), @@ -156,6 +165,9 @@ def retrieve( def list( self, *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, purpose: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -163,11 +175,23 @@ def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> SyncPage[FileObject]: - """ - Returns a list of files that belong to the user's organization. + ) -> SyncCursorPage[FileObject]: + """Returns a list of files. Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 10,000, and the default is 10,000. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + purpose: Only return files with the given purpose. extra_headers: Send extra headers @@ -180,13 +204,21 @@ def list( """ return self._get_api_list( "/files", - page=SyncPage[FileObject], + page=SyncCursorPage[FileObject], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - query=maybe_transform({"purpose": purpose}, file_list_params.FileListParams), + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "purpose": purpose, + }, + file_list_params.FileListParams, + ), ), model=FileObject, ) @@ -292,44 +324,32 @@ def retrieve_content( cast_to=str, ) - def wait_for_processing( - self, - id: str, - *, - poll_interval: float = 5.0, - max_wait_seconds: float = 30 * 60, - ) -> FileObject: - """Waits for the given file to be processed, default timeout is 30 mins.""" - TERMINAL_STATES = {"processed", "error", "deleted"} - - start = time.time() - file = self.retrieve(id) - while file.status not in TERMINAL_STATES: - self._sleep(poll_interval) - - file = self.retrieve(id) - if time.time() - start > max_wait_seconds: - raise RuntimeError( - f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds." - ) - - return file - class AsyncFiles(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncFilesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
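A sketch of the new cursor-based file listing (not part of the diff; assumes files already exist in the organization):

    from openai import OpenAI

    client = OpenAI()

    # Iterating the returned SyncCursorPage fetches further pages on demand.
    for f in client.files.list(purpose="fine-tune", limit=100, order="desc"):
        print(f.id, f.filename)

    # Paging manually with the `after` cursor:
    page = client.files.list(limit=100)
    if page.data:
        next_page = client.files.list(limit=100, after=page.data[-1].id)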
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncFilesWithStreamingResponse(self) async def create( self, *, file: FileTypes, - purpose: Literal["assistants", "batch", "fine-tune"], + purpose: FilePurpose, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -348,9 +368,15 @@ async def create( [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) for details. - The Fine-tuning API only supports `.jsonl` files. + The Fine-tuning API only supports `.jsonl` files. The input also has certain + required formats for fine-tuning + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input) or + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + models. - The Batch API only supports `.jsonl` files up to 100 MB in size. + The Batch API only supports `.jsonl` files up to 200 MB in size. The input also + has a specific required + [format](https://platform.openai.com/docs/api-reference/batch/request-input). Please [contact us](https://help.openai.com/) if you need to increase these storage limits. @@ -358,14 +384,10 @@ async def create( Args: file: The File object (not file name) to be uploaded. - purpose: The intended purpose of the uploaded file. - - Use "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Message](https://platform.openai.com/docs/api-reference/messages) files, - "vision" for Assistants image file inputs, "batch" for - [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). + purpose: The intended purpose of the uploaded file. One of: - `assistants`: Used in the + Assistants API - `batch`: Used in the Batch API - `fine-tune`: Used for + fine-tuning - `vision`: Images used for vision fine-tuning - `user_data`: + Flexible file type for any purpose - `evals`: Used for eval data sets extra_headers: Send extra headers @@ -382,11 +404,10 @@ async def create( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["file"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/files", body=await async_maybe_transform(body, file_create_params.FileCreateParams), @@ -433,6 +454,9 @@ async def retrieve( def list( self, *, + after: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, purpose: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
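For the `.with_raw_response` / `.with_streaming_response` properties documented throughout this diff, a usage sketch (not part of the diff; `x-request-id` is an example header):

    from openai import OpenAI

    client = OpenAI()

    raw = client.files.with_raw_response.list()
    print(raw.headers.get("x-request-id"))  # inspect HTTP response headers
    files_page = raw.parse()                # then parse into the normal return type
    print(len(files_page.data))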
@@ -440,11 +464,23 @@ def list( extra_query: Query | None = None, extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, - ) -> AsyncPaginator[FileObject, AsyncPage[FileObject]]: - """ - Returns a list of files that belong to the user's organization. + ) -> AsyncPaginator[FileObject, AsyncCursorPage[FileObject]]: + """Returns a list of files. Args: + after: A cursor for use in pagination. + + `after` is an object ID that defines your place + in the list. For instance, if you make a list request and receive 100 objects, + ending with obj_foo, your subsequent call can include after=obj_foo in order to + fetch the next page of the list. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 10,000, and the default is 10,000. + + order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending + order and `desc` for descending order. + purpose: Only return files with the given purpose. extra_headers: Send extra headers @@ -457,13 +493,21 @@ def list( """ return self._get_api_list( "/files", - page=AsyncPage[FileObject], + page=AsyncCursorPage[FileObject], options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout, - query=maybe_transform({"purpose": purpose}, file_list_params.FileListParams), + query=maybe_transform( + { + "after": after, + "limit": limit, + "order": order, + "purpose": purpose, + }, + file_list_params.FileListParams, + ), ), model=FileObject, ) @@ -569,29 +613,6 @@ async def retrieve_content( cast_to=str, ) - async def wait_for_processing( - self, - id: str, - *, - poll_interval: float = 5.0, - max_wait_seconds: float = 30 * 60, - ) -> FileObject: - """Waits for the given file to be processed, default timeout is 30 mins.""" - TERMINAL_STATES = {"processed", "error", "deleted"} - - start = time.time() - file = await self.retrieve(id) - while file.status not in TERMINAL_STATES: - await self._sleep(poll_interval) - - file = await self.retrieve(id) - if time.time() - start > max_wait_seconds: - raise RuntimeError( - f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds." - ) - - return file - class FilesWithRawResponse: def __init__(self, files: Files) -> None: diff --git a/src/openai/resources/fine_tuning/fine_tuning.py b/src/openai/resources/fine_tuning/fine_tuning.py index 0404fed6ec..eebde07d81 100644 --- a/src/openai/resources/fine_tuning/fine_tuning.py +++ b/src/openai/resources/fine_tuning/fine_tuning.py @@ -2,7 +2,8 @@ from __future__ import annotations -from .jobs import ( +from ..._compat import cached_property +from .jobs.jobs import ( Jobs, AsyncJobs, JobsWithRawResponse, @@ -10,8 +11,6 @@ JobsWithStreamingResponse, AsyncJobsWithStreamingResponse, ) -from ..._compat import cached_property -from .jobs.jobs import Jobs, AsyncJobs from ..._resource import SyncAPIResource, AsyncAPIResource __all__ = ["FineTuning", "AsyncFineTuning"] @@ -24,10 +23,21 @@ def jobs(self) -> Jobs: @cached_property def with_raw_response(self) -> FineTuningWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
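The async variant mirrors the sync one; a sketch of lazy iteration over `AsyncCursorPage` (not part of the diff). With `wait_for_processing()` removed above, callers who need to poll file status can do so themselves, as the removed helper did:

    import asyncio

    from openai import AsyncOpenAI

    client = AsyncOpenAI()

    async def main() -> None:
        # AsyncCursorPage supports `async for`, fetching pages lazily.
        async for f in client.files.list(limit=100, order="desc"):
            print(f.id)

        # Manual replacement for the removed wait_for_processing() helper;
        # "file-abc123" is a placeholder ID.
        file = await client.files.retrieve("file-abc123")
        while file.status not in {"processed", "error", "deleted"}:
            await asyncio.sleep(5)
            file = await client.files.retrieve(file.id)

    asyncio.run(main())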
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return FineTuningWithRawResponse(self) @cached_property def with_streaming_response(self) -> FineTuningWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return FineTuningWithStreamingResponse(self) @@ -38,10 +48,21 @@ def jobs(self) -> AsyncJobs: @cached_property def with_raw_response(self) -> AsyncFineTuningWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncFineTuningWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncFineTuningWithStreamingResponse(self) diff --git a/src/openai/resources/fine_tuning/jobs/checkpoints.py b/src/openai/resources/fine_tuning/jobs/checkpoints.py index 67f5739a02..799efe88fd 100644 --- a/src/openai/resources/fine_tuning/jobs/checkpoints.py +++ b/src/openai/resources/fine_tuning/jobs/checkpoints.py @@ -11,10 +11,7 @@ from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) +from ...._base_client import AsyncPaginator, make_request_options from ....types.fine_tuning.jobs import checkpoint_list_params from ....types.fine_tuning.jobs.fine_tuning_job_checkpoint import FineTuningJobCheckpoint @@ -24,10 +21,21 @@ class Checkpoints(SyncAPIResource): @cached_property def with_raw_response(self) -> CheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return CheckpointsWithRawResponse(self) @cached_property def with_streaming_response(self) -> CheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return CheckpointsWithStreamingResponse(self) def list( @@ -84,10 +92,21 @@ def list( class AsyncCheckpoints(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncCheckpointsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncCheckpointsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncCheckpointsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
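A sketch of listing checkpoints for a fine-tuning job via the resource documented above (not part of the diff; the job ID is a placeholder):

    from openai import OpenAI

    client = OpenAI()

    for checkpoint in client.fine_tuning.jobs.checkpoints.list("ftjob-abc123"):
        print(checkpoint.id)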
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncCheckpointsWithStreamingResponse(self) def list( diff --git a/src/openai/resources/fine_tuning/jobs/jobs.py b/src/openai/resources/fine_tuning/jobs/jobs.py index f38956e6be..49629ca6a7 100644 --- a/src/openai/resources/fine_tuning/jobs/jobs.py +++ b/src/openai/resources/fine_tuning/jobs/jobs.py @@ -2,7 +2,7 @@ from __future__ import annotations -from typing import Union, Iterable, Optional +from typing import Dict, Union, Iterable, Optional from typing_extensions import Literal import httpx @@ -25,11 +25,9 @@ from ...._resource import SyncAPIResource, AsyncAPIResource from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) +from ...._base_client import AsyncPaginator, make_request_options from ....types.fine_tuning import job_list_params, job_create_params, job_list_events_params +from ....types.shared_params.metadata import Metadata from ....types.fine_tuning.fine_tuning_job import FineTuningJob from ....types.fine_tuning.fine_tuning_job_event import FineTuningJobEvent @@ -43,19 +41,32 @@ def checkpoints(self) -> Checkpoints: @cached_property def with_raw_response(self) -> JobsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return JobsWithRawResponse(self) @cached_property def with_streaming_response(self) -> JobsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return JobsWithStreamingResponse(self) def create( self, *, - model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]], training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, @@ -77,7 +88,7 @@ def create( Args: model: The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). training_file: The ID of an uploaded file that contains training data. @@ -87,22 +98,39 @@ def create( Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. 
+ The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. integrations: A list of integrations to enable for your fine-tuning job. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + method: The method used for fine-tuning. + seed: The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. validation_file: The ID of an uploaded file that contains validation data. @@ -133,6 +161,8 @@ def create( "training_file": training_file, "hyperparameters": hyperparameters, "integrations": integrations, + "metadata": metadata, + "method": method, "seed": seed, "suffix": suffix, "validation_file": validation_file, @@ -185,6 +215,7 @@ def list( *, after: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -200,6 +231,9 @@ def list( limit: Number of fine-tuning jobs to retrieve. + metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`. + Alternatively, set `metadata=null` to indicate no metadata. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -220,6 +254,7 @@ def list( { "after": after, "limit": limit, + "metadata": metadata, }, job_list_params.JobListParams, ), @@ -318,19 +353,32 @@ def checkpoints(self) -> AsyncCheckpoints: @cached_property def with_raw_response(self) -> AsyncJobsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. 
+ + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncJobsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncJobsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncJobsWithStreamingResponse(self) async def create( self, *, - model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]], + model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]], training_file: str, hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN, integrations: Optional[Iterable[job_create_params.Integration]] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + method: job_create_params.Method | NotGiven = NOT_GIVEN, seed: Optional[int] | NotGiven = NOT_GIVEN, suffix: Optional[str] | NotGiven = NOT_GIVEN, validation_file: Optional[str] | NotGiven = NOT_GIVEN, @@ -352,7 +400,7 @@ async def create( Args: model: The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). training_file: The ID of an uploaded file that contains training data. @@ -362,22 +410,39 @@ async def create( Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. - hyperparameters: The hyperparameters used for the fine-tuning job. + hyperparameters: The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. integrations: A list of integrations to enable for your fine-tuning job. + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + method: The method used for fine-tuning. + seed: The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases. If a seed is not specified, one will be generated for you. - suffix: A string of up to 18 characters that will be added to your fine-tuned model + suffix: A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. validation_file: The ID of an uploaded file that contains validation data. 
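A sketch of a job creation call using the new `metadata` and `method` parameters (not part of the diff; the file ID is a placeholder, and the `method` payload shows one plausible shape for supervised fine-tuning rather than the exact `job_create_params.Method` schema):

    from openai import OpenAI

    client = OpenAI()

    job = client.fine_tuning.jobs.create(
        model="gpt-4o-mini",
        training_file="file-abc123",
        suffix="custom-model-name",
        metadata={"project": "demo"},  # up to 16 key-value pairs
        method={"type": "supervised", "supervised": {"hyperparameters": {"n_epochs": 3}}},
    )
    print(job.id, job.status)

    # The new metadata filter on list():
    for j in client.fine_tuning.jobs.list(limit=10, metadata={"project": "demo"}):
        print(j.id)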
@@ -408,6 +473,8 @@ async def create( "training_file": training_file, "hyperparameters": hyperparameters, "integrations": integrations, + "metadata": metadata, + "method": method, "seed": seed, "suffix": suffix, "validation_file": validation_file, @@ -460,6 +527,7 @@ def list( *, after: str | NotGiven = NOT_GIVEN, limit: int | NotGiven = NOT_GIVEN, + metadata: Optional[Dict[str, str]] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -475,6 +543,9 @@ def list( limit: Number of fine-tuning jobs to retrieve. + metadata: Optional metadata filter. To filter, use the syntax `metadata[k]=v`. + Alternatively, set `metadata=null` to indicate no metadata. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -495,6 +566,7 @@ def list( { "after": after, "limit": limit, + "metadata": metadata, }, job_list_params.JobListParams, ), diff --git a/src/openai/resources/images.py b/src/openai/resources/images.py index 74b2a46a3f..30473c14f7 100644 --- a/src/openai/resources/images.py +++ b/src/openai/resources/images.py @@ -19,9 +19,8 @@ from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .._base_client import ( - make_request_options, -) +from .._base_client import make_request_options +from ..types.image_model import ImageModel from ..types.images_response import ImagesResponse __all__ = ["Images", "AsyncImages"] @@ -30,17 +29,28 @@ class Images(SyncAPIResource): @cached_property def with_raw_response(self) -> ImagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ImagesWithRawResponse(self) @cached_property def with_streaming_response(self) -> ImagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ImagesWithStreamingResponse(self) def create_variation( self, *, image: FileTypes, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, @@ -74,7 +84,7 @@ def create_variation( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -95,11 +105,10 @@ def create_variation( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. 
- # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/images/variations", body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), @@ -116,7 +125,7 @@ def edit( image: FileTypes, prompt: str, mask: FileTypes | NotGiven = NOT_GIVEN, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, @@ -156,7 +165,7 @@ def edit( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -179,11 +188,10 @@ def edit( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return self._post( "/images/edits", body=maybe_transform(body, image_edit_params.ImageEditParams), @@ -198,7 +206,7 @@ def generate( self, *, prompt: str, - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, @@ -243,7 +251,7 @@ def generate( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -278,17 +286,28 @@ def generate( class AsyncImages(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncImagesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncImagesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncImagesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
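A sketch of image generation with the `ImageModel` alias now used for the `model` parameter (not part of the diff; the prompt is arbitrary):

    from openai import OpenAI

    client = OpenAI()

    result = client.images.generate(
        model="dall-e-3",
        prompt="A watercolor painting of a lighthouse at dawn",
        n=1,
    )
    print(result.data[0].url)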
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncImagesWithStreamingResponse(self) async def create_variation( self, *, image: FileTypes, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, @@ -322,7 +341,7 @@ async def create_variation( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -343,11 +362,10 @@ async def create_variation( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["image"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/images/variations", body=await async_maybe_transform(body, image_create_variation_params.ImageCreateVariationParams), @@ -364,7 +382,7 @@ async def edit( image: FileTypes, prompt: str, mask: FileTypes | NotGiven = NOT_GIVEN, - model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN, @@ -404,7 +422,7 @@ async def edit( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers @@ -427,11 +445,10 @@ async def edit( } ) files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]]) - if files: - # It should be noted that the actual Content-Type header that will be - # sent to the server will contain a `boundary` parameter, e.g. - # multipart/form-data; boundary=---abc-- - extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} return await self._post( "/images/edits", body=await async_maybe_transform(body, image_edit_params.ImageEditParams), @@ -446,7 +463,7 @@ async def generate( self, *, prompt: str, - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN, + model: Union[str, ImageModel, None] | NotGiven = NOT_GIVEN, n: Optional[int] | NotGiven = NOT_GIVEN, quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN, response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN, @@ -491,7 +508,7 @@ async def generate( user: A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). extra_headers: Send extra headers diff --git a/src/openai/resources/models.py b/src/openai/resources/models.py index e76c496ffa..945f0acc1a 100644 --- a/src/openai/resources/models.py +++ b/src/openai/resources/models.py @@ -11,10 +11,7 @@ from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper from ..pagination import SyncPage, AsyncPage from ..types.model import Model -from .._base_client import ( - AsyncPaginator, - make_request_options, -) +from .._base_client import AsyncPaginator, make_request_options from ..types.model_deleted import ModelDeleted __all__ = ["Models", "AsyncModels"] @@ -23,10 +20,21 @@ class Models(SyncAPIResource): @cached_property def with_raw_response(self) -> ModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ModelsWithRawResponse(self) @cached_property def with_streaming_response(self) -> ModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ModelsWithStreamingResponse(self) def retrieve( @@ -125,10 +133,21 @@ def delete( class AsyncModels(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncModelsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncModelsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncModelsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncModelsWithStreamingResponse(self) async def retrieve( diff --git a/src/openai/resources/moderations.py b/src/openai/resources/moderations.py index 9386e50dae..a8f03142bc 100644 --- a/src/openai/resources/moderations.py +++ b/src/openai/resources/moderations.py @@ -2,8 +2,7 @@ from __future__ import annotations -from typing import List, Union -from typing_extensions import Literal +from typing import List, Union, Iterable import httpx @@ -17,10 +16,10 @@ from .._compat import cached_property from .._resource import SyncAPIResource, AsyncAPIResource from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from .._base_client import ( - make_request_options, -) +from .._base_client import make_request_options +from ..types.moderation_model import ModerationModel from ..types.moderation_create_response import ModerationCreateResponse +from ..types.moderation_multi_modal_input_param import ModerationMultiModalInputParam __all__ = ["Moderations", "AsyncModerations"] @@ -28,17 +27,28 @@ class Moderations(SyncAPIResource): @cached_property def with_raw_response(self) -> ModerationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return ModerationsWithRawResponse(self) @cached_property def with_streaming_response(self) -> ModerationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return ModerationsWithStreamingResponse(self) def create( self, *, - input: Union[str, List[str]], - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN, + input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]], + model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -46,20 +56,19 @@ def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModerationCreateResponse: - """ - Classifies if text is potentially harmful. + """Classifies if text and/or image inputs are potentially harmful. - Args: - input: The input text to classify + Learn more in + the [moderation guide](https://platform.openai.com/docs/guides/moderation). - model: Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. + Args: + input: Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + model: The content moderation model you would like to use. 
Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). extra_headers: Send extra headers @@ -88,17 +97,28 @@ def create( class AsyncModerations(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncModerationsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncModerationsWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncModerationsWithStreamingResponse(self) async def create( self, *, - input: Union[str, List[str]], - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN, + input: Union[str, List[str], Iterable[ModerationMultiModalInputParam]], + model: Union[str, ModerationModel] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -106,20 +126,19 @@ async def create( extra_body: Body | None = None, timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, ) -> ModerationCreateResponse: - """ - Classifies if text is potentially harmful. + """Classifies if text and/or image inputs are potentially harmful. - Args: - input: The input text to classify + Learn more in + the [moderation guide](https://platform.openai.com/docs/guides/moderation). - model: Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. + Args: + input: Input (or inputs) to classify. Can be a single string, an array of strings, or + an array of multi-modal input objects similar to other models. - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + model: The content moderation model you would like to use. Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). extra_headers: Send extra headers diff --git a/src/openai/resources/responses/__init__.py b/src/openai/resources/responses/__init__.py new file mode 100644 index 0000000000..ad19218b01 --- /dev/null +++ b/src/openai/resources/responses/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
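A sketch of the multi-modal moderation input documented above (not part of the diff; the model name and the exact input-object shapes are assumptions based on the moderation guide, and the image URL is a placeholder):

    from openai import OpenAI

    client = OpenAI()

    result = client.moderations.create(
        model="omni-moderation-latest",
        input=[
            {"type": "text", "text": "Describe this image."},
            {"type": "image_url", "image_url": {"url": "https://example.com/image.png"}},
        ],
    )
    print(result.results[0].flagged)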
+ +from .responses import ( + Responses, + AsyncResponses, + ResponsesWithRawResponse, + AsyncResponsesWithRawResponse, + ResponsesWithStreamingResponse, + AsyncResponsesWithStreamingResponse, +) +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) + +__all__ = [ + "InputItems", + "AsyncInputItems", + "InputItemsWithRawResponse", + "AsyncInputItemsWithRawResponse", + "InputItemsWithStreamingResponse", + "AsyncInputItemsWithStreamingResponse", + "Responses", + "AsyncResponses", + "ResponsesWithRawResponse", + "AsyncResponsesWithRawResponse", + "ResponsesWithStreamingResponse", + "AsyncResponsesWithStreamingResponse", +] diff --git a/src/openai/resources/responses/input_items.py b/src/openai/resources/responses/input_items.py new file mode 100644 index 0000000000..10e7d545dc --- /dev/null +++ b/src/openai/resources/responses/input_items.py @@ -0,0 +1,223 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Any, cast +from typing_extensions import Literal + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import maybe_transform +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.responses import input_item_list_params +from ...types.responses.response_item_list import Data + +__all__ = ["InputItems", "AsyncInputItems"] + + +class InputItems(SyncAPIResource): + @cached_property + def with_raw_response(self) -> InputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return InputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> InputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return InputItemsWithStreamingResponse(self) + + def list( + self, + response_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncCursorPage[Data]: + """ + Returns a list of input items for a given response. + + Args: + after: An item ID to list items after, used in pagination. + + before: An item ID to list items before, used in pagination. + + limit: A limit on the number of objects to be returned. 
Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `asc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get_api_list( + f"/responses/{response_id}/input_items", + page=SyncCursorPage[Data], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + input_item_list_params.InputItemListParams, + ), + ), + model=cast(Any, Data), # Union types cannot be passed in as arguments in the type system + ) + + +class AsyncInputItems(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncInputItemsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncInputItemsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncInputItemsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncInputItemsWithStreamingResponse(self) + + def list( + self, + response_id: str, + *, + after: str | NotGiven = NOT_GIVEN, + before: str | NotGiven = NOT_GIVEN, + limit: int | NotGiven = NOT_GIVEN, + order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[Data, AsyncCursorPage[Data]]: + """ + Returns a list of input items for a given response. + + Args: + after: An item ID to list items after, used in pagination. + + before: An item ID to list items before, used in pagination. + + limit: A limit on the number of objects to be returned. Limit can range between 1 and + 100, and the default is 20. + + order: The order to return the input items in. Default is `asc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. 
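A sketch of listing the input items of a stored response (not part of the diff; the response ID is a placeholder):

    from openai import OpenAI

    client = OpenAI()

    # Pages through SyncCursorPage[Data] lazily, as with the other list endpoints.
    for item in client.responses.input_items.list("resp_abc123", limit=20, order="asc"):
        print(item)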
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get_api_list( + f"/responses/{response_id}/input_items", + page=AsyncCursorPage[Data], + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform( + { + "after": after, + "before": before, + "limit": limit, + "order": order, + }, + input_item_list_params.InputItemListParams, + ), + ), + model=cast(Any, Data), # Union types cannot be passed in as arguments in the type system + ) + + +class InputItemsWithRawResponse: + def __init__(self, input_items: InputItems) -> None: + self._input_items = input_items + + self.list = _legacy_response.to_raw_response_wrapper( + input_items.list, + ) + + +class AsyncInputItemsWithRawResponse: + def __init__(self, input_items: AsyncInputItems) -> None: + self._input_items = input_items + + self.list = _legacy_response.async_to_raw_response_wrapper( + input_items.list, + ) + + +class InputItemsWithStreamingResponse: + def __init__(self, input_items: InputItems) -> None: + self._input_items = input_items + + self.list = to_streamed_response_wrapper( + input_items.list, + ) + + +class AsyncInputItemsWithStreamingResponse: + def __init__(self, input_items: AsyncInputItems) -> None: + self._input_items = input_items + + self.list = async_to_streamed_response_wrapper( + input_items.list, + ) diff --git a/src/openai/resources/responses/responses.py b/src/openai/resources/responses/responses.py new file mode 100644 index 0000000000..843e4972a9 --- /dev/null +++ b/src/openai/resources/responses/responses.py @@ -0,0 +1,1433 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, overload + +import httpx + +from ... 
import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NoneType, NotGiven +from ..._utils import ( + required_args, + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from .input_items import ( + InputItems, + AsyncInputItems, + InputItemsWithRawResponse, + AsyncInputItemsWithRawResponse, + InputItemsWithStreamingResponse, + AsyncInputItemsWithStreamingResponse, +) +from ..._streaming import Stream, AsyncStream +from ..._base_client import make_request_options +from ...types.responses import response_create_params, response_retrieve_params +from ...types.shared.chat_model import ChatModel +from ...types.responses.response import Response +from ...types.responses.tool_param import ToolParam +from ...types.shared_params.metadata import Metadata +from ...types.shared_params.reasoning import Reasoning +from ...types.responses.response_includable import ResponseIncludable +from ...types.responses.response_input_param import ResponseInputParam +from ...types.responses.response_stream_event import ResponseStreamEvent +from ...types.responses.response_text_config_param import ResponseTextConfigParam + +__all__ = ["Responses", "AsyncResponses"] + + +class Responses(SyncAPIResource): + @cached_property + def input_items(self) -> InputItems: + return InputItems(self._client) + + @cached_property + def with_raw_response(self) -> ResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return ResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> ResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return ResponsesWithStreamingResponse(self) + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will be not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. 
+ + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
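The `create()` overloads in this file only declare call signatures; a minimal usage sketch of the two call shapes follows. The client construction, the model name, and the `output_text` convenience accessor are illustrative assumptions, not taken from this diff.

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# Non-streaming: matches the first overload above and returns a parsed `Response`.
response = client.responses.create(
    model="gpt-4o",
    input="Write one sentence about the Responses API.",
)
print(response.output_text)

# Streaming: `stream=True` matches the streaming overload that follows and yields
# `ResponseStreamEvent` objects as server-sent events arrive.
for event in client.responses.create(
    model="gpt-4o",
    input="Now stream that sentence.",
    stream=True,
):
    print(event.type)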
+ + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + stream: Literal[True], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Stream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. 
+ - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response.
+ + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + stream: bool, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | Stream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. 
OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter.
+ + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + def create( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | Stream[ResponseStreamEvent]: + return self._post( + "/responses", + body=maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=Stream[ResponseStreamEvent], + ) + + def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=maybe_transform({"include": include}, response_retrieve_params.ResponseRetrieveParams), + ), + cast_to=Response, + ) + + def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Deletes a model response with the given ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class AsyncResponses(AsyncAPIResource): + @cached_property + def input_items(self) -> AsyncInputItems: + return AsyncInputItems(self._client) + + @cached_property + def with_raw_response(self) -> AsyncResponsesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncResponsesWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncResponsesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncResponsesWithStreamingResponse(self) + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. 
Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + temperature: What sampling temperature to use, between 0 and 2.
Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... 
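The async resource mirrors the synchronous sketch above: awaiting the non-streaming overload returns a `Response`, while `stream=True` resolves to an `AsyncStream` that is consumed with `async for`. Client setup and model name are again illustrative assumptions rather than part of this diff.

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()  # assumes OPENAI_API_KEY is set in the environment

    # Non-streaming: awaiting the call returns a parsed `Response`.
    response = await client.responses.create(
        model="gpt-4o",
        input="Say hello.",
    )
    print(response.id)

    # Streaming: the awaited call resolves to an AsyncStream[ResponseStreamEvent].
    stream = await client.responses.create(
        model="gpt-4o",
        input="Say hello again, token by token.",
        stream=True,
    )
    async for event in stream:
        print(event.type)


asyncio.run(main())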
+ + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + stream: Literal[True], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncStream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. 
+ - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response.
+ + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @overload + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + stream: bool, + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | AsyncStream[ResponseStreamEvent]: + """Creates a model response. + + Provide + [text](https://platform.openai.com/docs/guides/text) or + [image](https://platform.openai.com/docs/guides/images) inputs to generate + [text](https://platform.openai.com/docs/guides/text) or + [JSON](https://platform.openai.com/docs/guides/structured-outputs) outputs. Have + the model call your own + [custom code](https://platform.openai.com/docs/guides/function-calling) or use + built-in [tools](https://platform.openai.com/docs/guides/tools) like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search) to use + your own data as input for the model's response. + + Args: + input: Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + + model: Model ID used to generate the response, like `gpt-4o` or `o1`. 
OpenAI offers a + wide range of models with different capabilities, performance characteristics, + and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + + stream: If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + + include: Specify additional output data to include in the model response. Currently + supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. + + instructions: Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + + max_output_tokens: An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + + metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + + parallel_tool_calls: Whether to allow the model to run tool calls in parallel. + + previous_response_id: The unique ID of the previous response to the model. Use this to create + multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + + reasoning: **o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + + store: Whether to store the generated model response for later retrieval via API. + + temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will + make the output more random, while lower values like 0.2 will make it more + focused and deterministic. We generally recommend altering this or `top_p` but + not both. + + text: Configuration options for a text response from the model. Can be plain text or + structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + + tool_choice: How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + + tools: An array of tools the model may call while generating a response. You can + specify which tool to use by setting the `tool_choice` parameter.
+ + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + + top_p: An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + + truncation: The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + + user: A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + ... + + @required_args(["input", "model"], ["input", "model", "stream"]) + async def create( + self, + *, + input: Union[str, ResponseInputParam], + model: Union[str, ChatModel], + include: Optional[List[ResponseIncludable]] | NotGiven = NOT_GIVEN, + instructions: Optional[str] | NotGiven = NOT_GIVEN, + max_output_tokens: Optional[int] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, + parallel_tool_calls: Optional[bool] | NotGiven = NOT_GIVEN, + previous_response_id: Optional[str] | NotGiven = NOT_GIVEN, + reasoning: Optional[Reasoning] | NotGiven = NOT_GIVEN, + store: Optional[bool] | NotGiven = NOT_GIVEN, + stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN, + temperature: Optional[float] | NotGiven = NOT_GIVEN, + text: ResponseTextConfigParam | NotGiven = NOT_GIVEN, + tool_choice: response_create_params.ToolChoice | NotGiven = NOT_GIVEN, + tools: Iterable[ToolParam] | NotGiven = NOT_GIVEN, + top_p: Optional[float] | NotGiven = NOT_GIVEN, + truncation: Optional[Literal["auto", "disabled"]] | NotGiven = NOT_GIVEN, + user: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response | AsyncStream[ResponseStreamEvent]: + return await self._post( + "/responses", + body=await async_maybe_transform( + { + "input": input, + "model": model, + "include": include, + "instructions": instructions, + "max_output_tokens": max_output_tokens, + "metadata": metadata, + "parallel_tool_calls": parallel_tool_calls, + "previous_response_id": previous_response_id, + "reasoning": reasoning, + "store": store, + "stream": stream, + "temperature": temperature, + "text": text, + "tool_choice": tool_choice, + "tools": tools, + "top_p": top_p, + "truncation": truncation, + "user": user, + }, + response_create_params.ResponseCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Response, + stream=stream or False, + stream_cls=AsyncStream[ResponseStreamEvent], + ) + + async def retrieve( + self, + response_id: str, + *, + include: List[ResponseIncludable] | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Response: + """ + Retrieves a model response with the given ID. + + Args: + include: Additional fields to include in the response. See the `include` parameter for + Response creation above for more information. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + return await self._get( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, + extra_query=extra_query, + extra_body=extra_body, + timeout=timeout, + query=await async_maybe_transform( + {"include": include}, response_retrieve_params.ResponseRetrieveParams + ), + ), + cast_to=Response, + ) + + async def delete( + self, + response_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> None: + """ + Deletes a model response with the given ID. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not response_id: + raise ValueError(f"Expected a non-empty value for `response_id` but received {response_id!r}") + extra_headers = {"Accept": "*/*", **(extra_headers or {})} + return await self._delete( + f"/responses/{response_id}", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=NoneType, + ) + + +class ResponsesWithRawResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = _legacy_response.to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.to_raw_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> InputItemsWithRawResponse: + return InputItemsWithRawResponse(self._responses.input_items) + + +class AsyncResponsesWithRawResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = _legacy_response.async_to_raw_response_wrapper( + responses.create, + ) + self.retrieve = _legacy_response.async_to_raw_response_wrapper( + responses.retrieve, + ) + self.delete = _legacy_response.async_to_raw_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithRawResponse: + return AsyncInputItemsWithRawResponse(self._responses.input_items) + + +class ResponsesWithStreamingResponse: + def __init__(self, responses: Responses) -> None: + self._responses = responses + + self.create = to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = to_streamed_response_wrapper( + responses.retrieve, + ) + self.delete = to_streamed_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> InputItemsWithStreamingResponse: + return InputItemsWithStreamingResponse(self._responses.input_items) + + +class AsyncResponsesWithStreamingResponse: + def __init__(self, responses: AsyncResponses) -> None: + self._responses = responses + + self.create = async_to_streamed_response_wrapper( + responses.create, + ) + self.retrieve = async_to_streamed_response_wrapper( + responses.retrieve, + ) + self.delete = async_to_streamed_response_wrapper( + responses.delete, + ) + + @cached_property + def input_items(self) -> AsyncInputItemsWithStreamingResponse: + return AsyncInputItemsWithStreamingResponse(self._responses.input_items) diff --git a/src/openai/resources/uploads/__init__.py b/src/openai/resources/uploads/__init__.py new file mode 100644 index 0000000000..12d1056f9e --- /dev/null +++ b/src/openai/resources/uploads/__init__.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from .parts import ( + Parts, + AsyncParts, + PartsWithRawResponse, + AsyncPartsWithRawResponse, + PartsWithStreamingResponse, + AsyncPartsWithStreamingResponse, +) +from .uploads import ( + Uploads, + AsyncUploads, + UploadsWithRawResponse, + AsyncUploadsWithRawResponse, + UploadsWithStreamingResponse, + AsyncUploadsWithStreamingResponse, +) + +__all__ = [ + "Parts", + "AsyncParts", + "PartsWithRawResponse", + "AsyncPartsWithRawResponse", + "PartsWithStreamingResponse", + "AsyncPartsWithStreamingResponse", + "Uploads", + "AsyncUploads", + "UploadsWithRawResponse", + "AsyncUploadsWithRawResponse", + "UploadsWithStreamingResponse", + "AsyncUploadsWithStreamingResponse", +] diff --git a/src/openai/resources/uploads/parts.py b/src/openai/resources/uploads/parts.py new file mode 100644 index 0000000000..777469ac8e --- /dev/null +++ b/src/openai/resources/uploads/parts.py @@ -0,0 +1,210 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Mapping, cast + +import httpx + +from ... import _legacy_response +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes +from ..._utils import ( + extract_files, + maybe_transform, + deepcopy_minimal, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.uploads import part_create_params +from ...types.uploads.upload_part import UploadPart + +__all__ = ["Parts", "AsyncParts"] + + +class Parts(SyncAPIResource): + @cached_property + def with_raw_response(self) -> PartsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return PartsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> PartsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return PartsWithStreamingResponse(self) + + def create( + self, + upload_id: str, + *, + data: FileTypes, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> UploadPart: + """ + Adds a + [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object. + A Part represents a chunk of bytes from the file you are trying to upload. + + Each Part can be at most 64 MB, and you can add Parts until you hit the Upload + maximum of 8 GB. + + It is possible to add multiple Parts in parallel. You can decide the intended + order of the Parts when you + [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete). + + Args: + data: The chunk of bytes for this Part. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + body = deepcopy_minimal({"data": data}) + files = extract_files(cast(Mapping[str, object], body), paths=[["data"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. + # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return self._post( + f"/uploads/{upload_id}/parts", + body=maybe_transform(body, part_create_params.PartCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadPart, + ) + + +class AsyncParts(AsyncAPIResource): + @cached_property + def with_raw_response(self) -> AsyncPartsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncPartsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncPartsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncPartsWithStreamingResponse(self) + + async def create( + self, + upload_id: str, + *, + data: FileTypes, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> UploadPart: + """ + Adds a + [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object. + A Part represents a chunk of bytes from the file you are trying to upload. + + Each Part can be at most 64 MB, and you can add Parts until you hit the Upload + maximum of 8 GB. + + It is possible to add multiple Parts in parallel. You can decide the intended + order of the Parts when you + [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete). + + Args: + data: The chunk of bytes for this Part. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + body = deepcopy_minimal({"data": data}) + files = extract_files(cast(Mapping[str, object], body), paths=[["data"]]) + # It should be noted that the actual Content-Type header that will be + # sent to the server will contain a `boundary` parameter, e.g. 
+ # multipart/form-data; boundary=---abc-- + extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})} + return await self._post( + f"/uploads/{upload_id}/parts", + body=await async_maybe_transform(body, part_create_params.PartCreateParams), + files=files, + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=UploadPart, + ) + + +class PartsWithRawResponse: + def __init__(self, parts: Parts) -> None: + self._parts = parts + + self.create = _legacy_response.to_raw_response_wrapper( + parts.create, + ) + + +class AsyncPartsWithRawResponse: + def __init__(self, parts: AsyncParts) -> None: + self._parts = parts + + self.create = _legacy_response.async_to_raw_response_wrapper( + parts.create, + ) + + +class PartsWithStreamingResponse: + def __init__(self, parts: Parts) -> None: + self._parts = parts + + self.create = to_streamed_response_wrapper( + parts.create, + ) + + +class AsyncPartsWithStreamingResponse: + def __init__(self, parts: AsyncParts) -> None: + self._parts = parts + + self.create = async_to_streamed_response_wrapper( + parts.create, + ) diff --git a/src/openai/resources/uploads/uploads.py b/src/openai/resources/uploads/uploads.py new file mode 100644 index 0000000000..c897c47f33 --- /dev/null +++ b/src/openai/resources/uploads/uploads.py @@ -0,0 +1,493 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List + +import httpx + +from ... import _legacy_response +from .parts import ( + Parts, + AsyncParts, + PartsWithRawResponse, + AsyncPartsWithRawResponse, + PartsWithStreamingResponse, + AsyncPartsWithStreamingResponse, +) +from ...types import FilePurpose, upload_create_params, upload_complete_params +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( + maybe_transform, + async_maybe_transform, +) +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._base_client import make_request_options +from ...types.upload import Upload +from ...types.file_purpose import FilePurpose + +__all__ = ["Uploads", "AsyncUploads"] + + +class Uploads(SyncAPIResource): + @cached_property + def parts(self) -> Parts: + return Parts(self._client) + + @cached_property + def with_raw_response(self) -> UploadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return UploadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> UploadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return UploadsWithStreamingResponse(self) + + def create( + self, + *, + bytes: int, + filename: str, + mime_type: str, + purpose: FilePurpose, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Creates an intermediate + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object + that you can add + [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to. + Currently, an Upload can accept at most 8 GB in total and expires after an hour + after you create it. + + Once you complete the Upload, we will create a + [File](https://platform.openai.com/docs/api-reference/files/object) object that + contains all the parts you uploaded. This File is usable in the rest of our + platform as a regular File object. + + For certain `purpose` values, the correct `mime_type` must be specified. Please + refer to documentation for the + [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + For guidance on the proper filename extensions for each purpose, please follow + the documentation on + [creating a File](https://platform.openai.com/docs/api-reference/files/create). + + Args: + bytes: The number of bytes in the file you are uploading. + + filename: The name of the file to upload. + + mime_type: The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + + purpose: The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return self._post( + "/uploads", + body=maybe_transform( + { + "bytes": bytes, + "filename": filename, + "mime_type": mime_type, + "purpose": purpose, + }, + upload_create_params.UploadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + def cancel( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """Cancels the Upload. + + No Parts may be added after an Upload is cancelled. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/uploads/{upload_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + def complete( + self, + upload_id: str, + *, + part_ids: List[str], + md5: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Completes the + [Upload](https://platform.openai.com/docs/api-reference/uploads/object). + + Within the returned Upload object, there is a nested + [File](https://platform.openai.com/docs/api-reference/files/object) object that + is ready to use in the rest of the platform. + + You can specify the order of the Parts by passing in an ordered list of the Part + IDs. + + The number of bytes uploaded upon completion must match the number of bytes + initially specified when creating the Upload object. No Parts may be added after + an Upload is completed. + + Args: + part_ids: The ordered list of Part IDs. + + md5: The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return self._post( + f"/uploads/{upload_id}/complete", + body=maybe_transform( + { + "part_ids": part_ids, + "md5": md5, + }, + upload_complete_params.UploadCompleteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + +class AsyncUploads(AsyncAPIResource): + @cached_property + def parts(self) -> AsyncParts: + return AsyncParts(self._client) + + @cached_property + def with_raw_response(self) -> AsyncUploadsWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ + return AsyncUploadsWithRawResponse(self) + + @cached_property + def with_streaming_response(self) -> AsyncUploadsWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. 
+ + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ + return AsyncUploadsWithStreamingResponse(self) + + async def create( + self, + *, + bytes: int, + filename: str, + mime_type: str, + purpose: FilePurpose, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Creates an intermediate + [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object + that you can add + [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object) to. + Currently, an Upload can accept at most 8 GB in total and expires after an hour + after you create it. + + Once you complete the Upload, we will create a + [File](https://platform.openai.com/docs/api-reference/files/object) object that + contains all the parts you uploaded. This File is usable in the rest of our + platform as a regular File object. + + For certain `purpose` values, the correct `mime_type` must be specified. Please + refer to documentation for the + [supported MIME types for your use case](https://platform.openai.com/docs/assistants/tools/file-search#supported-files). + + For guidance on the proper filename extensions for each purpose, please follow + the documentation on + [creating a File](https://platform.openai.com/docs/api-reference/files/create). + + Args: + bytes: The number of bytes in the file you are uploading. + + filename: The name of the file to upload. + + mime_type: The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. See the + supported MIME types for assistants and vision. + + purpose: The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + return await self._post( + "/uploads", + body=await async_maybe_transform( + { + "bytes": bytes, + "filename": filename, + "mime_type": mime_type, + "purpose": purpose, + }, + upload_create_params.UploadCreateParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + async def cancel( + self, + upload_id: str, + *, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """Cancels the Upload. + + No Parts may be added after an Upload is cancelled. 
+ + Args: + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._post( + f"/uploads/{upload_id}/cancel", + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + async def complete( + self, + upload_id: str, + *, + part_ids: List[str], + md5: str | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> Upload: + """ + Completes the + [Upload](https://platform.openai.com/docs/api-reference/uploads/object). + + Within the returned Upload object, there is a nested + [File](https://platform.openai.com/docs/api-reference/files/object) object that + is ready to use in the rest of the platform. + + You can specify the order of the Parts by passing in an ordered list of the Part + IDs. + + The number of bytes uploaded upon completion must match the number of bytes + initially specified when creating the Upload object. No Parts may be added after + an Upload is completed. + + Args: + part_ids: The ordered list of Part IDs. + + md5: The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. 
+ + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not upload_id: + raise ValueError(f"Expected a non-empty value for `upload_id` but received {upload_id!r}") + return await self._post( + f"/uploads/{upload_id}/complete", + body=await async_maybe_transform( + { + "part_ids": part_ids, + "md5": md5, + }, + upload_complete_params.UploadCompleteParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=Upload, + ) + + +class UploadsWithRawResponse: + def __init__(self, uploads: Uploads) -> None: + self._uploads = uploads + + self.create = _legacy_response.to_raw_response_wrapper( + uploads.create, + ) + self.cancel = _legacy_response.to_raw_response_wrapper( + uploads.cancel, + ) + self.complete = _legacy_response.to_raw_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> PartsWithRawResponse: + return PartsWithRawResponse(self._uploads.parts) + + +class AsyncUploadsWithRawResponse: + def __init__(self, uploads: AsyncUploads) -> None: + self._uploads = uploads + + self.create = _legacy_response.async_to_raw_response_wrapper( + uploads.create, + ) + self.cancel = _legacy_response.async_to_raw_response_wrapper( + uploads.cancel, + ) + self.complete = _legacy_response.async_to_raw_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> AsyncPartsWithRawResponse: + return AsyncPartsWithRawResponse(self._uploads.parts) + + +class UploadsWithStreamingResponse: + def __init__(self, uploads: Uploads) -> None: + self._uploads = uploads + + self.create = to_streamed_response_wrapper( + uploads.create, + ) + self.cancel = to_streamed_response_wrapper( + uploads.cancel, + ) + self.complete = to_streamed_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> PartsWithStreamingResponse: + return PartsWithStreamingResponse(self._uploads.parts) + + +class AsyncUploadsWithStreamingResponse: + def __init__(self, uploads: AsyncUploads) -> None: + self._uploads = uploads + + self.create = async_to_streamed_response_wrapper( + uploads.create, + ) + self.cancel = async_to_streamed_response_wrapper( + uploads.cancel, + ) + self.complete = async_to_streamed_response_wrapper( + uploads.complete, + ) + + @cached_property + def parts(self) -> AsyncPartsWithStreamingResponse: + return AsyncPartsWithStreamingResponse(self._uploads.parts) diff --git a/src/openai/resources/beta/vector_stores/__init__.py b/src/openai/resources/vector_stores/__init__.py similarity index 100% rename from src/openai/resources/beta/vector_stores/__init__.py rename to src/openai/resources/vector_stores/__init__.py diff --git a/src/openai/resources/beta/vector_stores/file_batches.py b/src/openai/resources/vector_stores/file_batches.py similarity index 67% rename from src/openai/resources/beta/vector_stores/file_batches.py rename to src/openai/resources/vector_stores/file_batches.py index f1ced51700..a400d30a3e 100644 --- a/src/openai/resources/beta/vector_stores/file_batches.py +++ b/src/openai/resources/vector_stores/file_batches.py @@ -2,33 +2,27 @@ from __future__ import annotations -import asyncio -from typing import List, Iterable +from typing import Dict, List, Union, Optional from typing_extensions import Literal -from concurrent.futures import Future, 
ThreadPoolExecutor, as_completed import httpx -import sniffio -from .... import _legacy_response -from ....types import FileObject -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ...._utils import ( - is_given, +from ... import _legacy_response +from ...types import FileChunkingStrategyParam +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( maybe_transform, async_maybe_transform, ) -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) -from ....types.beta.vector_stores import file_batch_create_params, file_batch_list_files_params -from ....types.beta.vector_stores.vector_store_file import VectorStoreFile -from ....types.beta.vector_stores.vector_store_file_batch import VectorStoreFileBatch +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_stores import file_batch_create_params, file_batch_list_files_params +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_stores.vector_store_file import VectorStoreFile +from ...types.vector_stores.vector_store_file_batch import VectorStoreFileBatch __all__ = ["FileBatches", "AsyncFileBatches"] @@ -36,10 +30,21 @@ class FileBatches(SyncAPIResource): @cached_property def with_raw_response(self) -> FileBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return FileBatchesWithRawResponse(self) @cached_property def with_streaming_response(self) -> FileBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return FileBatchesWithStreamingResponse(self) def create( @@ -47,6 +52,8 @@ def create( vector_store_id: str, *, file_ids: List[str], + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -62,6 +69,15 @@ def create( the vector store should use. Useful for tools like `file_search` that can access files. + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). 
If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -75,7 +91,14 @@ def create( extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/vector_stores/{vector_store_id}/file_batches", - body=maybe_transform({"file_ids": file_ids}, file_batch_create_params.FileBatchCreateParams), + body=maybe_transform( + { + "file_ids": file_ids, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_batch_create_params.FileBatchCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -158,25 +181,6 @@ def cancel( cast_to=VectorStoreFileBatch, ) - def create_and_poll( - self, - vector_store_id: str, - *, - file_ids: List[str], - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Create a vector store batch and poll until all files have been processed.""" - batch = self.create( - vector_store_id=vector_store_id, - file_ids=file_ids, - ) - # TODO: don't poll unless necessary?? - return self.poll( - batch.id, - vector_store_id=vector_store_id, - poll_interval_ms=poll_interval_ms, - ) - def list_files( self, batch_id: str, @@ -205,8 +209,8 @@ def list_files( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. @@ -251,99 +255,25 @@ def list_files( model=VectorStoreFile, ) - def poll( - self, - batch_id: str, - *, - vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Wait for the given file batch to be processed. - - Note: this will return even if one of the files failed to process, you need to - check batch.file_counts.failed_count to handle this case. - """ - headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} - if is_given(poll_interval_ms): - headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - while True: - response = self.with_raw_response.retrieve( - batch_id, - vector_store_id=vector_store_id, - extra_headers=headers, - ) - - batch = response.parse() - if batch.file_counts.in_progress > 0: - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - self._sleep(poll_interval_ms / 1000) - continue - - return batch - - def upload_and_poll( - self, - vector_store_id: str, - *, - files: Iterable[FileTypes], - max_concurrency: int = 5, - file_ids: List[str] = [], - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Uploads the given files concurrently and then creates a vector store file batch. - - If you've already uploaded certain files that you want to include in this batch - then you can pass their IDs through the `file_ids` argument. - - By default, if any file upload fails then an exception will be eagerly raised. 
- - The number of concurrency uploads is configurable using the `max_concurrency` - parameter. - - Note: this method only supports `asyncio` or `trio` as the backing async - runtime. - """ - results: list[FileObject] = [] - - with ThreadPoolExecutor(max_workers=max_concurrency) as executor: - futures: list[Future[FileObject]] = [ - executor.submit( - self._client.files.create, - file=file, - purpose="assistants", - ) - for file in files - ] - - for future in as_completed(futures): - exc = future.exception() - if exc: - raise exc - - results.append(future.result()) - - batch = self.create_and_poll( - vector_store_id=vector_store_id, - file_ids=[*file_ids, *(f.id for f in results)], - poll_interval_ms=poll_interval_ms, - ) - return batch - class AsyncFileBatches(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncFileBatchesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncFileBatchesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncFileBatchesWithStreamingResponse(self) async def create( @@ -351,6 +281,8 @@ async def create( vector_store_id: str, *, file_ids: List[str], + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -366,6 +298,15 @@ async def create( the vector store should use. Useful for tools like `file_search` that can access files. + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -379,7 +320,14 @@ async def create( extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/vector_stores/{vector_store_id}/file_batches", - body=await async_maybe_transform({"file_ids": file_ids}, file_batch_create_params.FileBatchCreateParams), + body=await async_maybe_transform( + { + "file_ids": file_ids, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_batch_create_params.FileBatchCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -462,25 +410,6 @@ async def cancel( cast_to=VectorStoreFileBatch, ) - async def create_and_poll( - self, - vector_store_id: str, - *, - file_ids: List[str], - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Create a vector store batch and poll until all files have been processed.""" - batch = await self.create( - vector_store_id=vector_store_id, - file_ids=file_ids, - ) - # TODO: don't poll unless necessary?? - return await self.poll( - batch.id, - vector_store_id=vector_store_id, - poll_interval_ms=poll_interval_ms, - ) - def list_files( self, batch_id: str, @@ -509,8 +438,8 @@ def list_files( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. @@ -555,114 +484,6 @@ def list_files( model=VectorStoreFile, ) - async def poll( - self, - batch_id: str, - *, - vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Wait for the given file batch to be processed. - - Note: this will return even if one of the files failed to process, you need to - check batch.file_counts.failed_count to handle this case. - """ - headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} - if is_given(poll_interval_ms): - headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - while True: - response = await self.with_raw_response.retrieve( - batch_id, - vector_store_id=vector_store_id, - extra_headers=headers, - ) - - batch = response.parse() - if batch.file_counts.in_progress > 0: - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - await self._sleep(poll_interval_ms / 1000) - continue - - return batch - - async def upload_and_poll( - self, - vector_store_id: str, - *, - files: Iterable[FileTypes], - max_concurrency: int = 5, - file_ids: List[str] = [], - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFileBatch: - """Uploads the given files concurrently and then creates a vector store file batch. - - If you've already uploaded certain files that you want to include in this batch - then you can pass their IDs through the `file_ids` argument. - - By default, if any file upload fails then an exception will be eagerly raised. 
- - The number of concurrency uploads is configurable using the `max_concurrency` - parameter. - - Note: this method only supports `asyncio` or `trio` as the backing async - runtime. - """ - uploaded_files: list[FileObject] = [] - - async_library = sniffio.current_async_library() - - if async_library == "asyncio": - - async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None: - async with semaphore: - file_obj = await self._client.files.create( - file=file, - purpose="assistants", - ) - uploaded_files.append(file_obj) - - semaphore = asyncio.Semaphore(max_concurrency) - - tasks = [asyncio_upload_file(semaphore, file) for file in files] - - await asyncio.gather(*tasks) - elif async_library == "trio": - # We only import if the library is being used. - # We support Python 3.7 so are using an older version of trio that does not have type information - import trio # type: ignore # pyright: ignore[reportMissingTypeStubs] - - async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None: - async with limiter: - file_obj = await self._client.files.create( - file=file, - purpose="assistants", - ) - uploaded_files.append(file_obj) - - limiter = trio.CapacityLimiter(max_concurrency) - - async with trio.open_nursery() as nursery: - for file in files: - nursery.start_soon(trio_upload_file, limiter, file) # pyright: ignore [reportUnknownMemberType] - else: - raise RuntimeError( - f"Async runtime {async_library} is not supported yet. Only asyncio or trio is supported", - ) - - batch = await self.create_and_poll( - vector_store_id=vector_store_id, - file_ids=[*file_ids, *(f.id for f in uploaded_files)], - poll_interval_ms=poll_interval_ms, - ) - return batch - class FileBatchesWithRawResponse: def __init__(self, file_batches: FileBatches) -> None: diff --git a/src/openai/resources/beta/vector_stores/files.py b/src/openai/resources/vector_stores/files.py similarity index 61% rename from src/openai/resources/beta/vector_stores/files.py rename to src/openai/resources/vector_stores/files.py index 5c3db27619..1435e72fd9 100644 --- a/src/openai/resources/beta/vector_stores/files.py +++ b/src/openai/resources/vector_stores/files.py @@ -2,29 +2,28 @@ from __future__ import annotations -from typing import TYPE_CHECKING -from typing_extensions import Literal, assert_never +from typing import Dict, Union, Optional +from typing_extensions import Literal import httpx -from .... import _legacy_response -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes -from ...._utils import ( - is_given, +from ... 
import _legacy_response +from ...types import FileChunkingStrategyParam +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( maybe_transform, async_maybe_transform, ) -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper -from ....pagination import SyncCursorPage, AsyncCursorPage -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) -from ....types.beta.vector_stores import file_list_params, file_create_params -from ....types.beta.vector_stores.vector_store_file import VectorStoreFile -from ....types.beta.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_stores import file_list_params, file_create_params, file_update_params +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_stores.vector_store_file import VectorStoreFile +from ...types.vector_stores.file_content_response import FileContentResponse +from ...types.vector_stores.vector_store_file_deleted import VectorStoreFileDeleted __all__ = ["Files", "AsyncFiles"] @@ -32,10 +31,21 @@ class Files(SyncAPIResource): @cached_property def with_raw_response(self) -> FilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return FilesWithRawResponse(self) @cached_property def with_streaming_response(self) -> FilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return FilesWithStreamingResponse(self) def create( @@ -43,6 +53,8 @@ def create( vector_store_id: str, *, file_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -60,6 +72,15 @@ def create( vector store should use. Useful for tools like `file_search` that can access files. + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -73,7 +94,14 @@ def create( extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return self._post( f"/vector_stores/{vector_store_id}/files", - body=maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), + body=maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -117,6 +145,51 @@ def retrieve( cast_to=VectorStoreFile, ) + def update( + self, + file_id: str, + *, + vector_store_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Update attributes on a vector store file. + + Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._post( + f"/vector_stores/{vector_store_id}/files/{file_id}", + body=maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + def list( self, vector_store_id: str, @@ -144,8 +217,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. 
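The hunks above add per-file `attributes` and `chunking_strategy` parameters to vector store file creation and introduce an `update` method for attributes. A hedged sketch of those calls, assuming the resource is exposed as `client.vector_stores` after the move out of `beta` and using placeholder IDs:

from openai import OpenAI

client = OpenAI()

# Attach an already-uploaded File, with optional attributes and a chunking strategy.
vs_file = client.vector_stores.files.create(
    "vs_123",                                      # placeholder vector store ID
    file_id="file_abc",                            # placeholder file ID
    attributes={"author": "jane", "reviewed": False},
    chunking_strategy={"type": "auto"},
)

# The same attributes/chunking_strategy parameters exist on batch creation.
batch = client.vector_stores.file_batches.create(
    "vs_123",
    file_ids=["file_abc", "file_def"],
    chunking_strategy={"type": "auto"},
)

# Later, update the attributes stored on a single vector store file.
vs_file = client.vector_stores.files.update(
    vs_file.id,
    vector_store_id="vs_123",
    attributes={"author": "jane", "reviewed": True},
)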
@@ -229,100 +302,63 @@ def delete( cast_to=VectorStoreFileDeleted, ) - def create_and_poll( + def content( self, file_id: str, *, vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Attach a file to the given vector store and wait for it to be processed.""" - self.create(vector_store_id=vector_store_id, file_id=file_id) + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[FileContentResponse]: + """ + Retrieve the parsed contents of a vector store file. - return self.poll( - file_id, - vector_store_id=vector_store_id, - poll_interval_ms=poll_interval_ms, - ) + Args: + extra_headers: Send extra headers - def poll( - self, - file_id: str, - *, - vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Wait for the vector store file to finish processing. + extra_query: Add additional query parameters to the request - Note: this will return even if the file failed to process, you need to check - file.last_error and file.status to handle these cases - """ - headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} - if is_given(poll_interval_ms): - headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - while True: - response = self.with_raw_response.retrieve( - file_id, - vector_store_id=vector_store_id, - extra_headers=headers, - ) - - file = response.parse() - if file.status == "in_progress": - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - self._sleep(poll_interval_ms / 1000) - elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": - return file - else: - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(file.status) - else: - return file - - def upload( - self, - *, - vector_store_id: str, - file: FileTypes, - ) -> VectorStoreFile: - """Upload a file to the `files` API and then attach it to the given vector store. + extra_body: Add additional JSON properties to the request - Note the file will be asynchronously processed (you can use the alternative - polling helper method to wait for processing to complete). 
+ timeout: Override the client-level default timeout for this request, in seconds """ - file_obj = self._client.files.create(file=file, purpose="assistants") - return self.create(vector_store_id=vector_store_id, file_id=file_obj.id) - - def upload_and_poll( - self, - *, - vector_store_id: str, - file: FileTypes, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Add a file to a vector store and poll until processing is complete.""" - file_obj = self._client.files.create(file=file, purpose="assistants") - return self.create_and_poll( - vector_store_id=vector_store_id, - file_id=file_obj.id, - poll_interval_ms=poll_interval_ms, + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files/{file_id}/content", + page=SyncPage[FileContentResponse], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=FileContentResponse, ) class AsyncFiles(AsyncAPIResource): @cached_property def with_raw_response(self) -> AsyncFilesWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncFilesWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncFilesWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncFilesWithStreamingResponse(self) async def create( @@ -330,6 +366,8 @@ async def create( vector_store_id: str, *, file_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]] | NotGiven = NOT_GIVEN, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. extra_headers: Headers | None = None, @@ -347,6 +385,15 @@ async def create( vector store should use. Useful for tools like `file_search` that can access files. + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. 
+ extra_headers: Send extra headers extra_query: Add additional query parameters to the request @@ -360,7 +407,14 @@ async def create( extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} return await self._post( f"/vector_stores/{vector_store_id}/files", - body=await async_maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams), + body=await async_maybe_transform( + { + "file_id": file_id, + "attributes": attributes, + "chunking_strategy": chunking_strategy, + }, + file_create_params.FileCreateParams, + ), options=make_request_options( extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout ), @@ -404,6 +458,51 @@ async def retrieve( cast_to=VectorStoreFile, ) + async def update( + self, + file_id: str, + *, + vector_store_id: str, + attributes: Optional[Dict[str, Union[str, float, bool]]], + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> VectorStoreFile: + """ + Update attributes on a vector store file. + + Args: + attributes: Set of 16 key-value pairs that can be attached to an object. This can be useful + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. Keys are strings with a maximum + length of 64 characters. Values are strings with a maximum length of 512 + characters, booleans, or numbers. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return await self._post( + f"/vector_stores/{vector_store_id}/files/{file_id}", + body=await async_maybe_transform({"attributes": attributes}, file_update_params.FileUpdateParams), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + cast_to=VectorStoreFile, + ) + def list( self, vector_store_id: str, @@ -431,8 +530,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`. 
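The delete/content hunks (the synchronous one earlier, the async one next) replace the removed polling helpers with a `content` method that returns the parsed chunks of a vector store file as a paginated list. A hedged sketch of the synchronous call, with placeholder IDs and an assumed `text` field on `FileContentResponse`:

from openai import OpenAI

client = OpenAI()

page = client.vector_stores.files.content(
    "file_abc",                # placeholder file ID
    vector_store_id="vs_123",  # placeholder vector store ID
)
for item in page:              # SyncPage iterates over FileContentResponse items
    print(item.text)           # field name assumed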
@@ -516,90 +615,42 @@ async def delete( cast_to=VectorStoreFileDeleted, ) - async def create_and_poll( + def content( self, file_id: str, *, vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Attach a file to the given vector store and wait for it to be processed.""" - await self.create(vector_store_id=vector_store_id, file_id=file_id) + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[FileContentResponse, AsyncPage[FileContentResponse]]: + """ + Retrieve the parsed contents of a vector store file. - return await self.poll( - file_id, - vector_store_id=vector_store_id, - poll_interval_ms=poll_interval_ms, - ) + Args: + extra_headers: Send extra headers - async def poll( - self, - file_id: str, - *, - vector_store_id: str, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Wait for the vector store file to finish processing. + extra_query: Add additional query parameters to the request - Note: this will return even if the file failed to process, you need to check - file.last_error and file.status to handle these cases - """ - headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"} - if is_given(poll_interval_ms): - headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms) - - while True: - response = await self.with_raw_response.retrieve( - file_id, - vector_store_id=vector_store_id, - extra_headers=headers, - ) - - file = response.parse() - if file.status == "in_progress": - if not is_given(poll_interval_ms): - from_header = response.headers.get("openai-poll-after-ms") - if from_header is not None: - poll_interval_ms = int(from_header) - else: - poll_interval_ms = 1000 - - await self._sleep(poll_interval_ms / 1000) - elif file.status == "cancelled" or file.status == "completed" or file.status == "failed": - return file - else: - if TYPE_CHECKING: # type: ignore[unreachable] - assert_never(file.status) - else: - return file - - async def upload( - self, - *, - vector_store_id: str, - file: FileTypes, - ) -> VectorStoreFile: - """Upload a file to the `files` API and then attach it to the given vector store. + extra_body: Add additional JSON properties to the request - Note the file will be asynchronously processed (you can use the alternative - polling helper method to wait for processing to complete). 
+ timeout: Override the client-level default timeout for this request, in seconds """ - file_obj = await self._client.files.create(file=file, purpose="assistants") - return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id) - - async def upload_and_poll( - self, - *, - vector_store_id: str, - file: FileTypes, - poll_interval_ms: int | NotGiven = NOT_GIVEN, - ) -> VectorStoreFile: - """Add a file to a vector store and poll until processing is complete.""" - file_obj = await self._client.files.create(file=file, purpose="assistants") - return await self.create_and_poll( - vector_store_id=vector_store_id, - file_id=file_obj.id, - poll_interval_ms=poll_interval_ms, + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + if not file_id: + raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/files/{file_id}/content", + page=AsyncPage[FileContentResponse], + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=FileContentResponse, ) @@ -613,12 +664,18 @@ def __init__(self, files: Files) -> None: self.retrieve = _legacy_response.to_raw_response_wrapper( files.retrieve, ) + self.update = _legacy_response.to_raw_response_wrapper( + files.update, + ) self.list = _legacy_response.to_raw_response_wrapper( files.list, ) self.delete = _legacy_response.to_raw_response_wrapper( files.delete, ) + self.content = _legacy_response.to_raw_response_wrapper( + files.content, + ) class AsyncFilesWithRawResponse: @@ -631,12 +688,18 @@ def __init__(self, files: AsyncFiles) -> None: self.retrieve = _legacy_response.async_to_raw_response_wrapper( files.retrieve, ) + self.update = _legacy_response.async_to_raw_response_wrapper( + files.update, + ) self.list = _legacy_response.async_to_raw_response_wrapper( files.list, ) self.delete = _legacy_response.async_to_raw_response_wrapper( files.delete, ) + self.content = _legacy_response.async_to_raw_response_wrapper( + files.content, + ) class FilesWithStreamingResponse: @@ -649,12 +712,18 @@ def __init__(self, files: Files) -> None: self.retrieve = to_streamed_response_wrapper( files.retrieve, ) + self.update = to_streamed_response_wrapper( + files.update, + ) self.list = to_streamed_response_wrapper( files.list, ) self.delete = to_streamed_response_wrapper( files.delete, ) + self.content = to_streamed_response_wrapper( + files.content, + ) class AsyncFilesWithStreamingResponse: @@ -667,9 +736,15 @@ def __init__(self, files: AsyncFiles) -> None: self.retrieve = async_to_streamed_response_wrapper( files.retrieve, ) + self.update = async_to_streamed_response_wrapper( + files.update, + ) self.list = async_to_streamed_response_wrapper( files.list, ) self.delete = async_to_streamed_response_wrapper( files.delete, ) + self.content = async_to_streamed_response_wrapper( + files.content, + ) diff --git a/src/openai/resources/beta/vector_stores/vector_stores.py b/src/openai/resources/vector_stores/vector_stores.py similarity index 71% rename from src/openai/resources/beta/vector_stores/vector_stores.py rename to src/openai/resources/vector_stores/vector_stores.py index 8a177c2864..aaa6ed2757 100644 --- a/src/openai/resources/beta/vector_stores/vector_stores.py +++ b/src/openai/resources/vector_stores/vector_stores.py @@ -2,12 +2,12 
@@ from __future__ import annotations -from typing import List, Optional +from typing import List, Union, Optional from typing_extensions import Literal import httpx -from .... import _legacy_response +from ... import _legacy_response from .files import ( Files, AsyncFiles, @@ -16,14 +16,22 @@ FilesWithStreamingResponse, AsyncFilesWithStreamingResponse, ) -from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven -from ...._utils import ( +from ...types import ( + FileChunkingStrategyParam, + vector_store_list_params, + vector_store_create_params, + vector_store_search_params, + vector_store_update_params, +) +from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven +from ..._utils import ( maybe_transform, async_maybe_transform, ) -from ...._compat import cached_property -from ...._resource import SyncAPIResource, AsyncAPIResource -from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ..._compat import cached_property +from ..._resource import SyncAPIResource, AsyncAPIResource +from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper +from ...pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage from .file_batches import ( FileBatches, AsyncFileBatches, @@ -32,14 +40,12 @@ FileBatchesWithStreamingResponse, AsyncFileBatchesWithStreamingResponse, ) -from ....pagination import SyncCursorPage, AsyncCursorPage -from ....types.beta import vector_store_list_params, vector_store_create_params, vector_store_update_params -from ...._base_client import ( - AsyncPaginator, - make_request_options, -) -from ....types.beta.vector_store import VectorStore -from ....types.beta.vector_store_deleted import VectorStoreDeleted +from ..._base_client import AsyncPaginator, make_request_options +from ...types.vector_store import VectorStore +from ...types.vector_store_deleted import VectorStoreDeleted +from ...types.shared_params.metadata import Metadata +from ...types.file_chunking_strategy_param import FileChunkingStrategyParam +from ...types.vector_store_search_response import VectorStoreSearchResponse __all__ = ["VectorStores", "AsyncVectorStores"] @@ -55,18 +61,30 @@ def file_batches(self) -> FileBatches: @cached_property def with_raw_response(self) -> VectorStoresWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return VectorStoresWithRawResponse(self) @cached_property def with_streaming_response(self) -> VectorStoresWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return VectorStoresWithStreamingResponse(self) def create( self, *, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, file_ids: List[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, name: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. 
@@ -79,6 +97,9 @@ def create( Create a vector store. Args: + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + expires_after: The expiration policy for a vector store. file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that @@ -86,9 +107,11 @@ def create( files. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the vector store. @@ -105,6 +128,7 @@ def create( "/vector_stores", body=maybe_transform( { + "chunking_strategy": chunking_strategy, "expires_after": expires_after, "file_ids": file_ids, "metadata": metadata, @@ -157,7 +181,7 @@ def update( vector_store_id: str, *, expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -173,9 +197,11 @@ def update( expires_after: The expiration policy for a vector store. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the vector store. @@ -232,8 +258,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -305,6 +331,69 @@ def delete( cast_to=VectorStoreDeleted, ) + def search( + self, + vector_store_id: str, + *, + query: Union[str, List[str]], + filters: vector_store_search_params.Filters | NotGiven = NOT_GIVEN, + max_num_results: int | NotGiven = NOT_GIVEN, + ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, + rewrite_query: bool | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. 
+ extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> SyncPage[VectorStoreSearchResponse]: + """ + Search a vector store for relevant chunks based on a query and file attributes + filter. + + Args: + query: A query string for a search + + filters: A filter to apply based on file attributes. + + max_num_results: The maximum number of results to return. This number should be between 1 and 50 + inclusive. + + ranking_options: Ranking options for search. + + rewrite_query: Whether to rewrite the natural language query for vector search. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/search", + page=SyncPage[VectorStoreSearchResponse], + body=maybe_transform( + { + "query": query, + "filters": filters, + "max_num_results": max_num_results, + "ranking_options": ranking_options, + "rewrite_query": rewrite_query, + }, + vector_store_search_params.VectorStoreSearchParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=VectorStoreSearchResponse, + method="post", + ) + class AsyncVectorStores(AsyncAPIResource): @cached_property @@ -317,18 +406,30 @@ def file_batches(self) -> AsyncFileBatches: @cached_property def with_raw_response(self) -> AsyncVectorStoresWithRawResponse: + """ + This property can be used as a prefix for any HTTP method call to return + the raw response object instead of the parsed content. + + For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers + """ return AsyncVectorStoresWithRawResponse(self) @cached_property def with_streaming_response(self) -> AsyncVectorStoresWithStreamingResponse: + """ + An alternative to `.with_raw_response` that doesn't eagerly read the response body. + + For more information, see https://www.github.com/openai/openai-python#with_streaming_response + """ return AsyncVectorStoresWithStreamingResponse(self) async def create( self, *, + chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN, expires_after: vector_store_create_params.ExpiresAfter | NotGiven = NOT_GIVEN, file_ids: List[str] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, name: str | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -341,6 +442,9 @@ async def create( Create a vector store. Args: + chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto` + strategy. Only applicable if `file_ids` is non-empty. + expires_after: The expiration policy for a vector store. file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that @@ -348,9 +452,11 @@ async def create( files. 
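# A minimal sketch of the new synchronous `search()` defined above; the store ID, query,
# and attribute filter are placeholders, and `filename`/`score` follow the
# VectorStoreSearchResponse model introduced in this change.
from openai import OpenAI

client = OpenAI()

results = client.vector_stores.search(
    "vs_abc123",
    query="What is the return policy?",
    max_num_results=5,
    filters={"key": "region", "type": "eq", "value": "us"},  # ComparisonFilter-shaped dict
)
for result in results:
    print(result.filename, result.score)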
metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the vector store. @@ -367,6 +473,7 @@ async def create( "/vector_stores", body=await async_maybe_transform( { + "chunking_strategy": chunking_strategy, "expires_after": expires_after, "file_ids": file_ids, "metadata": metadata, @@ -419,7 +526,7 @@ async def update( vector_store_id: str, *, expires_after: Optional[vector_store_update_params.ExpiresAfter] | NotGiven = NOT_GIVEN, - metadata: Optional[object] | NotGiven = NOT_GIVEN, + metadata: Optional[Metadata] | NotGiven = NOT_GIVEN, name: Optional[str] | NotGiven = NOT_GIVEN, # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. # The extra values given here take precedence over values defined on the client or passed to this method. @@ -435,9 +542,11 @@ async def update( expires_after: The expiration policy for a vector store. metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful - for storing additional information about the object in a structured format. Keys - can be a maximum of 64 characters long and values can be a maxium of 512 - characters long. + for storing additional information about the object in a structured format, and + querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. name: The name of the vector store. @@ -494,8 +603,8 @@ def list( before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, - ending with obj_foo, your subsequent call can include before=obj_foo in order to - fetch the previous page of the list. + starting with obj_foo, your subsequent call can include before=obj_foo in order + to fetch the previous page of the list. limit: A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20. @@ -567,6 +676,69 @@ async def delete( cast_to=VectorStoreDeleted, ) + def search( + self, + vector_store_id: str, + *, + query: Union[str, List[str]], + filters: vector_store_search_params.Filters | NotGiven = NOT_GIVEN, + max_num_results: int | NotGiven = NOT_GIVEN, + ranking_options: vector_store_search_params.RankingOptions | NotGiven = NOT_GIVEN, + rewrite_query: bool | NotGiven = NOT_GIVEN, + # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs. + # The extra values given here take precedence over values defined on the client or passed to this method. + extra_headers: Headers | None = None, + extra_query: Query | None = None, + extra_body: Body | None = None, + timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN, + ) -> AsyncPaginator[VectorStoreSearchResponse, AsyncPage[VectorStoreSearchResponse]]: + """ + Search a vector store for relevant chunks based on a query and file attributes + filter. 
+ + Args: + query: A query string for a search + + filters: A filter to apply based on file attributes. + + max_num_results: The maximum number of results to return. This number should be between 1 and 50 + inclusive. + + ranking_options: Ranking options for search. + + rewrite_query: Whether to rewrite the natural language query for vector search. + + extra_headers: Send extra headers + + extra_query: Add additional query parameters to the request + + extra_body: Add additional JSON properties to the request + + timeout: Override the client-level default timeout for this request, in seconds + """ + if not vector_store_id: + raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}") + extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})} + return self._get_api_list( + f"/vector_stores/{vector_store_id}/search", + page=AsyncPage[VectorStoreSearchResponse], + body=maybe_transform( + { + "query": query, + "filters": filters, + "max_num_results": max_num_results, + "ranking_options": ranking_options, + "rewrite_query": rewrite_query, + }, + vector_store_search_params.VectorStoreSearchParams, + ), + options=make_request_options( + extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout + ), + model=VectorStoreSearchResponse, + method="post", + ) + class VectorStoresWithRawResponse: def __init__(self, vector_stores: VectorStores) -> None: @@ -587,6 +759,9 @@ def __init__(self, vector_stores: VectorStores) -> None: self.delete = _legacy_response.to_raw_response_wrapper( vector_stores.delete, ) + self.search = _legacy_response.to_raw_response_wrapper( + vector_stores.search, + ) @cached_property def files(self) -> FilesWithRawResponse: @@ -616,6 +791,9 @@ def __init__(self, vector_stores: AsyncVectorStores) -> None: self.delete = _legacy_response.async_to_raw_response_wrapper( vector_stores.delete, ) + self.search = _legacy_response.async_to_raw_response_wrapper( + vector_stores.search, + ) @cached_property def files(self) -> AsyncFilesWithRawResponse: @@ -645,6 +823,9 @@ def __init__(self, vector_stores: VectorStores) -> None: self.delete = to_streamed_response_wrapper( vector_stores.delete, ) + self.search = to_streamed_response_wrapper( + vector_stores.search, + ) @cached_property def files(self) -> FilesWithStreamingResponse: @@ -674,6 +855,9 @@ def __init__(self, vector_stores: AsyncVectorStores) -> None: self.delete = async_to_streamed_response_wrapper( vector_stores.delete, ) + self.search = async_to_streamed_response_wrapper( + vector_stores.search, + ) @cached_property def files(self) -> AsyncFilesWithStreamingResponse: diff --git a/src/openai/types/__init__.py b/src/openai/types/__init__.py index 7873efb34f..4c337d41c7 100644 --- a/src/openai/types/__init__.py +++ b/src/openai/types/__init__.py @@ -6,32 +6,71 @@ from .image import Image as Image from .model import Model as Model from .shared import ( + Metadata as Metadata, + ChatModel as ChatModel, + Reasoning as Reasoning, ErrorObject as ErrorObject, + CompoundFilter as CompoundFilter, + ReasoningEffort as ReasoningEffort, + ComparisonFilter as ComparisonFilter, FunctionDefinition as FunctionDefinition, FunctionParameters as FunctionParameters, + ResponseFormatText as ResponseFormatText, + ResponseFormatJSONObject as ResponseFormatJSONObject, + ResponseFormatJSONSchema as ResponseFormatJSONSchema, ) +from .upload import Upload as Upload from .embedding import Embedding as Embedding from .chat_model import ChatModel as ChatModel from 
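# The async variant above returns an AsyncPaginator, so results are consumed with
# `async for` rather than awaiting the call itself; IDs and query are placeholders.
import asyncio

from openai import AsyncOpenAI

client = AsyncOpenAI()


async def main() -> None:
    async for result in client.vector_stores.search(
        "vs_abc123",
        query="What is the return policy?",
    ):
        print(result.filename, result.score)


asyncio.run(main())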
.completion import Completion as Completion from .moderation import Moderation as Moderation +from .audio_model import AudioModel as AudioModel from .batch_error import BatchError as BatchError from .file_object import FileObject as FileObject +from .image_model import ImageModel as ImageModel from .file_content import FileContent as FileContent from .file_deleted import FileDeleted as FileDeleted +from .file_purpose import FilePurpose as FilePurpose +from .vector_store import VectorStore as VectorStore from .model_deleted import ModelDeleted as ModelDeleted +from .embedding_model import EmbeddingModel as EmbeddingModel from .images_response import ImagesResponse as ImagesResponse from .completion_usage import CompletionUsage as CompletionUsage from .file_list_params import FileListParams as FileListParams +from .moderation_model import ModerationModel as ModerationModel from .batch_list_params import BatchListParams as BatchListParams from .completion_choice import CompletionChoice as CompletionChoice from .image_edit_params import ImageEditParams as ImageEditParams from .file_create_params import FileCreateParams as FileCreateParams from .batch_create_params import BatchCreateParams as BatchCreateParams from .batch_request_counts import BatchRequestCounts as BatchRequestCounts +from .upload_create_params import UploadCreateParams as UploadCreateParams +from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted +from .audio_response_format import AudioResponseFormat as AudioResponseFormat from .image_generate_params import ImageGenerateParams as ImageGenerateParams +from .file_chunking_strategy import FileChunkingStrategy as FileChunkingStrategy +from .upload_complete_params import UploadCompleteParams as UploadCompleteParams from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams from .completion_create_params import CompletionCreateParams as CompletionCreateParams from .moderation_create_params import ModerationCreateParams as ModerationCreateParams +from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse from .moderation_create_response import ModerationCreateResponse as ModerationCreateResponse +from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams +from .vector_store_search_params import VectorStoreSearchParams as VectorStoreSearchParams +from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams +from .moderation_text_input_param import ModerationTextInputParam as ModerationTextInputParam +from .file_chunking_strategy_param import FileChunkingStrategyParam as FileChunkingStrategyParam +from .vector_store_search_response import VectorStoreSearchResponse as VectorStoreSearchResponse +from .websocket_connection_options import WebsocketConnectionOptions as WebsocketConnectionOptions from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams +from .static_file_chunking_strategy import StaticFileChunkingStrategy as StaticFileChunkingStrategy +from .moderation_image_url_input_param import ModerationImageURLInputParam as ModerationImageURLInputParam +from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam as AutoFileChunkingStrategyParam +from .moderation_multi_modal_input_param import ModerationMultiModalInputParam as ModerationMultiModalInputParam +from .other_file_chunking_strategy_object import 
OtherFileChunkingStrategyObject as OtherFileChunkingStrategyObject +from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam as StaticFileChunkingStrategyParam +from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject as StaticFileChunkingStrategyObject +from .static_file_chunking_strategy_object_param import ( + StaticFileChunkingStrategyObjectParam as StaticFileChunkingStrategyObjectParam, +) diff --git a/src/openai/types/audio/__init__.py b/src/openai/types/audio/__init__.py index 8d2c44c86a..822e0f3a8d 100644 --- a/src/openai/types/audio/__init__.py +++ b/src/openai/types/audio/__init__.py @@ -3,7 +3,14 @@ from __future__ import annotations from .translation import Translation as Translation +from .speech_model import SpeechModel as SpeechModel from .transcription import Transcription as Transcription +from .transcription_word import TranscriptionWord as TranscriptionWord +from .translation_verbose import TranslationVerbose as TranslationVerbose from .speech_create_params import SpeechCreateParams as SpeechCreateParams +from .transcription_segment import TranscriptionSegment as TranscriptionSegment +from .transcription_verbose import TranscriptionVerbose as TranscriptionVerbose from .translation_create_params import TranslationCreateParams as TranslationCreateParams from .transcription_create_params import TranscriptionCreateParams as TranscriptionCreateParams +from .translation_create_response import TranslationCreateResponse as TranslationCreateResponse +from .transcription_create_response import TranscriptionCreateResponse as TranscriptionCreateResponse diff --git a/src/openai/types/audio/speech_create_params.py b/src/openai/types/audio/speech_create_params.py index 8d75ec4ccc..ed1a1ce748 100644 --- a/src/openai/types/audio/speech_create_params.py +++ b/src/openai/types/audio/speech_create_params.py @@ -5,6 +5,8 @@ from typing import Union from typing_extensions import Literal, Required, TypedDict +from .speech_model import SpeechModel + __all__ = ["SpeechCreateParams"] @@ -12,18 +14,18 @@ class SpeechCreateParams(TypedDict, total=False): input: Required[str] """The text to generate audio for. The maximum length is 4096 characters.""" - model: Required[Union[str, Literal["tts-1", "tts-1-hd"]]] + model: Required[Union[str, SpeechModel]] """ - One of the available [TTS models](https://platform.openai.com/docs/models/tts): + One of the available [TTS models](https://platform.openai.com/docs/models#tts): `tts-1` or `tts-1-hd` """ - voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]] + voice: Required[Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]] """The voice to use when generating the audio. - Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`. - Previews of the voices are available in the - [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options). + Supported voices are `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`, + `sage` and `shimmer`. Previews of the voices are available in the + [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options). 
""" response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] diff --git a/src/openai/types/audio/speech_model.py b/src/openai/types/audio/speech_model.py new file mode 100644 index 0000000000..bd685ab34d --- /dev/null +++ b/src/openai/types/audio/speech_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["SpeechModel"] + +SpeechModel: TypeAlias = Literal["tts-1", "tts-1-hd"] diff --git a/src/openai/types/audio/transcription.py b/src/openai/types/audio/transcription.py index 0b6ab39e78..edb5f227fc 100644 --- a/src/openai/types/audio/transcription.py +++ b/src/openai/types/audio/transcription.py @@ -1,7 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from ..._models import BaseModel __all__ = ["Transcription"] diff --git a/src/openai/types/audio/transcription_create_params.py b/src/openai/types/audio/transcription_create_params.py index 6b2d5bae79..f1779c35e6 100644 --- a/src/openai/types/audio/transcription_create_params.py +++ b/src/openai/types/audio/transcription_create_params.py @@ -6,6 +6,8 @@ from typing_extensions import Literal, Required, TypedDict from ..._types import FileTypes +from ..audio_model import AudioModel +from ..audio_response_format import AudioResponseFormat __all__ = ["TranscriptionCreateParams"] @@ -17,7 +19,7 @@ class TranscriptionCreateParams(TypedDict, total=False): flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. """ - model: Required[Union[str, Literal["whisper-1"]]] + model: Required[Union[str, AudioModel]] """ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is @@ -28,22 +30,22 @@ class TranscriptionCreateParams(TypedDict, total=False): """The language of the input audio. Supplying the input language in - [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will - improve accuracy and latency. + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. """ prompt: str """An optional text to guide the model's style or continue a previous audio segment. - The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio language. """ - response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] + response_format: AudioResponseFormat """ - The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. """ temperature: float diff --git a/src/openai/types/audio/transcription_create_response.py b/src/openai/types/audio/transcription_create_response.py new file mode 100644 index 0000000000..2f7bed8114 --- /dev/null +++ b/src/openai/types/audio/transcription_create_response.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import TypeAlias + +from .transcription import Transcription +from .transcription_verbose import TranscriptionVerbose + +__all__ = ["TranscriptionCreateResponse"] + +TranscriptionCreateResponse: TypeAlias = Union[Transcription, TranscriptionVerbose] diff --git a/src/openai/types/audio/transcription_segment.py b/src/openai/types/audio/transcription_segment.py new file mode 100644 index 0000000000..522c401ebb --- /dev/null +++ b/src/openai/types/audio/transcription_segment.py @@ -0,0 +1,49 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List + +from ..._models import BaseModel + +__all__ = ["TranscriptionSegment"] + + +class TranscriptionSegment(BaseModel): + id: int + """Unique identifier of the segment.""" + + avg_logprob: float + """Average logprob of the segment. + + If the value is lower than -1, consider the logprobs failed. + """ + + compression_ratio: float + """Compression ratio of the segment. + + If the value is greater than 2.4, consider the compression failed. + """ + + end: float + """End time of the segment in seconds.""" + + no_speech_prob: float + """Probability of no speech in the segment. + + If the value is higher than 1.0 and the `avg_logprob` is below -1, consider this + segment silent. + """ + + seek: int + """Seek offset of the segment.""" + + start: float + """Start time of the segment in seconds.""" + + temperature: float + """Temperature parameter used for generating the segment.""" + + text: str + """Text content of the segment.""" + + tokens: List[int] + """Array of token IDs for the text content.""" diff --git a/src/openai/types/audio/transcription_verbose.py b/src/openai/types/audio/transcription_verbose.py new file mode 100644 index 0000000000..2a670189e0 --- /dev/null +++ b/src/openai/types/audio/transcription_verbose.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional + +from ..._models import BaseModel +from .transcription_word import TranscriptionWord +from .transcription_segment import TranscriptionSegment + +__all__ = ["TranscriptionVerbose"] + + +class TranscriptionVerbose(BaseModel): + duration: float + """The duration of the input audio.""" + + language: str + """The language of the input audio.""" + + text: str + """The transcribed text.""" + + segments: Optional[List[TranscriptionSegment]] = None + """Segments of the transcribed text and their corresponding details.""" + + words: Optional[List[TranscriptionWord]] = None + """Extracted words and their corresponding timestamps.""" diff --git a/src/openai/types/audio/transcription_word.py b/src/openai/types/audio/transcription_word.py new file mode 100644 index 0000000000..969da32509 --- /dev/null +++ b/src/openai/types/audio/transcription_word.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ + +from ..._models import BaseModel + +__all__ = ["TranscriptionWord"] + + +class TranscriptionWord(BaseModel): + end: float + """End time of the word in seconds.""" + + start: float + """Start time of the word in seconds.""" + + word: str + """The text content of the word.""" diff --git a/src/openai/types/audio/translation.py b/src/openai/types/audio/translation.py index 3d9ede2939..7c0e905189 100644 --- a/src/openai/types/audio/translation.py +++ b/src/openai/types/audio/translation.py @@ -1,7 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from ..._models import BaseModel __all__ = ["Translation"] diff --git a/src/openai/types/audio/translation_create_params.py b/src/openai/types/audio/translation_create_params.py index f23a41ed5c..62f85b8757 100644 --- a/src/openai/types/audio/translation_create_params.py +++ b/src/openai/types/audio/translation_create_params.py @@ -3,9 +3,11 @@ from __future__ import annotations from typing import Union -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Required, TypedDict from ..._types import FileTypes +from ..audio_model import AudioModel +from ..audio_response_format import AudioResponseFormat __all__ = ["TranslationCreateParams"] @@ -17,7 +19,7 @@ class TranslationCreateParams(TypedDict, total=False): mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm. """ - model: Required[Union[str, Literal["whisper-1"]]] + model: Required[Union[str, AudioModel]] """ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is @@ -28,14 +30,14 @@ class TranslationCreateParams(TypedDict, total=False): """An optional text to guide the model's style or continue a previous audio segment. - The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting) + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English. """ - response_format: str + response_format: AudioResponseFormat """ - The format of the transcript output, in one of these options: `json`, `text`, - `srt`, `verbose_json`, or `vtt`. + The format of the output, in one of these options: `json`, `text`, `srt`, + `verbose_json`, or `vtt`. """ temperature: float diff --git a/src/openai/types/audio/translation_create_response.py b/src/openai/types/audio/translation_create_response.py new file mode 100644 index 0000000000..9953813c08 --- /dev/null +++ b/src/openai/types/audio/translation_create_response.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import TypeAlias + +from .translation import Translation +from .translation_verbose import TranslationVerbose + +__all__ = ["TranslationCreateResponse"] + +TranslationCreateResponse: TypeAlias = Union[Translation, TranslationVerbose] diff --git a/src/openai/types/audio/translation_verbose.py b/src/openai/types/audio/translation_verbose.py new file mode 100644 index 0000000000..27cb02d64f --- /dev/null +++ b/src/openai/types/audio/translation_verbose.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
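# A sketch of requesting `verbose_json` output so the new TranscriptionVerbose model
# (with `segments` and `words`) is returned; the audio path is a placeholder.
from openai import OpenAI

client = OpenAI()

with open("meeting.mp3", "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file,
        response_format="verbose_json",
    )

print(transcript.duration, transcript.language)
for segment in transcript.segments or []:
    print(f"[{segment.start:.1f}-{segment.end:.1f}] {segment.text}")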
+ +from typing import List, Optional + +from ..._models import BaseModel +from .transcription_segment import TranscriptionSegment + +__all__ = ["TranslationVerbose"] + + +class TranslationVerbose(BaseModel): + duration: float + """The duration of the input audio.""" + + language: str + """The language of the output translation (always `english`).""" + + text: str + """The translated text.""" + + segments: Optional[List[TranscriptionSegment]] = None + """Segments of the translated text and their corresponding details.""" diff --git a/src/openai/types/audio_model.py b/src/openai/types/audio_model.py new file mode 100644 index 0000000000..94ae84c015 --- /dev/null +++ b/src/openai/types/audio_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["AudioModel"] + +AudioModel: TypeAlias = Literal["whisper-1"] diff --git a/src/openai/types/audio_response_format.py b/src/openai/types/audio_response_format.py new file mode 100644 index 0000000000..f8c8d45945 --- /dev/null +++ b/src/openai/types/audio_response_format.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["AudioResponseFormat"] + +AudioResponseFormat: TypeAlias = Literal["json", "text", "srt", "verbose_json", "vtt"] diff --git a/src/openai/types/auto_file_chunking_strategy_param.py b/src/openai/types/auto_file_chunking_strategy_param.py new file mode 100644 index 0000000000..6f17836bac --- /dev/null +++ b/src/openai/types/auto_file_chunking_strategy_param.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["AutoFileChunkingStrategyParam"] + + +class AutoFileChunkingStrategyParam(TypedDict, total=False): + type: Required[Literal["auto"]] + """Always `auto`.""" diff --git a/src/openai/types/batch.py b/src/openai/types/batch.py index 90f6d79572..35de90ac85 100644 --- a/src/openai/types/batch.py +++ b/src/openai/types/batch.py @@ -1,11 +1,11 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -import builtins from typing import List, Optional from typing_extensions import Literal from .._models import BaseModel from .batch_error import BatchError +from .shared.metadata import Metadata from .batch_request_counts import BatchRequestCounts __all__ = ["Batch", "Errors"] @@ -70,12 +70,14 @@ class Batch(BaseModel): in_progress_at: Optional[int] = None """The Unix timestamp (in seconds) for when the batch started processing.""" - metadata: Optional[builtins.object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" output_file_id: Optional[str] = None diff --git a/src/openai/types/batch_create_params.py b/src/openai/types/batch_create_params.py index 140380d417..cc95afd3ba 100644 --- a/src/openai/types/batch_create_params.py +++ b/src/openai/types/batch_create_params.py @@ -2,9 +2,11 @@ from __future__ import annotations -from typing import Dict, Optional +from typing import Optional from typing_extensions import Literal, Required, TypedDict +from .shared_params.metadata import Metadata + __all__ = ["BatchCreateParams"] @@ -15,12 +17,13 @@ class BatchCreateParams(TypedDict, total=False): Currently only `24h` is supported. """ - endpoint: Required[Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"]] + endpoint: Required[Literal["/v1/responses", "/v1/chat/completions", "/v1/embeddings", "/v1/completions"]] """The endpoint to be used for all requests in the batch. - Currently `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are - supported. Note that `/v1/embeddings` batches are also restricted to a maximum - of 50,000 embedding inputs across all requests in the batch. + Currently `/v1/responses`, `/v1/chat/completions`, `/v1/embeddings`, and + `/v1/completions` are supported. Note that `/v1/embeddings` batches are also + restricted to a maximum of 50,000 embedding inputs across all requests in the + batch. """ input_file_id: Required[str] @@ -30,10 +33,17 @@ class BatchCreateParams(TypedDict, total=False): for how to upload a file. Your input file must be formatted as a - [JSONL file](https://platform.openai.com/docs/api-reference/batch/requestInput), + [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 - requests, and can be up to 100 MB in size. + requests, and can be up to 200 MB in size. """ - metadata: Optional[Dict[str, str]] - """Optional custom metadata for the batch.""" + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/batch_request_counts.py b/src/openai/types/batch_request_counts.py index ef6c84a0a1..7e1d49fb88 100644 --- a/src/openai/types/batch_request_counts.py +++ b/src/openai/types/batch_request_counts.py @@ -1,7 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- from .._models import BaseModel __all__ = ["BatchRequestCounts"] diff --git a/src/openai/types/beta/__init__.py b/src/openai/types/beta/__init__.py index d851a3619c..5ba3eadf3c 100644 --- a/src/openai/types/beta/__init__.py +++ b/src/openai/types/beta/__init__.py @@ -4,7 +4,6 @@ from .thread import Thread as Thread from .assistant import Assistant as Assistant -from .vector_store import VectorStore as VectorStore from .function_tool import FunctionTool as FunctionTool from .assistant_tool import AssistantTool as AssistantTool from .thread_deleted import ThreadDeleted as ThreadDeleted @@ -14,7 +13,6 @@ from .assistant_tool_param import AssistantToolParam as AssistantToolParam from .thread_create_params import ThreadCreateParams as ThreadCreateParams from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams -from .vector_store_deleted import VectorStoreDeleted as VectorStoreDeleted from .assistant_list_params import AssistantListParams as AssistantListParams from .assistant_tool_choice import AssistantToolChoice as AssistantToolChoice from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool @@ -22,16 +20,11 @@ from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam from .assistant_create_params import AssistantCreateParams as AssistantCreateParams from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams -from .vector_store_list_params import VectorStoreListParams as VectorStoreListParams -from .assistant_response_format import AssistantResponseFormat as AssistantResponseFormat -from .vector_store_create_params import VectorStoreCreateParams as VectorStoreCreateParams -from .vector_store_update_params import VectorStoreUpdateParams as VectorStoreUpdateParams from .assistant_tool_choice_param import AssistantToolChoiceParam as AssistantToolChoiceParam from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam from .assistant_tool_choice_option import AssistantToolChoiceOption as AssistantToolChoiceOption from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams from .assistant_tool_choice_function import AssistantToolChoiceFunction as AssistantToolChoiceFunction -from .assistant_response_format_param import AssistantResponseFormatParam as AssistantResponseFormatParam from .assistant_response_format_option import AssistantResponseFormatOption as AssistantResponseFormatOption from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam as AssistantToolChoiceOptionParam from .assistant_tool_choice_function_param import AssistantToolChoiceFunctionParam as AssistantToolChoiceFunctionParam diff --git a/src/openai/types/beta/assistant.py b/src/openai/types/beta/assistant.py index 4e5adc766e..58421e0f66 100644 --- a/src/openai/types/beta/assistant.py +++ b/src/openai/types/beta/assistant.py @@ -5,6 +5,7 @@ from ..._models import BaseModel from .assistant_tool import AssistantTool +from ..shared.metadata import Metadata from .assistant_response_format_option import AssistantResponseFormatOption __all__ = ["Assistant", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] @@ -51,12 +52,14 @@ class Assistant(BaseModel): The maximum length is 256,000 characters. """ - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. 
Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ model: str @@ -65,8 +68,8 @@ class Assistant(BaseModel): You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ name: Optional[str] = None @@ -85,11 +88,16 @@ class Assistant(BaseModel): response_format: Optional[AssistantResponseFormatOption] = None """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/assistant_create_params.py b/src/openai/types/beta/assistant_create_params.py index 67e7f7e78c..8b3c331850 100644 --- a/src/openai/types/beta/assistant_create_params.py +++ b/src/openai/types/beta/assistant_create_params.py @@ -3,9 +3,12 @@ from __future__ import annotations from typing import List, Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, Required, TypeAlias, TypedDict +from ..shared.chat_model import ChatModel from .assistant_tool_param import AssistantToolParam +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort from .assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = [ @@ -14,44 +17,22 @@ "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch", "ToolResourcesFileSearchVectorStore", + "ToolResourcesFileSearchVectorStoreChunkingStrategy", + "ToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", ] class AssistantCreateParams(TypedDict, total=False): - model: Required[ - Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - ] - ] + model: Required[Union[str, ChatModel]] """ID of the model to use. 
You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ description: Optional[str] @@ -63,25 +44,41 @@ class AssistantCreateParams(TypedDict, total=False): The maximum length is 256,000 characters. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ name: Optional[str] """The name of the assistant. The maximum length is 256 characters.""" + reasoning_effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + response_format: Optional[AssistantResponseFormatOptionParam] """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -134,7 +131,45 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False): """ +class ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + type: Required[Literal["auto"]] + """Always `auto`.""" + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. 
+ """ + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ToolResourcesFileSearchVectorStoreChunkingStrategyAuto, ToolResourcesFileSearchVectorStoreChunkingStrategyStatic +] + + class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. + """ + file_ids: List[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to @@ -142,12 +177,14 @@ class ToolResourcesFileSearchVectorStore(TypedDict, total=False): store. """ - metadata: object - """Set of 16 key-value pairs that can be attached to a vector store. + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. - This can be useful for storing additional information about the vector store in - a structured format. Keys can be a maximum of 64 characters long and values can - be a maxium of 512 characters long. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ diff --git a/src/openai/types/beta/assistant_list_params.py b/src/openai/types/beta/assistant_list_params.py index f54f63120b..834ffbcaf8 100644 --- a/src/openai/types/beta/assistant_list_params.py +++ b/src/openai/types/beta/assistant_list_params.py @@ -21,7 +21,7 @@ class AssistantListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/beta/assistant_response_format.py b/src/openai/types/beta/assistant_response_format.py deleted file mode 100644 index f53bdaf62a..0000000000 --- a/src/openai/types/beta/assistant_response_format.py +++ /dev/null @@ -1,13 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from typing import Optional -from typing_extensions import Literal - -from ..._models import BaseModel - -__all__ = ["AssistantResponseFormat"] - - -class AssistantResponseFormat(BaseModel): - type: Optional[Literal["text", "json_object"]] = None - """Must be one of `text` or `json_object`.""" diff --git a/src/openai/types/beta/assistant_response_format_option.py b/src/openai/types/beta/assistant_response_format_option.py index d4e05e0ea9..6f06a3442f 100644 --- a/src/openai/types/beta/assistant_response_format_option.py +++ b/src/openai/types/beta/assistant_response_format_option.py @@ -1,10 +1,14 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias -from .assistant_response_format import AssistantResponseFormat +from ..shared.response_format_text import ResponseFormatText +from ..shared.response_format_json_object import ResponseFormatJSONObject +from ..shared.response_format_json_schema import ResponseFormatJSONSchema __all__ = ["AssistantResponseFormatOption"] -AssistantResponseFormatOption = Union[Literal["none", "auto"], AssistantResponseFormat] +AssistantResponseFormatOption: TypeAlias = Union[ + Literal["auto"], ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema +] diff --git a/src/openai/types/beta/assistant_response_format_option_param.py b/src/openai/types/beta/assistant_response_format_option_param.py index 46e04125d1..5e724a4d98 100644 --- a/src/openai/types/beta/assistant_response_format_option_param.py +++ b/src/openai/types/beta/assistant_response_format_option_param.py @@ -3,10 +3,14 @@ from __future__ import annotations from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias -from .assistant_response_format_param import AssistantResponseFormatParam +from ..shared_params.response_format_text import ResponseFormatText +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from ..shared_params.response_format_json_schema import ResponseFormatJSONSchema __all__ = ["AssistantResponseFormatOptionParam"] -AssistantResponseFormatOptionParam = Union[Literal["none", "auto"], AssistantResponseFormatParam] +AssistantResponseFormatOptionParam: TypeAlias = Union[ + Literal["auto"], ResponseFormatText, ResponseFormatJSONObject, ResponseFormatJSONSchema +] diff --git a/src/openai/types/beta/assistant_response_format_param.py b/src/openai/types/beta/assistant_response_format_param.py deleted file mode 100644 index 96e1d02115..0000000000 --- a/src/openai/types/beta/assistant_response_format_param.py +++ /dev/null @@ -1,12 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing_extensions import Literal, TypedDict - -__all__ = ["AssistantResponseFormatParam"] - - -class AssistantResponseFormatParam(TypedDict, total=False): - type: Literal["text", "json_object"] - """Must be one of `text` or `json_object`.""" diff --git a/src/openai/types/beta/assistant_stream_event.py b/src/openai/types/beta/assistant_stream_event.py index 91925e93b3..41d3a0c5ea 100644 --- a/src/openai/types/beta/assistant_stream_event.py +++ b/src/openai/types/beta/assistant_stream_event.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
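# A sketch of the JSON-schema response format now accepted by the assistant response
# format option above; the model, instructions, and schema are placeholders.
from openai import OpenAI

client = OpenAI()

assistant = client.beta.assistants.create(
    model="gpt-4o",
    instructions="Extract the requested fields and reply as JSON.",
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "ticket",
            "schema": {
                "type": "object",
                "properties": {"title": {"type": "string"}},
                "required": ["title"],
                "additionalProperties": False,
            },
        },
    },
)
print(assistant.id)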
-from typing import Union -from typing_extensions import Literal, Annotated +from typing import Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias from .thread import Thread from ..._utils import PropertyInfo @@ -21,6 +21,7 @@ "ThreadRunInProgress", "ThreadRunRequiresAction", "ThreadRunCompleted", + "ThreadRunIncomplete", "ThreadRunFailed", "ThreadRunCancelling", "ThreadRunCancelled", @@ -50,6 +51,9 @@ class ThreadCreated(BaseModel): event: Literal["thread.created"] + enabled: Optional[bool] = None + """Whether to enable input audio transcription.""" + class ThreadRunCreated(BaseModel): data: Run @@ -101,6 +105,16 @@ class ThreadRunCompleted(BaseModel): event: Literal["thread.run.completed"] +class ThreadRunIncomplete(BaseModel): + data: Run + """ + Represents an execution run on a + [thread](https://platform.openai.com/docs/api-reference/threads). + """ + + event: Literal["thread.run.incomplete"] + + class ThreadRunFailed(BaseModel): data: Run """ @@ -249,7 +263,7 @@ class ErrorEvent(BaseModel): event: Literal["error"] -AssistantStreamEvent = Annotated[ +AssistantStreamEvent: TypeAlias = Annotated[ Union[ ThreadCreated, ThreadRunCreated, @@ -257,6 +271,7 @@ class ErrorEvent(BaseModel): ThreadRunInProgress, ThreadRunRequiresAction, ThreadRunCompleted, + ThreadRunIncomplete, ThreadRunFailed, ThreadRunCancelling, ThreadRunCancelled, diff --git a/src/openai/types/beta/assistant_tool.py b/src/openai/types/beta/assistant_tool.py index 7832da48cc..1bde6858b1 100644 --- a/src/openai/types/beta/assistant_tool.py +++ b/src/openai/types/beta/assistant_tool.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ..._utils import PropertyInfo from .function_tool import FunctionTool @@ -10,4 +10,6 @@ __all__ = ["AssistantTool"] -AssistantTool = Annotated[Union[CodeInterpreterTool, FileSearchTool, FunctionTool], PropertyInfo(discriminator="type")] +AssistantTool: TypeAlias = Annotated[ + Union[CodeInterpreterTool, FileSearchTool, FunctionTool], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/beta/assistant_tool_choice_function.py b/src/openai/types/beta/assistant_tool_choice_function.py index d0d4255357..0c896d8087 100644 --- a/src/openai/types/beta/assistant_tool_choice_function.py +++ b/src/openai/types/beta/assistant_tool_choice_function.py @@ -1,7 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - from ..._models import BaseModel __all__ = ["AssistantToolChoiceFunction"] diff --git a/src/openai/types/beta/assistant_tool_choice_option.py b/src/openai/types/beta/assistant_tool_choice_option.py index 8958bc8fb0..e57c3278fb 100644 --- a/src/openai/types/beta/assistant_tool_choice_option.py +++ b/src/openai/types/beta/assistant_tool_choice_option.py @@ -1,10 +1,10 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from .assistant_tool_choice import AssistantToolChoice __all__ = ["AssistantToolChoiceOption"] -AssistantToolChoiceOption = Union[Literal["none", "auto", "required"], AssistantToolChoice] +AssistantToolChoiceOption: TypeAlias = Union[Literal["none", "auto", "required"], AssistantToolChoice] diff --git a/src/openai/types/beta/assistant_tool_choice_option_param.py b/src/openai/types/beta/assistant_tool_choice_option_param.py index 81b7f15136..cc0053d37e 100644 --- a/src/openai/types/beta/assistant_tool_choice_option_param.py +++ b/src/openai/types/beta/assistant_tool_choice_option_param.py @@ -3,10 +3,10 @@ from __future__ import annotations from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from .assistant_tool_choice_param import AssistantToolChoiceParam __all__ = ["AssistantToolChoiceOptionParam"] -AssistantToolChoiceOptionParam = Union[Literal["none", "auto", "required"], AssistantToolChoiceParam] +AssistantToolChoiceOptionParam: TypeAlias = Union[Literal["none", "auto", "required"], AssistantToolChoiceParam] diff --git a/src/openai/types/beta/assistant_tool_param.py b/src/openai/types/beta/assistant_tool_param.py index 5b1d30ba2f..321c4b1ddb 100644 --- a/src/openai/types/beta/assistant_tool_param.py +++ b/src/openai/types/beta/assistant_tool_param.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Union +from typing_extensions import TypeAlias from .function_tool_param import FunctionToolParam from .file_search_tool_param import FileSearchToolParam @@ -10,4 +11,4 @@ __all__ = ["AssistantToolParam"] -AssistantToolParam = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] +AssistantToolParam: TypeAlias = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] diff --git a/src/openai/types/beta/assistant_update_params.py b/src/openai/types/beta/assistant_update_params.py index b401e1a891..d3ec7614fd 100644 --- a/src/openai/types/beta/assistant_update_params.py +++ b/src/openai/types/beta/assistant_update_params.py @@ -2,10 +2,12 @@ from __future__ import annotations -from typing import List, Iterable, Optional -from typing_extensions import TypedDict +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, TypedDict from .assistant_tool_param import AssistantToolParam +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort from .assistant_response_format_option_param import AssistantResponseFormatOptionParam __all__ = ["AssistantUpdateParams", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] @@ -21,35 +23,85 @@ class AssistantUpdateParams(TypedDict, total=False): The maximum length is 256,000 characters. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
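
Editor's note: `tool_choice` keeps the same runtime shape here; only the alias gains an explicit `TypeAlias` annotation. A small sketch of the values the param alias accepts, assuming an SDK build containing this diff; the object form's keys come from `AssistantToolChoiceParam`, which is not shown in this diff, so they are assumptions.

from openai.types.beta.assistant_tool_choice_option_param import (
    AssistantToolChoiceOptionParam,
)

# Literal modes defined by the alias itself.
no_tools: AssistantToolChoiceOptionParam = "none"
let_model_decide: AssistantToolChoiceOptionParam = "auto"
must_use_a_tool: AssistantToolChoiceOptionParam = "required"

# Forcing a specific tool uses AssistantToolChoiceParam; the "type"/"function"
# keys below are assumed for illustration and do not appear in this diff.
force_function: AssistantToolChoiceOptionParam = {
    "type": "function",
    "function": {"name": "get_weather"},
}
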
""" - model: str + model: Union[ + str, + Literal[ + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", + ], + ] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ name: Optional[str] """The name of the assistant. The maximum length is 256 characters.""" + reasoning_effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + response_format: Optional[AssistantResponseFormatOptionParam] """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/file_search_tool.py b/src/openai/types/beta/file_search_tool.py index eea55ea6ac..89fc16c04c 100644 --- a/src/openai/types/beta/file_search_tool.py +++ b/src/openai/types/beta/file_search_tool.py @@ -1,12 +1,55 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel -__all__ = ["FileSearchTool"] +__all__ = ["FileSearchTool", "FileSearch", "FileSearchRankingOptions"] + + +class FileSearchRankingOptions(BaseModel): + score_threshold: float + """The score threshold for the file search. + + All values must be a floating point number between 0 and 1. + """ + + ranker: Optional[Literal["auto", "default_2024_08_21"]] = None + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. 
+ """ + + +class FileSearch(BaseModel): + max_num_results: Optional[int] = None + """The maximum number of results the file search tool should output. + + The default is 20 for `gpt-4*` models and 5 for `gpt-3.5-turbo`. This number + should be between 1 and 50 inclusive. + + Note that the file search tool may output fewer than `max_num_results` results. + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + ranking_options: Optional[FileSearchRankingOptions] = None + """The ranking options for the file search. + + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ class FileSearchTool(BaseModel): type: Literal["file_search"] """The type of tool being defined: `file_search`""" + + file_search: Optional[FileSearch] = None + """Overrides for the file search tool.""" diff --git a/src/openai/types/beta/file_search_tool_param.py b/src/openai/types/beta/file_search_tool_param.py index d33fd06da4..c73d0af79d 100644 --- a/src/openai/types/beta/file_search_tool_param.py +++ b/src/openai/types/beta/file_search_tool_param.py @@ -4,9 +4,51 @@ from typing_extensions import Literal, Required, TypedDict -__all__ = ["FileSearchToolParam"] +__all__ = ["FileSearchToolParam", "FileSearch", "FileSearchRankingOptions"] + + +class FileSearchRankingOptions(TypedDict, total=False): + score_threshold: Required[float] + """The score threshold for the file search. + + All values must be a floating point number between 0 and 1. + """ + + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. + """ + + +class FileSearch(TypedDict, total=False): + max_num_results: int + """The maximum number of results the file search tool should output. + + The default is 20 for `gpt-4*` models and 5 for `gpt-3.5-turbo`. This number + should be between 1 and 50 inclusive. + + Note that the file search tool may output fewer than `max_num_results` results. + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + + ranking_options: FileSearchRankingOptions + """The ranking options for the file search. + + If not specified, the file search tool will use the `auto` ranker and a + score_threshold of 0. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. 
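
Editor's note: the file-search tool now takes per-tool overrides. A sketch of the param dict with the new `file_search` block, using only the fields introduced in this diff and assuming an SDK build that contains them.

from openai.types.beta.file_search_tool_param import FileSearchToolParam

file_search_tool: FileSearchToolParam = {
    "type": "file_search",
    "file_search": {
        "max_num_results": 8,        # must be between 1 and 50 inclusive
        "ranking_options": {
            "ranker": "auto",
            "score_threshold": 0.5,  # floating point number between 0 and 1
        },
    },
}
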
+ """ class FileSearchToolParam(TypedDict, total=False): type: Required[Literal["file_search"]] """The type of tool being defined: `file_search`""" + + file_search: FileSearch + """Overrides for the file search tool.""" diff --git a/src/openai/types/beta/function_tool_param.py b/src/openai/types/beta/function_tool_param.py index b44c0d47ef..d906e02b88 100644 --- a/src/openai/types/beta/function_tool_param.py +++ b/src/openai/types/beta/function_tool_param.py @@ -4,13 +4,13 @@ from typing_extensions import Literal, Required, TypedDict -from ...types import shared_params +from ..shared_params.function_definition import FunctionDefinition __all__ = ["FunctionToolParam"] class FunctionToolParam(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] + function: Required[FunctionDefinition] type: Required[Literal["function"]] """The type of tool being defined: `function`""" diff --git a/src/openai/types/beta/realtime/__init__.py b/src/openai/types/beta/realtime/__init__.py new file mode 100644 index 0000000000..cd0616dcfa --- /dev/null +++ b/src/openai/types/beta/realtime/__init__.py @@ -0,0 +1,84 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .session import Session as Session +from .error_event import ErrorEvent as ErrorEvent +from .conversation_item import ConversationItem as ConversationItem +from .realtime_response import RealtimeResponse as RealtimeResponse +from .response_done_event import ResponseDoneEvent as ResponseDoneEvent +from .session_update_event import SessionUpdateEvent as SessionUpdateEvent +from .realtime_client_event import RealtimeClientEvent as RealtimeClientEvent +from .realtime_server_event import RealtimeServerEvent as RealtimeServerEvent +from .response_cancel_event import ResponseCancelEvent as ResponseCancelEvent +from .response_create_event import ResponseCreateEvent as ResponseCreateEvent +from .session_create_params import SessionCreateParams as SessionCreateParams +from .session_created_event import SessionCreatedEvent as SessionCreatedEvent +from .session_updated_event import SessionUpdatedEvent as SessionUpdatedEvent +from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent +from .conversation_item_param import ConversationItemParam as ConversationItemParam +from .realtime_connect_params import RealtimeConnectParams as RealtimeConnectParams +from .realtime_response_usage import RealtimeResponseUsage as RealtimeResponseUsage +from .session_create_response import SessionCreateResponse as SessionCreateResponse +from .realtime_response_status import RealtimeResponseStatus as RealtimeResponseStatus +from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent +from .conversation_item_content import ConversationItemContent as ConversationItemContent +from .rate_limits_updated_event import RateLimitsUpdatedEvent as RateLimitsUpdatedEvent +from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent +from .response_text_delta_event import ResponseTextDeltaEvent as ResponseTextDeltaEvent +from .conversation_created_event import ConversationCreatedEvent as ConversationCreatedEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent +from .session_update_event_param import SessionUpdateEventParam as SessionUpdateEventParam +from .realtime_client_event_param import RealtimeClientEventParam as RealtimeClientEventParam +from 
.response_cancel_event_param import ResponseCancelEventParam as ResponseCancelEventParam +from .response_create_event_param import ResponseCreateEventParam as ResponseCreateEventParam +from .conversation_item_create_event import ConversationItemCreateEvent as ConversationItemCreateEvent +from .conversation_item_delete_event import ConversationItemDeleteEvent as ConversationItemDeleteEvent +from .input_audio_buffer_clear_event import InputAudioBufferClearEvent as InputAudioBufferClearEvent +from .conversation_item_content_param import ConversationItemContentParam as ConversationItemContentParam +from .conversation_item_created_event import ConversationItemCreatedEvent as ConversationItemCreatedEvent +from .conversation_item_deleted_event import ConversationItemDeletedEvent as ConversationItemDeletedEvent +from .input_audio_buffer_append_event import InputAudioBufferAppendEvent as InputAudioBufferAppendEvent +from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent as InputAudioBufferCommitEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent +from .conversation_item_truncate_event import ConversationItemTruncateEvent as ConversationItemTruncateEvent +from .conversation_item_with_reference import ConversationItemWithReference as ConversationItemWithReference +from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent as InputAudioBufferClearedEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent +from .conversation_item_truncated_event import ConversationItemTruncatedEvent as ConversationItemTruncatedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent +from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent as InputAudioBufferCommittedEvent +from .conversation_item_create_event_param import ConversationItemCreateEventParam as ConversationItemCreateEventParam +from .conversation_item_delete_event_param import ConversationItemDeleteEventParam as ConversationItemDeleteEventParam +from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam as InputAudioBufferClearEventParam +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent +from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam as InputAudioBufferAppendEventParam +from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam as InputAudioBufferCommitEventParam +from .response_audio_transcript_delta_event import ( + ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent, +) +from .conversation_item_truncate_event_param import ( + ConversationItemTruncateEventParam as ConversationItemTruncateEventParam, +) +from .conversation_item_with_reference_param import ( + ConversationItemWithReferenceParam as ConversationItemWithReferenceParam, +) +from .input_audio_buffer_speech_started_event import ( + InputAudioBufferSpeechStartedEvent as InputAudioBufferSpeechStartedEvent, +) +from .input_audio_buffer_speech_stopped_event import ( + InputAudioBufferSpeechStoppedEvent as InputAudioBufferSpeechStoppedEvent, +) +from .response_function_call_arguments_done_event import ( + ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, +) +from 
.response_function_call_arguments_delta_event import ( + ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, +) +from .conversation_item_input_audio_transcription_failed_event import ( + ConversationItemInputAudioTranscriptionFailedEvent as ConversationItemInputAudioTranscriptionFailedEvent, +) +from .conversation_item_input_audio_transcription_completed_event import ( + ConversationItemInputAudioTranscriptionCompletedEvent as ConversationItemInputAudioTranscriptionCompletedEvent, +) diff --git a/src/openai/types/beta/realtime/conversation_created_event.py b/src/openai/types/beta/realtime/conversation_created_event.py new file mode 100644 index 0000000000..4ba0540867 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_created_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationCreatedEvent", "Conversation"] + + +class Conversation(BaseModel): + id: Optional[str] = None + """The unique ID of the conversation.""" + + object: Optional[Literal["realtime.conversation"]] = None + """The object type, must be `realtime.conversation`.""" + + +class ConversationCreatedEvent(BaseModel): + conversation: Conversation + """The conversation resource.""" + + event_id: str + """The unique ID of the server event.""" + + type: Literal["conversation.created"] + """The event type, must be `conversation.created`.""" diff --git a/src/openai/types/beta/realtime/conversation_item.py b/src/openai/types/beta/realtime/conversation_item.py new file mode 100644 index 0000000000..4edf6c4d5f --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item.py @@ -0,0 +1,61 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item_content import ConversationItemContent + +__all__ = ["ConversationItem"] + + +class ConversationItem(BaseModel): + id: Optional[str] = None + """ + The unique ID of the item, this can be generated by the client to help manage + server-side context, but is not required because the server will generate one if + not provided. + """ + + arguments: Optional[str] = None + """The arguments of the function call (for `function_call` items).""" + + call_id: Optional[str] = None + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Optional[List[ConversationItemContent]] = None + """The content of the message, applicable for `message` items. + + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. 
+ """ + + name: Optional[str] = None + """The name of the function being called (for `function_call` items).""" + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`.""" + + output: Optional[str] = None + """The output of the function call (for `function_call_output` items).""" + + role: Optional[Literal["user", "assistant", "system"]] = None + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Optional[Literal["completed", "incomplete"]] = None + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Optional[Literal["message", "function_call", "function_call_output"]] = None + """The type of the item (`message`, `function_call`, `function_call_output`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_content.py b/src/openai/types/beta/realtime/conversation_item_content.py new file mode 100644 index 0000000000..ab40a4a1a7 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_content.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemContent"] + + +class ConversationItemContent(BaseModel): + id: Optional[str] = None + """ + ID of a previous conversation item to reference (for `item_reference` content + types in `response.create` events). These can reference both client and server + created items. + """ + + audio: Optional[str] = None + """Base64-encoded audio bytes, used for `input_audio` content type.""" + + text: Optional[str] = None + """The text content, used for `input_text` and `text` content types.""" + + transcript: Optional[str] = None + """The transcript of the audio, used for `input_audio` content type.""" + + type: Optional[Literal["input_text", "input_audio", "item_reference", "text"]] = None + """The content type (`input_text`, `input_audio`, `item_reference`, `text`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_content_param.py b/src/openai/types/beta/realtime/conversation_item_content_param.py new file mode 100644 index 0000000000..7a3a92a39d --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_content_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["ConversationItemContentParam"] + + +class ConversationItemContentParam(TypedDict, total=False): + id: str + """ + ID of a previous conversation item to reference (for `item_reference` content + types in `response.create` events). These can reference both client and server + created items. 
+ """ + + audio: str + """Base64-encoded audio bytes, used for `input_audio` content type.""" + + text: str + """The text content, used for `input_text` and `text` content types.""" + + transcript: str + """The transcript of the audio, used for `input_audio` content type.""" + + type: Literal["input_text", "input_audio", "item_reference", "text"] + """The content type (`input_text`, `input_audio`, `item_reference`, `text`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_create_event.py b/src/openai/types/beta/realtime/conversation_item_create_event.py new file mode 100644 index 0000000000..f19d552a92 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_create_event.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemCreateEvent"] + + +class ConversationItemCreateEvent(BaseModel): + item: ConversationItem + """The item to add to the conversation.""" + + type: Literal["conversation.item.create"] + """The event type, must be `conversation.item.create`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + previous_item_id: Optional[str] = None + """The ID of the preceding item after which the new item will be inserted. + + If not set, the new item will be appended to the end of the conversation. If set + to `root`, the new item will be added to the beginning of the conversation. If + set to an existing ID, it allows an item to be inserted mid-conversation. If the + ID cannot be found, an error will be returned and the item will not be added. + """ diff --git a/src/openai/types/beta/realtime/conversation_item_create_event_param.py b/src/openai/types/beta/realtime/conversation_item_create_event_param.py new file mode 100644 index 0000000000..693d0fd54d --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_create_event_param.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .conversation_item_param import ConversationItemParam + +__all__ = ["ConversationItemCreateEventParam"] + + +class ConversationItemCreateEventParam(TypedDict, total=False): + item: Required[ConversationItemParam] + """The item to add to the conversation.""" + + type: Required[Literal["conversation.item.create"]] + """The event type, must be `conversation.item.create`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + previous_item_id: str + """The ID of the preceding item after which the new item will be inserted. + + If not set, the new item will be appended to the end of the conversation. If set + to `root`, the new item will be added to the beginning of the conversation. If + set to an existing ID, it allows an item to be inserted mid-conversation. If the + ID cannot be found, an error will be returned and the item will not be added. 
+ """ diff --git a/src/openai/types/beta/realtime/conversation_item_created_event.py b/src/openai/types/beta/realtime/conversation_item_created_event.py new file mode 100644 index 0000000000..2f20388246 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_created_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ConversationItemCreatedEvent"] + + +class ConversationItemCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + previous_item_id: str + """ + The ID of the preceding item in the Conversation context, allows the client to + understand the order of the conversation. + """ + + type: Literal["conversation.item.created"] + """The event type, must be `conversation.item.created`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_delete_event.py b/src/openai/types/beta/realtime/conversation_item_delete_event.py new file mode 100644 index 0000000000..02ca8250ce --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_delete_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemDeleteEvent"] + + +class ConversationItemDeleteEvent(BaseModel): + item_id: str + """The ID of the item to delete.""" + + type: Literal["conversation.item.delete"] + """The event type, must be `conversation.item.delete`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_delete_event_param.py b/src/openai/types/beta/realtime/conversation_item_delete_event_param.py new file mode 100644 index 0000000000..c3f88d6627 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_delete_event_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemDeleteEventParam"] + + +class ConversationItemDeleteEventParam(TypedDict, total=False): + item_id: Required[str] + """The ID of the item to delete.""" + + type: Required[Literal["conversation.item.delete"]] + """The event type, must be `conversation.item.delete`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_deleted_event.py b/src/openai/types/beta/realtime/conversation_item_deleted_event.py new file mode 100644 index 0000000000..a35a97817a --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_deleted_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemDeletedEvent"] + + +class ConversationItemDeletedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item that was deleted.""" + + type: Literal["conversation.item.deleted"] + """The event type, must be `conversation.item.deleted`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py new file mode 100644 index 0000000000..ded79cc0f7 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_completed_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionCompletedEvent"] + + +class ConversationItemInputAudioTranscriptionCompletedEvent(BaseModel): + content_index: int + """The index of the content part containing the audio.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item containing the audio.""" + + transcript: str + """The transcribed text.""" + + type: Literal["conversation.item.input_audio_transcription.completed"] + """ + The event type, must be `conversation.item.input_audio_transcription.completed`. + """ diff --git a/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py new file mode 100644 index 0000000000..cecac93e64 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_input_audio_transcription_failed_event.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemInputAudioTranscriptionFailedEvent", "Error"] + + +class Error(BaseModel): + code: Optional[str] = None + """Error code, if any.""" + + message: Optional[str] = None + """A human-readable error message.""" + + param: Optional[str] = None + """Parameter related to the error, if any.""" + + type: Optional[str] = None + """The type of error.""" + + +class ConversationItemInputAudioTranscriptionFailedEvent(BaseModel): + content_index: int + """The index of the content part containing the audio.""" + + error: Error + """Details of the transcription error.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item.""" + + type: Literal["conversation.item.input_audio_transcription.failed"] + """The event type, must be `conversation.item.input_audio_transcription.failed`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_param.py b/src/openai/types/beta/realtime/conversation_item_param.py new file mode 100644 index 0000000000..ac0f8431e5 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_param.py @@ -0,0 +1,62 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, TypedDict + +from .conversation_item_content_param import ConversationItemContentParam + +__all__ = ["ConversationItemParam"] + + +class ConversationItemParam(TypedDict, total=False): + id: str + """ + The unique ID of the item, this can be generated by the client to help manage + server-side context, but is not required because the server will generate one if + not provided. + """ + + arguments: str + """The arguments of the function call (for `function_call` items).""" + + call_id: str + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Iterable[ConversationItemContentParam] + """The content of the message, applicable for `message` items. + + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: str + """The name of the function being called (for `function_call` items).""" + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`.""" + + output: str + """The output of the function call (for `function_call_output` items).""" + + role: Literal["user", "assistant", "system"] + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Literal["completed", "incomplete"] + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Literal["message", "function_call", "function_call_output"] + """The type of the item (`message`, `function_call`, `function_call_output`).""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncate_event.py b/src/openai/types/beta/realtime/conversation_item_truncate_event.py new file mode 100644 index 0000000000..cb336bba2c --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncate_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemTruncateEvent"] + + +class ConversationItemTruncateEvent(BaseModel): + audio_end_ms: int + """Inclusive duration up to which audio is truncated, in milliseconds. + + If the audio_end_ms is greater than the actual audio duration, the server will + respond with an error. + """ + + content_index: int + """The index of the content part to truncate. Set this to 0.""" + + item_id: str + """The ID of the assistant message item to truncate. + + Only assistant message items can be truncated. 
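
Editor's note: with the content and item params above, a `conversation.item.create` client event is just nested dicts. A minimal sketch, assuming an SDK build containing this diff; how the event is sent over the realtime connection is not shown here.

from openai.types.beta.realtime import (
    ConversationItemCreateEventParam,
    ConversationItemParam,
)

user_item: ConversationItemParam = {
    "type": "message",
    "role": "user",
    "content": [{"type": "input_text", "text": "What did I just say?"}],
}

create_event: ConversationItemCreateEventParam = {
    "type": "conversation.item.create",
    "item": user_item,
    # Omitting previous_item_id appends the item to the end of the conversation.
}
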
+ """ + + type: Literal["conversation.item.truncate"] + """The event type, must be `conversation.item.truncate`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py b/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py new file mode 100644 index 0000000000..d3ad1e1e25 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncate_event_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ConversationItemTruncateEventParam"] + + +class ConversationItemTruncateEventParam(TypedDict, total=False): + audio_end_ms: Required[int] + """Inclusive duration up to which audio is truncated, in milliseconds. + + If the audio_end_ms is greater than the actual audio duration, the server will + respond with an error. + """ + + content_index: Required[int] + """The index of the content part to truncate. Set this to 0.""" + + item_id: Required[str] + """The ID of the assistant message item to truncate. + + Only assistant message items can be truncated. + """ + + type: Required[Literal["conversation.item.truncate"]] + """The event type, must be `conversation.item.truncate`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/conversation_item_truncated_event.py b/src/openai/types/beta/realtime/conversation_item_truncated_event.py new file mode 100644 index 0000000000..36368fa28f --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_truncated_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ConversationItemTruncatedEvent"] + + +class ConversationItemTruncatedEvent(BaseModel): + audio_end_ms: int + """The duration up to which the audio was truncated, in milliseconds.""" + + content_index: int + """The index of the content part that was truncated.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the assistant message item that was truncated.""" + + type: Literal["conversation.item.truncated"] + """The event type, must be `conversation.item.truncated`.""" diff --git a/src/openai/types/beta/realtime/conversation_item_with_reference.py b/src/openai/types/beta/realtime/conversation_item_with_reference.py new file mode 100644 index 0000000000..31806afc33 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_with_reference.py @@ -0,0 +1,67 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item_content import ConversationItemContent + +__all__ = ["ConversationItemWithReference"] + + +class ConversationItemWithReference(BaseModel): + id: Optional[str] = None + """ + For an item of type (`message` | `function_call` | `function_call_output`) this + field allows the client to assign the unique ID of the item. It is not required + because the server will generate one if not provided. 
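
Editor's note: truncation is the client-side way to cut off assistant audio the user interrupted. A sketch of the corresponding client event built from the param fields above, assuming an SDK build containing this diff; the item id is a placeholder.

from openai.types.beta.realtime import ConversationItemTruncateEventParam

truncate_event: ConversationItemTruncateEventParam = {
    "type": "conversation.item.truncate",
    "item_id": "item_abc123",  # placeholder: the assistant message being truncated
    "content_index": 0,        # the docs above say to set this to 0
    "audio_end_ms": 1500,      # keep only the first 1.5s the user actually heard
}
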
+ + For an item of type `item_reference`, this field is required and is a reference + to any item that has previously existed in the conversation. + """ + + arguments: Optional[str] = None + """The arguments of the function call (for `function_call` items).""" + + call_id: Optional[str] = None + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Optional[List[ConversationItemContent]] = None + """The content of the message, applicable for `message` items. + + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: Optional[str] = None + """The name of the function being called (for `function_call` items).""" + + object: Optional[Literal["realtime.item"]] = None + """Identifier for the API object being returned - always `realtime.item`.""" + + output: Optional[str] = None + """The output of the function call (for `function_call_output` items).""" + + role: Optional[Literal["user", "assistant", "system"]] = None + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Optional[Literal["completed", "incomplete"]] = None + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Optional[Literal["message", "function_call", "function_call_output", "item_reference"]] = None + """ + The type of the item (`message`, `function_call`, `function_call_output`, + `item_reference`). + """ diff --git a/src/openai/types/beta/realtime/conversation_item_with_reference_param.py b/src/openai/types/beta/realtime/conversation_item_with_reference_param.py new file mode 100644 index 0000000000..e266cdce32 --- /dev/null +++ b/src/openai/types/beta/realtime/conversation_item_with_reference_param.py @@ -0,0 +1,68 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, TypedDict + +from .conversation_item_content_param import ConversationItemContentParam + +__all__ = ["ConversationItemWithReferenceParam"] + + +class ConversationItemWithReferenceParam(TypedDict, total=False): + id: str + """ + For an item of type (`message` | `function_call` | `function_call_output`) this + field allows the client to assign the unique ID of the item. It is not required + because the server will generate one if not provided. + + For an item of type `item_reference`, this field is required and is a reference + to any item that has previously existed in the conversation. + """ + + arguments: str + """The arguments of the function call (for `function_call` items).""" + + call_id: str + """ + The ID of the function call (for `function_call` and `function_call_output` + items). If passed on a `function_call_output` item, the server will check that a + `function_call` item with the same ID exists in the conversation history. + """ + + content: Iterable[ConversationItemContentParam] + """The content of the message, applicable for `message` items. 
+ + - Message items of role `system` support only `input_text` content + - Message items of role `user` support `input_text` and `input_audio` content + - Message items of role `assistant` support `text` content. + """ + + name: str + """The name of the function being called (for `function_call` items).""" + + object: Literal["realtime.item"] + """Identifier for the API object being returned - always `realtime.item`.""" + + output: str + """The output of the function call (for `function_call_output` items).""" + + role: Literal["user", "assistant", "system"] + """ + The role of the message sender (`user`, `assistant`, `system`), only applicable + for `message` items. + """ + + status: Literal["completed", "incomplete"] + """The status of the item (`completed`, `incomplete`). + + These have no effect on the conversation, but are accepted for consistency with + the `conversation.item.created` event. + """ + + type: Literal["message", "function_call", "function_call_output", "item_reference"] + """ + The type of the item (`message`, `function_call`, `function_call_output`, + `item_reference`). + """ diff --git a/src/openai/types/beta/realtime/error_event.py b/src/openai/types/beta/realtime/error_event.py new file mode 100644 index 0000000000..e020fc3848 --- /dev/null +++ b/src/openai/types/beta/realtime/error_event.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ErrorEvent", "Error"] + + +class Error(BaseModel): + message: str + """A human-readable error message.""" + + type: str + """The type of error (e.g., "invalid_request_error", "server_error").""" + + code: Optional[str] = None + """Error code, if any.""" + + event_id: Optional[str] = None + """The event_id of the client event that caused the error, if applicable.""" + + param: Optional[str] = None + """Parameter related to the error, if any.""" + + +class ErrorEvent(BaseModel): + error: Error + """Details of the error.""" + + event_id: str + """The unique ID of the server event.""" + + type: Literal["error"] + """The event type, must be `error`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_append_event.py b/src/openai/types/beta/realtime/input_audio_buffer_append_event.py new file mode 100644 index 0000000000..a253a6488c --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_append_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferAppendEvent"] + + +class InputAudioBufferAppendEvent(BaseModel): + audio: str + """Base64-encoded audio bytes. + + This must be in the format specified by the `input_audio_format` field in the + session configuration. + """ + + type: Literal["input_audio_buffer.append"] + """The event type, must be `input_audio_buffer.append`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py new file mode 100644 index 0000000000..3ad0bc737d --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_append_event_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferAppendEventParam"] + + +class InputAudioBufferAppendEventParam(TypedDict, total=False): + audio: Required[str] + """Base64-encoded audio bytes. + + This must be in the format specified by the `input_audio_format` field in the + session configuration. + """ + + type: Required[Literal["input_audio_buffer.append"]] + """The event type, must be `input_audio_buffer.append`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py b/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py new file mode 100644 index 0000000000..b0624d34df --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_clear_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferClearEvent"] + + +class InputAudioBufferClearEvent(BaseModel): + type: Literal["input_audio_buffer.clear"] + """The event type, must be `input_audio_buffer.clear`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py new file mode 100644 index 0000000000..2bd6bc5a02 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_clear_event_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferClearEventParam"] + + +class InputAudioBufferClearEventParam(TypedDict, total=False): + type: Required[Literal["input_audio_buffer.clear"]] + """The event type, must be `input_audio_buffer.clear`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py b/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py new file mode 100644 index 0000000000..632e1b94bc --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_cleared_event.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferClearedEvent"] + + +class InputAudioBufferClearedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + type: Literal["input_audio_buffer.cleared"] + """The event type, must be `input_audio_buffer.cleared`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py b/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py new file mode 100644 index 0000000000..7b6f5e46b7 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_commit_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferCommitEvent"] + + +class InputAudioBufferCommitEvent(BaseModel): + type: Literal["input_audio_buffer.commit"] + """The event type, must be `input_audio_buffer.commit`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py b/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py new file mode 100644 index 0000000000..c9c927ab98 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_commit_event_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["InputAudioBufferCommitEventParam"] + + +class InputAudioBufferCommitEventParam(TypedDict, total=False): + type: Required[Literal["input_audio_buffer.commit"]] + """The event type, must be `input_audio_buffer.commit`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py b/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py new file mode 100644 index 0000000000..3071eff357 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_committed_event.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferCommittedEvent"] + + +class InputAudioBufferCommittedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created.""" + + previous_item_id: str + """The ID of the preceding item after which the new item will be inserted.""" + + type: Literal["input_audio_buffer.committed"] + """The event type, must be `input_audio_buffer.committed`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py b/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py new file mode 100644 index 0000000000..4f3ab082c4 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_speech_started_event.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferSpeechStartedEvent"] + + +class InputAudioBufferSpeechStartedEvent(BaseModel): + audio_start_ms: int + """ + Milliseconds from the start of all audio written to the buffer during the + session when speech was first detected. This will correspond to the beginning of + audio sent to the model, and thus includes the `prefix_padding_ms` configured in + the Session. 
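
Editor's note: the buffer events above are how a client streams microphone audio when it is not relying on server VAD: append base64 chunks, then commit them into a user item. A minimal sketch assuming an SDK build containing this diff; the audio bytes are a stand-in and the transport is not shown.

import base64

from openai.types.beta.realtime import (
    InputAudioBufferAppendEventParam,
    InputAudioBufferCommitEventParam,
)

# Stand-in for a chunk of audio in the session's configured input_audio_format.
pcm_chunk = b"\x00\x00" * 160

append_event: InputAudioBufferAppendEventParam = {
    "type": "input_audio_buffer.append",
    "audio": base64.b64encode(pcm_chunk).decode("ascii"),
}

commit_event: InputAudioBufferCommitEventParam = {
    "type": "input_audio_buffer.commit",
}
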
+ """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created when speech stops.""" + + type: Literal["input_audio_buffer.speech_started"] + """The event type, must be `input_audio_buffer.speech_started`.""" diff --git a/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py b/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py new file mode 100644 index 0000000000..40568170f2 --- /dev/null +++ b/src/openai/types/beta/realtime/input_audio_buffer_speech_stopped_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["InputAudioBufferSpeechStoppedEvent"] + + +class InputAudioBufferSpeechStoppedEvent(BaseModel): + audio_end_ms: int + """Milliseconds since the session started when speech stopped. + + This will correspond to the end of audio sent to the model, and thus includes + the `min_silence_duration_ms` configured in the Session. + """ + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the user message item that will be created.""" + + type: Literal["input_audio_buffer.speech_stopped"] + """The event type, must be `input_audio_buffer.speech_stopped`.""" diff --git a/src/openai/types/beta/realtime/rate_limits_updated_event.py b/src/openai/types/beta/realtime/rate_limits_updated_event.py new file mode 100644 index 0000000000..7e12283c46 --- /dev/null +++ b/src/openai/types/beta/realtime/rate_limits_updated_event.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RateLimitsUpdatedEvent", "RateLimit"] + + +class RateLimit(BaseModel): + limit: Optional[int] = None + """The maximum allowed value for the rate limit.""" + + name: Optional[Literal["requests", "tokens"]] = None + """The name of the rate limit (`requests`, `tokens`).""" + + remaining: Optional[int] = None + """The remaining value before the limit is reached.""" + + reset_seconds: Optional[float] = None + """Seconds until the rate limit resets.""" + + +class RateLimitsUpdatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + rate_limits: List[RateLimit] + """List of rate limit information.""" + + type: Literal["rate_limits.updated"] + """The event type, must be `rate_limits.updated`.""" diff --git a/src/openai/types/beta/realtime/realtime_client_event.py b/src/openai/types/beta/realtime/realtime_client_event.py new file mode 100644 index 0000000000..0769184cd0 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_client_event.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
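
Editor's note: a hedged sketch of reacting to the VAD and rate-limit server events defined above. The events would arrive over a realtime connection, which this diff does not cover, so the loop's source is assumed; `RealtimeServerEvent` is the server-event union re-exported from the new `realtime` package.

from typing import Iterable

from openai.types.beta.realtime import RealtimeServerEvent


def log_audio_events(events: Iterable[RealtimeServerEvent]) -> None:
    """Print the speech-detection and rate-limit events as they arrive."""
    for event in events:
        if event.type == "input_audio_buffer.speech_started":
            print(f"speech started at {event.audio_start_ms}ms (item {event.item_id})")
        elif event.type == "input_audio_buffer.speech_stopped":
            print(f"speech stopped at {event.audio_end_ms}ms")
        elif event.type == "rate_limits.updated":
            for limit in event.rate_limits:
                print(f"{limit.name}: {limit.remaining} remaining")
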
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from .session_update_event import SessionUpdateEvent +from .response_cancel_event import ResponseCancelEvent +from .response_create_event import ResponseCreateEvent +from .conversation_item_create_event import ConversationItemCreateEvent +from .conversation_item_delete_event import ConversationItemDeleteEvent +from .input_audio_buffer_clear_event import InputAudioBufferClearEvent +from .input_audio_buffer_append_event import InputAudioBufferAppendEvent +from .input_audio_buffer_commit_event import InputAudioBufferCommitEvent +from .conversation_item_truncate_event import ConversationItemTruncateEvent + +__all__ = ["RealtimeClientEvent"] + +RealtimeClientEvent: TypeAlias = Annotated[ + Union[ + SessionUpdateEvent, + InputAudioBufferAppendEvent, + InputAudioBufferCommitEvent, + InputAudioBufferClearEvent, + ConversationItemCreateEvent, + ConversationItemTruncateEvent, + ConversationItemDeleteEvent, + ResponseCreateEvent, + ResponseCancelEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/realtime/realtime_client_event_param.py b/src/openai/types/beta/realtime/realtime_client_event_param.py new file mode 100644 index 0000000000..4020892c33 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_client_event_param.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .session_update_event_param import SessionUpdateEventParam +from .response_cancel_event_param import ResponseCancelEventParam +from .response_create_event_param import ResponseCreateEventParam +from .conversation_item_create_event_param import ConversationItemCreateEventParam +from .conversation_item_delete_event_param import ConversationItemDeleteEventParam +from .input_audio_buffer_clear_event_param import InputAudioBufferClearEventParam +from .input_audio_buffer_append_event_param import InputAudioBufferAppendEventParam +from .input_audio_buffer_commit_event_param import InputAudioBufferCommitEventParam +from .conversation_item_truncate_event_param import ConversationItemTruncateEventParam + +__all__ = ["RealtimeClientEventParam"] + +RealtimeClientEventParam: TypeAlias = Union[ + SessionUpdateEventParam, + InputAudioBufferAppendEventParam, + InputAudioBufferCommitEventParam, + InputAudioBufferClearEventParam, + ConversationItemCreateEventParam, + ConversationItemTruncateEventParam, + ConversationItemDeleteEventParam, + ResponseCreateEventParam, + ResponseCancelEventParam, +] diff --git a/src/openai/types/beta/realtime/realtime_connect_params.py b/src/openai/types/beta/realtime/realtime_connect_params.py new file mode 100644 index 0000000000..76474f3de4 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_connect_params.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["RealtimeConnectParams"] + + +class RealtimeConnectParams(TypedDict, total=False): + model: Required[str] diff --git a/src/openai/types/beta/realtime/realtime_response.py b/src/openai/types/beta/realtime/realtime_response.py new file mode 100644 index 0000000000..4c3c83d666 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response.py @@ -0,0 +1,87 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from ...shared.metadata import Metadata +from .conversation_item import ConversationItem +from .realtime_response_usage import RealtimeResponseUsage +from .realtime_response_status import RealtimeResponseStatus + +__all__ = ["RealtimeResponse"] + + +class RealtimeResponse(BaseModel): + id: Optional[str] = None + """The unique ID of the response.""" + + conversation_id: Optional[str] = None + """ + Which conversation the response is added to, determined by the `conversation` + field in the `response.create` event. If `auto`, the response will be added to + the default conversation and the value of `conversation_id` will be an id like + `conv_1234`. If `none`, the response will not be added to any conversation and + the value of `conversation_id` will be `null`. If responses are being triggered + by server VAD, the response will be added to the default conversation, thus the + `conversation_id` will be an id like `conv_1234`. + """ + + max_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls, that was used in this response. + """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model used to respond. + + If there are multiple modalities, the model will pick one, for example if + `modalities` is `["text", "audio"]`, the model could be responding in either + text or audio. + """ + + object: Optional[Literal["realtime.response"]] = None + """The object type, must be `realtime.response`.""" + + output: Optional[List[ConversationItem]] = None + """The list of output items generated by the response.""" + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + status: Optional[Literal["completed", "cancelled", "failed", "incomplete"]] = None + """ + The final status of the response (`completed`, `cancelled`, `failed`, or + `incomplete`). + """ + + status_details: Optional[RealtimeResponseStatus] = None + """Additional details about the status.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + usage: Optional[RealtimeResponseUsage] = None + """Usage statistics for the Response, this will correspond to billing. 
+ + A Realtime API session will maintain a conversation context and append new Items + to the Conversation, thus output from previous turns (text and audio tokens) + will become the input for later turns. + """ + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """ + The voice the model used to respond. Current voice options are `alloy`, `ash`, + `ballad`, `coral`, `echo` `sage`, `shimmer` and `verse`. + """ diff --git a/src/openai/types/beta/realtime/realtime_response_status.py b/src/openai/types/beta/realtime/realtime_response_status.py new file mode 100644 index 0000000000..7189cd58a1 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response_status.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RealtimeResponseStatus", "Error"] + + +class Error(BaseModel): + code: Optional[str] = None + """Error code, if any.""" + + type: Optional[str] = None + """The type of error.""" + + +class RealtimeResponseStatus(BaseModel): + error: Optional[Error] = None + """ + A description of the error that caused the response to fail, populated when the + `status` is `failed`. + """ + + reason: Optional[Literal["turn_detected", "client_cancelled", "max_output_tokens", "content_filter"]] = None + """The reason the Response did not complete. + + For a `cancelled` Response, one of `turn_detected` (the server VAD detected a + new start of speech) or `client_cancelled` (the client sent a cancel event). For + an `incomplete` Response, one of `max_output_tokens` or `content_filter` (the + server-side safety filter activated and cut off the response). + """ + + type: Optional[Literal["completed", "cancelled", "incomplete", "failed"]] = None + """ + The type of error that caused the response to fail, corresponding with the + `status` field (`completed`, `cancelled`, `incomplete`, `failed`). + """ diff --git a/src/openai/types/beta/realtime/realtime_response_usage.py b/src/openai/types/beta/realtime/realtime_response_usage.py new file mode 100644 index 0000000000..7ca822e25e --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_response_usage.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ...._models import BaseModel + +__all__ = ["RealtimeResponseUsage", "InputTokenDetails", "OutputTokenDetails"] + + +class InputTokenDetails(BaseModel): + audio_tokens: Optional[int] = None + """The number of audio tokens used in the Response.""" + + cached_tokens: Optional[int] = None + """The number of cached tokens used in the Response.""" + + text_tokens: Optional[int] = None + """The number of text tokens used in the Response.""" + + +class OutputTokenDetails(BaseModel): + audio_tokens: Optional[int] = None + """The number of audio tokens used in the Response.""" + + text_tokens: Optional[int] = None + """The number of text tokens used in the Response.""" + + +class RealtimeResponseUsage(BaseModel): + input_token_details: Optional[InputTokenDetails] = None + """Details about the input tokens used in the Response.""" + + input_tokens: Optional[int] = None + """ + The number of input tokens used in the Response, including text and audio + tokens. 
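`RealtimeResponse.status_details` only carries an `error` or a `reason` for responses that did not complete, so consumers typically branch on `status` first. An illustrative helper over the models defined above (the dispatch itself is a sketch, not SDK code):

from openai.types.beta.realtime.response_done_event import ResponseDoneEvent


def describe_outcome(event: ResponseDoneEvent) -> str:
    """Summarize why a response ended, using the status fields defined above."""
    response = event.response
    if response.status == "completed" or response.status is None:
        return "completed"
    details = response.status_details
    if details is not None and details.error is not None:
        return f"{response.status}: {details.error.type} ({details.error.code})"
    if details is not None and details.reason is not None:
        return f"{response.status}: {details.reason}"
    return str(response.status)
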
+ """ + + output_token_details: Optional[OutputTokenDetails] = None + """Details about the output tokens used in the Response.""" + + output_tokens: Optional[int] = None + """ + The number of output tokens sent in the Response, including text and audio + tokens. + """ + + total_tokens: Optional[int] = None + """ + The total number of tokens in the Response including input and output text and + audio tokens. + """ diff --git a/src/openai/types/beta/realtime/realtime_server_event.py b/src/openai/types/beta/realtime/realtime_server_event.py new file mode 100644 index 0000000000..5f8ed55b13 --- /dev/null +++ b/src/openai/types/beta/realtime/realtime_server_event.py @@ -0,0 +1,72 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ...._utils import PropertyInfo +from .error_event import ErrorEvent +from .response_done_event import ResponseDoneEvent +from .session_created_event import SessionCreatedEvent +from .session_updated_event import SessionUpdatedEvent +from .response_created_event import ResponseCreatedEvent +from .response_text_done_event import ResponseTextDoneEvent +from .rate_limits_updated_event import RateLimitsUpdatedEvent +from .response_audio_done_event import ResponseAudioDoneEvent +from .response_text_delta_event import ResponseTextDeltaEvent +from .conversation_created_event import ConversationCreatedEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent +from .conversation_item_created_event import ConversationItemCreatedEvent +from .conversation_item_deleted_event import ConversationItemDeletedEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent +from .input_audio_buffer_cleared_event import InputAudioBufferClearedEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent +from .conversation_item_truncated_event import ConversationItemTruncatedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent +from .input_audio_buffer_committed_event import InputAudioBufferCommittedEvent +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent +from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent +from .input_audio_buffer_speech_started_event import InputAudioBufferSpeechStartedEvent +from .input_audio_buffer_speech_stopped_event import InputAudioBufferSpeechStoppedEvent +from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent +from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent +from .conversation_item_input_audio_transcription_failed_event import ConversationItemInputAudioTranscriptionFailedEvent +from .conversation_item_input_audio_transcription_completed_event import ( + ConversationItemInputAudioTranscriptionCompletedEvent, +) + +__all__ = ["RealtimeServerEvent"] + +RealtimeServerEvent: TypeAlias = Annotated[ + Union[ + ErrorEvent, + SessionCreatedEvent, + SessionUpdatedEvent, + ConversationCreatedEvent, + InputAudioBufferCommittedEvent, + InputAudioBufferClearedEvent, + InputAudioBufferSpeechStartedEvent, + InputAudioBufferSpeechStoppedEvent, + ConversationItemCreatedEvent, + ConversationItemInputAudioTranscriptionCompletedEvent, + ConversationItemInputAudioTranscriptionFailedEvent, + ConversationItemTruncatedEvent, + 
ConversationItemDeletedEvent, + ResponseCreatedEvent, + ResponseDoneEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + RateLimitsUpdatedEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/beta/realtime/response_audio_delta_event.py b/src/openai/types/beta/realtime/response_audio_delta_event.py new file mode 100644 index 0000000000..8e0128d942 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioDeltaEvent"] + + +class ResponseAudioDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """Base64-encoded audio data delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio.delta"] + """The event type, must be `response.audio.delta`.""" diff --git a/src/openai/types/beta/realtime/response_audio_done_event.py b/src/openai/types/beta/realtime/response_audio_done_event.py new file mode 100644 index 0000000000..68e78bc778 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_done_event.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioDoneEvent"] + + +class ResponseAudioDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio.done"] + """The event type, must be `response.audio.done`.""" diff --git a/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py b/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py new file mode 100644 index 0000000000..3609948d10 --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_transcript_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
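`RealtimeServerEvent` is likewise discriminated on `type`, so a consumer can switch on that field and work with the narrowed event model. Below is a sketch that drains base64 audio deltas and reads usage from the terminal `response.done` event; the iterable of parsed events is assumed to come from the realtime connection helpers added in other files of this PR.

import base64
from typing import Iterable, List

from openai.types.beta.realtime.realtime_server_event import RealtimeServerEvent


def collect_audio(events: Iterable[RealtimeServerEvent]) -> bytes:
    """Concatenate audio deltas until the response finishes."""
    chunks: List[bytes] = []
    for event in events:
        if event.type == "response.audio.delta":
            chunks.append(base64.b64decode(event.delta))  # delta is base64-encoded audio
        elif event.type == "response.done":
            usage = event.response.usage
            if usage is not None:
                print("total tokens:", usage.total_tokens)
            break
    return b"".join(chunks)
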
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDeltaEvent"] + + +class ResponseAudioTranscriptDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """The transcript delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.audio_transcript.delta"] + """The event type, must be `response.audio_transcript.delta`.""" diff --git a/src/openai/types/beta/realtime/response_audio_transcript_done_event.py b/src/openai/types/beta/realtime/response_audio_transcript_done_event.py new file mode 100644 index 0000000000..4e4436a95f --- /dev/null +++ b/src/openai/types/beta/realtime/response_audio_transcript_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDoneEvent"] + + +class ResponseAudioTranscriptDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + transcript: str + """The final transcript of the audio.""" + + type: Literal["response.audio_transcript.done"] + """The event type, must be `response.audio_transcript.done`.""" diff --git a/src/openai/types/beta/realtime/response_cancel_event.py b/src/openai/types/beta/realtime/response_cancel_event.py new file mode 100644 index 0000000000..c5ff991e9a --- /dev/null +++ b/src/openai/types/beta/realtime/response_cancel_event.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseCancelEvent"] + + +class ResponseCancelEvent(BaseModel): + type: Literal["response.cancel"] + """The event type, must be `response.cancel`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + response_id: Optional[str] = None + """ + A specific response ID to cancel - if not provided, will cancel an in-progress + response in the default conversation. + """ diff --git a/src/openai/types/beta/realtime/response_cancel_event_param.py b/src/openai/types/beta/realtime/response_cancel_event_param.py new file mode 100644 index 0000000000..f33740730a --- /dev/null +++ b/src/openai/types/beta/realtime/response_cancel_event_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseCancelEventParam"] + + +class ResponseCancelEventParam(TypedDict, total=False): + type: Required[Literal["response.cancel"]] + """The event type, must be `response.cancel`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + response_id: str + """ + A specific response ID to cancel - if not provided, will cancel an in-progress + response in the default conversation. + """ diff --git a/src/openai/types/beta/realtime/response_content_part_added_event.py b/src/openai/types/beta/realtime/response_content_part_added_event.py new file mode 100644 index 0000000000..45c8f20f97 --- /dev/null +++ b/src/openai/types/beta/realtime/response_content_part_added_event.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseContentPartAddedEvent", "Part"] + + +class Part(BaseModel): + audio: Optional[str] = None + """Base64-encoded audio data (if type is "audio").""" + + text: Optional[str] = None + """The text content (if type is "text").""" + + transcript: Optional[str] = None + """The transcript of the audio (if type is "audio").""" + + type: Optional[Literal["text", "audio"]] = None + """The content type ("text", "audio").""" + + +class ResponseContentPartAddedEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item to which the content part was added.""" + + output_index: int + """The index of the output item in the response.""" + + part: Part + """The content part that was added.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.content_part.added"] + """The event type, must be `response.content_part.added`.""" diff --git a/src/openai/types/beta/realtime/response_content_part_done_event.py b/src/openai/types/beta/realtime/response_content_part_done_event.py new file mode 100644 index 0000000000..3d16116106 --- /dev/null +++ b/src/openai/types/beta/realtime/response_content_part_done_event.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
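`ResponseCancelEventParam` mirrors the model above for the sending direction; omitting `response_id` cancels whatever response is in progress in the default conversation. A small illustrative builder (how the dict is ultimately sent is left to the connection helpers, which are assumed rather than shown here):

from typing import Optional

from openai.types.beta.realtime.response_cancel_event_param import ResponseCancelEventParam


def cancel_event(response_id: Optional[str] = None) -> ResponseCancelEventParam:
    event: ResponseCancelEventParam = {"type": "response.cancel"}
    if response_id is not None:
        # Target a specific response; otherwise the active one is cancelled.
        event["response_id"] = response_id
    return event
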
+ +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseContentPartDoneEvent", "Part"] + + +class Part(BaseModel): + audio: Optional[str] = None + """Base64-encoded audio data (if type is "audio").""" + + text: Optional[str] = None + """The text content (if type is "text").""" + + transcript: Optional[str] = None + """The transcript of the audio (if type is "audio").""" + + type: Optional[Literal["text", "audio"]] = None + """The content type ("text", "audio").""" + + +class ResponseContentPartDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + part: Part + """The content part that is done.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.content_part.done"] + """The event type, must be `response.content_part.done`.""" diff --git a/src/openai/types/beta/realtime/response_create_event.py b/src/openai/types/beta/realtime/response_create_event.py new file mode 100644 index 0000000000..d6c5fda926 --- /dev/null +++ b/src/openai/types/beta/realtime/response_create_event.py @@ -0,0 +1,121 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel +from ...shared.metadata import Metadata +from .conversation_item_with_reference import ConversationItemWithReference + +__all__ = ["ResponseCreateEvent", "Response", "ResponseTool"] + + +class ResponseTool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class Response(BaseModel): + conversation: Union[str, Literal["auto", "none"], None] = None + """Controls which conversation the response is added to. + + Currently supports `auto` and `none`, with `auto` as the default value. The + `auto` value means that the contents of the response will be added to the + default conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + """ + + input: Optional[List[ConversationItemWithReference]] = None + """Input items to include in the prompt for the model. + + Using this field creates a new context for this Response instead of using the + default conversation. An empty array `[]` will clear the context for this + Response. Note that this can include references to items from the default + conversation. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). 
The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function, like + `{"type": "function", "function": {"name": "my_function"}}`. + """ + + tools: Optional[List[ResponseTool]] = None + """Tools (functions) available to the model.""" + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ + + +class ResponseCreateEvent(BaseModel): + type: Literal["response.create"] + """The event type, must be `response.create`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" + + response: Optional[Response] = None + """Create a new Realtime response with these parameters""" diff --git a/src/openai/types/beta/realtime/response_create_event_param.py b/src/openai/types/beta/realtime/response_create_event_param.py new file mode 100644 index 0000000000..c02fe1b34e --- /dev/null +++ b/src/openai/types/beta/realtime/response_create_event_param.py @@ -0,0 +1,122 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +from ...shared_params.metadata import Metadata +from .conversation_item_with_reference_param import ConversationItemWithReferenceParam + +__all__ = ["ResponseCreateEventParam", "Response", "ResponseTool"] + + +class ResponseTool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). 
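The `Response` options above also allow out-of-band responses that never touch the default conversation (`conversation: "none"`). A hypothetical payload in the TypedDict form defined in the following file; the commented `send` call refers to an assumed connection helper, not to anything in this hunk.

from openai.types.beta.realtime.response_create_event_param import ResponseCreateEventParam

out_of_band: ResponseCreateEventParam = {
    "type": "response.create",
    "response": {
        "conversation": "none",  # keep the result out of the default conversation
        "modalities": ["text"],
        "instructions": "Reply with a one-word sentiment label for the last user turn.",
        "metadata": {"purpose": "sentiment_probe"},  # up to 16 string key-value pairs
        "max_response_output_tokens": 50,
    },
}
# connection.send(out_of_band)  # `send` on the realtime connection is assumed here
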
+ """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class Response(TypedDict, total=False): + conversation: Union[str, Literal["auto", "none"]] + """Controls which conversation the response is added to. + + Currently supports `auto` and `none`, with `auto` as the default value. The + `auto` value means that the contents of the response will be added to the + default conversation. Set this to `none` to create an out-of-band response which + will not add items to default conversation. + """ + + input: Iterable[ConversationItemWithReferenceParam] + """Input items to include in the prompt for the model. + + Using this field creates a new context for this Response instead of using the + default conversation. An empty array `[]` will clear the context for this + Response. Note that this can include references to items from the default + conversation. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function, like + `{"type": "function", "function": {"name": "my_function"}}`. + """ + + tools: Iterable[ResponseTool] + """Tools (functions) available to the model.""" + + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. 
+ """ + + +class ResponseCreateEventParam(TypedDict, total=False): + type: Required[Literal["response.create"]] + """The event type, must be `response.create`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" + + response: Response + """Create a new Realtime response with these parameters""" diff --git a/src/openai/types/beta/realtime/response_created_event.py b/src/openai/types/beta/realtime/response_created_event.py new file mode 100644 index 0000000000..a4990cf095 --- /dev/null +++ b/src/openai/types/beta/realtime/response_created_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .realtime_response import RealtimeResponse + +__all__ = ["ResponseCreatedEvent"] + + +class ResponseCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response: RealtimeResponse + """The response resource.""" + + type: Literal["response.created"] + """The event type, must be `response.created`.""" diff --git a/src/openai/types/beta/realtime/response_done_event.py b/src/openai/types/beta/realtime/response_done_event.py new file mode 100644 index 0000000000..9e655184b6 --- /dev/null +++ b/src/openai/types/beta/realtime/response_done_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .realtime_response import RealtimeResponse + +__all__ = ["ResponseDoneEvent"] + + +class ResponseDoneEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + response: RealtimeResponse + """The response resource.""" + + type: Literal["response.done"] + """The event type, must be `response.done`.""" diff --git a/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py b/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py new file mode 100644 index 0000000000..cdbb64e658 --- /dev/null +++ b/src/openai/types/beta/realtime/response_function_call_arguments_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"] + + +class ResponseFunctionCallArgumentsDeltaEvent(BaseModel): + call_id: str + """The ID of the function call.""" + + delta: str + """The arguments delta as a JSON string.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the function call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.function_call_arguments.delta"] + """The event type, must be `response.function_call_arguments.delta`.""" diff --git a/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py b/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py new file mode 100644 index 0000000000..0a5db53323 --- /dev/null +++ b/src/openai/types/beta/realtime/response_function_call_arguments_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDoneEvent"] + + +class ResponseFunctionCallArgumentsDoneEvent(BaseModel): + arguments: str + """The final arguments as a JSON string.""" + + call_id: str + """The ID of the function call.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the function call item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.function_call_arguments.done"] + """The event type, must be `response.function_call_arguments.done`.""" diff --git a/src/openai/types/beta/realtime/response_output_item_added_event.py b/src/openai/types/beta/realtime/response_output_item_added_event.py new file mode 100644 index 0000000000..c89bfdc3be --- /dev/null +++ b/src/openai/types/beta/realtime/response_output_item_added_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseOutputItemAddedEvent"] + + +class ResponseOutputItemAddedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + output_index: int + """The index of the output item in the Response.""" + + response_id: str + """The ID of the Response to which the item belongs.""" + + type: Literal["response.output_item.added"] + """The event type, must be `response.output_item.added`.""" diff --git a/src/openai/types/beta/realtime/response_output_item_done_event.py b/src/openai/types/beta/realtime/response_output_item_done_event.py new file mode 100644 index 0000000000..b5910e22aa --- /dev/null +++ b/src/openai/types/beta/realtime/response_output_item_done_event.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel +from .conversation_item import ConversationItem + +__all__ = ["ResponseOutputItemDoneEvent"] + + +class ResponseOutputItemDoneEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + item: ConversationItem + """The item to add to the conversation.""" + + output_index: int + """The index of the output item in the Response.""" + + response_id: str + """The ID of the Response to which the item belongs.""" + + type: Literal["response.output_item.done"] + """The event type, must be `response.output_item.done`.""" diff --git a/src/openai/types/beta/realtime/response_text_delta_event.py b/src/openai/types/beta/realtime/response_text_delta_event.py new file mode 100644 index 0000000000..c463b3c3d0 --- /dev/null +++ b/src/openai/types/beta/realtime/response_text_delta_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
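Function-call arguments stream as JSON fragments keyed by `call_id` and only form valid JSON once the matching `.done` event arrives. A sketch of accumulating them with the event models defined above:

import json
from collections import defaultdict
from typing import DefaultDict, Union

from openai.types.beta.realtime.response_function_call_arguments_delta_event import (
    ResponseFunctionCallArgumentsDeltaEvent,
)
from openai.types.beta.realtime.response_function_call_arguments_done_event import (
    ResponseFunctionCallArgumentsDoneEvent,
)

_pending: DefaultDict[str, str] = defaultdict(str)


def on_function_call_event(
    event: Union[ResponseFunctionCallArgumentsDeltaEvent, ResponseFunctionCallArgumentsDoneEvent],
) -> None:
    if event.type == "response.function_call_arguments.delta":
        _pending[event.call_id] += event.delta  # accumulate the partial JSON string
    else:
        _pending.pop(event.call_id, None)
        arguments = json.loads(event.arguments)  # final, complete JSON
        print(f"function call {event.call_id} arguments: {arguments}")
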
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseTextDeltaEvent"] + + +class ResponseTextDeltaEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + delta: str + """The text delta.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + type: Literal["response.text.delta"] + """The event type, must be `response.text.delta`.""" diff --git a/src/openai/types/beta/realtime/response_text_done_event.py b/src/openai/types/beta/realtime/response_text_done_event.py new file mode 100644 index 0000000000..020ff41d58 --- /dev/null +++ b/src/openai/types/beta/realtime/response_text_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["ResponseTextDoneEvent"] + + +class ResponseTextDoneEvent(BaseModel): + content_index: int + """The index of the content part in the item's content array.""" + + event_id: str + """The unique ID of the server event.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item in the response.""" + + response_id: str + """The ID of the response.""" + + text: str + """The final text content.""" + + type: Literal["response.text.done"] + """The event type, must be `response.text.done`.""" diff --git a/src/openai/types/beta/realtime/session.py b/src/openai/types/beta/realtime/session.py new file mode 100644 index 0000000000..aee20fa906 --- /dev/null +++ b/src/openai/types/beta/realtime/session.py @@ -0,0 +1,171 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["Session", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class InputAudioTranscription(BaseModel): + model: Optional[str] = None + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class Tool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(BaseModel): + create_response: Optional[bool] = None + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + `true` by default. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. `true` by default. + """ + + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. 
With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[Literal["server_vad"]] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class Session(BaseModel): + id: Optional[str] = None + """Unique identifier for the session object.""" + + input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_transcription: Optional[InputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + model: Union[ + str, + Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ], + None, + ] = None + """The Realtime model used for this session.""" + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is + sampled at a rate of 24kHz. + """ + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. 
+ """ + + tools: Optional[List[Tool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[TurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ diff --git a/src/openai/types/beta/realtime/session_create_params.py b/src/openai/types/beta/realtime/session_create_params.py new file mode 100644 index 0000000000..bbc86d7c7d --- /dev/null +++ b/src/openai/types/beta/realtime/session_create_params.py @@ -0,0 +1,183 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, TypedDict + +__all__ = ["SessionCreateParams", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class SessionCreateParams(TypedDict, total=False): + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_transcription: InputAudioTranscription + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as rough guidance rather than the representation + understood by the model. The client can optionally set the language and prompt + for transcription, these fields will be passed to the Whisper API. + """ + + instructions: str + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. 
+ """ + + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + """The Realtime model used for this session.""" + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is + sampled at a rate of 24kHz. + """ + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Iterable[Tool] + """Tools (functions) available to the model.""" + + turn_detection: TurnDetection + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ + + +class InputAudioTranscription(TypedDict, total=False): + language: str + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: str + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + prompt: str + """An optional text to guide the model's style or continue a previous audio + segment. + + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + """ + + +class Tool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(TypedDict, total=False): + create_response: bool + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + `true` by default. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. `true` by default. + """ + + prefix_padding_ms: int + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: int + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: float + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. 
+ """ + + type: str + """Type of turn detection, only `server_vad` is currently supported.""" diff --git a/src/openai/types/beta/realtime/session_create_response.py b/src/openai/types/beta/realtime/session_create_response.py new file mode 100644 index 0000000000..c26e62bef1 --- /dev/null +++ b/src/openai/types/beta/realtime/session_create_response.py @@ -0,0 +1,150 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["SessionCreateResponse", "ClientSecret", "InputAudioTranscription", "Tool", "TurnDetection"] + + +class ClientSecret(BaseModel): + expires_at: int + """Timestamp for when the token expires. + + Currently, all tokens expire after one minute. + """ + + value: str + """ + Ephemeral key usable in client environments to authenticate connections to the + Realtime API. Use this in client-side environments rather than a standard API + token, which should only be used server-side. + """ + + +class InputAudioTranscription(BaseModel): + model: Optional[str] = None + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + +class Tool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class TurnDetection(BaseModel): + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[str] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class SessionCreateResponse(BaseModel): + client_secret: ClientSecret + """Ephemeral key returned by the API.""" + + input_audio_format: Optional[str] = None + """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + input_audio_transcription: Optional[InputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through Whisper and should be treated as rough guidance rather + than the representation understood by the model. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. 
"be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + output_audio_format: Optional[str] = None + """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.""" + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[Tool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[TurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ diff --git a/src/openai/types/beta/realtime/session_created_event.py b/src/openai/types/beta/realtime/session_created_event.py new file mode 100644 index 0000000000..baf6af388b --- /dev/null +++ b/src/openai/types/beta/realtime/session_created_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .session import Session +from ...._models import BaseModel + +__all__ = ["SessionCreatedEvent"] + + +class SessionCreatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + session: Session + """Realtime session object configuration.""" + + type: Literal["session.created"] + """The event type, must be `session.created`.""" diff --git a/src/openai/types/beta/realtime/session_update_event.py b/src/openai/types/beta/realtime/session_update_event.py new file mode 100644 index 0000000000..999cd8d660 --- /dev/null +++ b/src/openai/types/beta/realtime/session_update_event.py @@ -0,0 +1,196 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union, Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["SessionUpdateEvent", "Session", "SessionInputAudioTranscription", "SessionTool", "SessionTurnDetection"] + + +class SessionInputAudioTranscription(BaseModel): + language: Optional[str] = None + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: Optional[str] = None + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + prompt: Optional[str] = None + """An optional text to guide the model's style or continue a previous audio + segment. + + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + """ + + +class SessionTool(BaseModel): + description: Optional[str] = None + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: Optional[str] = None + """The name of the function.""" + + parameters: Optional[object] = None + """Parameters of the function in JSON Schema.""" + + type: Optional[Literal["function"]] = None + """The type of the tool, i.e. `function`.""" + + +class SessionTurnDetection(BaseModel): + create_response: Optional[bool] = None + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + `true` by default. + """ + + interrupt_response: Optional[bool] = None + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. `true` by default. + """ + + prefix_padding_ms: Optional[int] = None + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: Optional[int] = None + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: Optional[float] = None + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: Optional[str] = None + """Type of turn detection, only `server_vad` is currently supported.""" + + +class Session(BaseModel): + input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_transcription: Optional[SessionInputAudioTranscription] = None + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as rough guidance rather than the representation + understood by the model. 
The client can optionally set the language and prompt + for transcription, these fields will be passed to the Whisper API. + """ + + instructions: Optional[str] = None + """The default system instructions (i.e. + + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"], None] = None + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: Optional[List[Literal["text", "audio"]]] = None + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + model: Optional[ + Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + ] = None + """The Realtime model used for this session.""" + + output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None + """The format of output audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is + sampled at a rate of 24kHz. + """ + + temperature: Optional[float] = None + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: Optional[str] = None + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Optional[List[SessionTool]] = None + """Tools (functions) available to the model.""" + + turn_detection: Optional[SessionTurnDetection] = None + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Optional[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] = None + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ + + +class SessionUpdateEvent(BaseModel): + session: Session + """Realtime session object configuration.""" + + type: Literal["session.update"] + """The event type, must be `session.update`.""" + + event_id: Optional[str] = None + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/session_update_event_param.py b/src/openai/types/beta/realtime/session_update_event_param.py new file mode 100644 index 0000000000..07fdba9d85 --- /dev/null +++ b/src/openai/types/beta/realtime/session_update_event_param.py @@ -0,0 +1,200 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = [ + "SessionUpdateEventParam", + "Session", + "SessionInputAudioTranscription", + "SessionTool", + "SessionTurnDetection", +] + + +class SessionInputAudioTranscription(TypedDict, total=False): + language: str + """The language of the input audio. + + Supplying the input language in + [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`) + format will improve accuracy and latency. + """ + + model: str + """ + The model to use for transcription, `whisper-1` is the only currently supported + model. + """ + + prompt: str + """An optional text to guide the model's style or continue a previous audio + segment. + + The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) + should match the audio language. + """ + + +class SessionTool(TypedDict, total=False): + description: str + """ + The description of the function, including guidance on when and how to call it, + and guidance about what to tell the user when calling (if anything). + """ + + name: str + """The name of the function.""" + + parameters: object + """Parameters of the function in JSON Schema.""" + + type: Literal["function"] + """The type of the tool, i.e. `function`.""" + + +class SessionTurnDetection(TypedDict, total=False): + create_response: bool + """Whether or not to automatically generate a response when a VAD stop event + occurs. + + `true` by default. + """ + + interrupt_response: bool + """ + Whether or not to automatically interrupt any ongoing response with output to + the default conversation (i.e. `conversation` of `auto`) when a VAD start event + occurs. `true` by default. + """ + + prefix_padding_ms: int + """Amount of audio to include before the VAD detected speech (in milliseconds). + + Defaults to 300ms. + """ + + silence_duration_ms: int + """Duration of silence to detect speech stop (in milliseconds). + + Defaults to 500ms. With shorter values the model will respond more quickly, but + may jump in on short pauses from the user. + """ + + threshold: float + """Activation threshold for VAD (0.0 to 1.0), this defaults to 0.5. + + A higher threshold will require louder audio to activate the model, and thus + might perform better in noisy environments. + """ + + type: str + """Type of turn detection, only `server_vad` is currently supported.""" + + +class Session(TypedDict, total=False): + input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of input audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must + be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian + byte order. + """ + + input_audio_transcription: SessionInputAudioTranscription + """ + Configuration for input audio transcription, defaults to off and can be set to + `null` to turn off once on. Input audio transcription is not native to the + model, since the model consumes audio directly. Transcription runs + asynchronously through + [OpenAI Whisper transcription](https://platform.openai.com/docs/api-reference/audio/createTranscription) + and should be treated as rough guidance rather than the representation + understood by the model. The client can optionally set the language and prompt + for transcription, these fields will be passed to the Whisper API. + """ + + instructions: str + """The default system instructions (i.e. 
+ + system message) prepended to model calls. This field allows the client to guide + the model on desired responses. The model can be instructed on response content + and format, (e.g. "be extremely succinct", "act friendly", "here are examples of + good responses") and on audio behavior (e.g. "talk quickly", "inject emotion + into your voice", "laugh frequently"). The instructions are not guaranteed to be + followed by the model, but they provide guidance to the model on the desired + behavior. + + Note that the server sets default instructions which will be used if this field + is not set and are visible in the `session.created` event at the start of the + session. + """ + + max_response_output_tokens: Union[int, Literal["inf"]] + """ + Maximum number of output tokens for a single assistant response, inclusive of + tool calls. Provide an integer between 1 and 4096 to limit output tokens, or + `inf` for the maximum available tokens for a given model. Defaults to `inf`. + """ + + modalities: List[Literal["text", "audio"]] + """The set of modalities the model can respond with. + + To disable audio, set this to ["text"]. + """ + + model: Literal[ + "gpt-4o-realtime-preview", + "gpt-4o-realtime-preview-2024-10-01", + "gpt-4o-realtime-preview-2024-12-17", + "gpt-4o-mini-realtime-preview", + "gpt-4o-mini-realtime-preview-2024-12-17", + ] + """The Realtime model used for this session.""" + + output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] + """The format of output audio. + + Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is + sampled at a rate of 24kHz. + """ + + temperature: float + """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.""" + + tool_choice: str + """How the model chooses tools. + + Options are `auto`, `none`, `required`, or specify a function. + """ + + tools: Iterable[SessionTool] + """Tools (functions) available to the model.""" + + turn_detection: SessionTurnDetection + """Configuration for turn detection. + + Can be set to `null` to turn off. Server VAD means that the model will detect + the start and end of speech based on audio volume and respond at the end of user + speech. + """ + + voice: Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"] + """The voice the model uses to respond. + + Voice cannot be changed during the session once the model has responded with + audio at least once. Current voice options are `alloy`, `ash`, `ballad`, + `coral`, `echo` `sage`, `shimmer` and `verse`. + """ + + +class SessionUpdateEventParam(TypedDict, total=False): + session: Required[Session] + """Realtime session object configuration.""" + + type: Required[Literal["session.update"]] + """The event type, must be `session.update`.""" + + event_id: str + """Optional client-generated ID used to identify this event.""" diff --git a/src/openai/types/beta/realtime/session_updated_event.py b/src/openai/types/beta/realtime/session_updated_event.py new file mode 100644 index 0000000000..b9b6488eb3 --- /dev/null +++ b/src/openai/types/beta/realtime/session_updated_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
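# --- Editorial sketch (not part of the generated diff) ---------------------
# A minimal, hedged example of how the session update types above compose.
# The import path follows the file added in this diff; all field values are
# illustrative only, and every session field is optional in the TypedDict.
from openai.types.beta.realtime.session_update_event_param import (
    SessionUpdateEventParam,
)

event: SessionUpdateEventParam = {
    "type": "session.update",
    "session": {
        "modalities": ["text", "audio"],
        "voice": "alloy",
        "output_audio_format": "pcm16",
        "turn_detection": {
            "type": "server_vad",
            "threshold": 0.5,
            "silence_duration_ms": 500,
        },
    },
}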
+ +from typing_extensions import Literal + +from .session import Session +from ...._models import BaseModel + +__all__ = ["SessionUpdatedEvent"] + + +class SessionUpdatedEvent(BaseModel): + event_id: str + """The unique ID of the server event.""" + + session: Session + """Realtime session object configuration.""" + + type: Literal["session.updated"] + """The event type, must be `session.updated`.""" diff --git a/src/openai/types/beta/thread.py b/src/openai/types/beta/thread.py index 6f7a6c7d0c..789f66e48b 100644 --- a/src/openai/types/beta/thread.py +++ b/src/openai/types/beta/thread.py @@ -4,6 +4,7 @@ from typing_extensions import Literal from ..._models import BaseModel +from ..shared.metadata import Metadata __all__ = ["Thread", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] @@ -40,12 +41,14 @@ class Thread(BaseModel): created_at: int """The Unix timestamp (in seconds) for when the thread was created.""" - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ object: Literal["thread"] diff --git a/src/openai/types/beta/thread_create_and_run_params.py b/src/openai/types/beta/thread_create_and_run_params.py index 6efe6e7aee..065c390f4e 100644 --- a/src/openai/types/beta/thread_create_and_run_params.py +++ b/src/openai/types/beta/thread_create_and_run_params.py @@ -3,10 +3,12 @@ from __future__ import annotations from typing import List, Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, Required, TypeAlias, TypedDict +from ..shared.chat_model import ChatModel from .function_tool_param import FunctionToolParam from .file_search_tool_param import FileSearchToolParam +from ..shared_params.metadata import Metadata from .code_interpreter_tool_param import CodeInterpreterToolParam from .assistant_tool_choice_option_param import AssistantToolChoiceOptionParam from .threads.message_content_part_param import MessageContentPartParam @@ -18,10 +20,15 @@ "ThreadMessage", "ThreadMessageAttachment", "ThreadMessageAttachmentTool", + "ThreadMessageAttachmentToolFileSearch", "ThreadToolResources", "ThreadToolResourcesCodeInterpreter", "ThreadToolResourcesFileSearch", "ThreadToolResourcesFileSearchVectorStore", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategy", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch", @@ -64,40 +71,17 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): `incomplete_details` for more info. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. 
- """ - - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Union[str, ChatModel, None] """ The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the @@ -105,14 +89,26 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): assistant will be used. """ + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + response_format: Optional[AssistantResponseFormatOptionParam] """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -132,7 +128,11 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): """ thread: Thread - """If no thread is provided, an empty thread will be created.""" + """Options to create a new thread. + + If no thread is provided when running a request, an empty thread will be + created. + """ tool_choice: Optional[AssistantToolChoiceOptionParam] """ @@ -175,7 +175,12 @@ class ThreadCreateAndRunParamsBase(TypedDict, total=False): """ -ThreadMessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] +class ThreadMessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +ThreadMessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, ThreadMessageAttachmentToolFileSearch] class ThreadMessageAttachment(TypedDict, total=False): @@ -202,12 +207,14 @@ class ThreadMessage(TypedDict, total=False): attachments: Optional[Iterable[ThreadMessageAttachment]] """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. 
Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ @@ -220,7 +227,46 @@ class ThreadToolResourcesCodeInterpreter(TypedDict, total=False): """ +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + type: Required[Literal["auto"]] + """Always `auto`.""" + + +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ + + +class ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ThreadToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ThreadToolResourcesFileSearchVectorStoreChunkingStrategyAuto, + ThreadToolResourcesFileSearchVectorStoreChunkingStrategyStatic, +] + + class ThreadToolResourcesFileSearchVectorStore(TypedDict, total=False): + chunking_strategy: ThreadToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. + """ + file_ids: List[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to @@ -228,12 +274,14 @@ class ThreadToolResourcesFileSearchVectorStore(TypedDict, total=False): store. """ - metadata: object - """Set of 16 key-value pairs that can be attached to a vector store. + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. - This can be useful for storing additional information about the vector store in - a structured format. Keys can be a maximum of 64 characters long and values can - be a maxium of 512 characters long. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ @@ -268,12 +316,14 @@ class Thread(TypedDict, total=False): start the thread with. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" tool_resources: Optional[ThreadToolResources] @@ -310,7 +360,7 @@ class ToolResources(TypedDict, total=False): file_search: ToolResourcesFileSearch -Tool = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] +Tool: TypeAlias = Union[CodeInterpreterToolParam, FileSearchToolParam, FunctionToolParam] class TruncationStrategy(TypedDict, total=False): @@ -330,7 +380,7 @@ class TruncationStrategy(TypedDict, total=False): """ -class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase): +class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/thread_create_params.py b/src/openai/types/beta/thread_create_params.py index ccf50d58dc..ec1ccf19a6 100644 --- a/src/openai/types/beta/thread_create_params.py +++ b/src/openai/types/beta/thread_create_params.py @@ -3,9 +3,9 @@ from __future__ import annotations from typing import List, Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from .file_search_tool_param import FileSearchToolParam +from ..shared_params.metadata import Metadata from .code_interpreter_tool_param import CodeInterpreterToolParam from .threads.message_content_part_param import MessageContentPartParam @@ -14,10 +14,15 @@ "Message", "MessageAttachment", "MessageAttachmentTool", + "MessageAttachmentToolFileSearch", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch", "ToolResourcesFileSearchVectorStore", + "ToolResourcesFileSearchVectorStoreChunkingStrategy", + "ToolResourcesFileSearchVectorStoreChunkingStrategyAuto", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStatic", + "ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic", ] @@ -28,12 +33,14 @@ class ThreadCreateParams(TypedDict, total=False): start the thread with. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ tool_resources: Optional[ToolResources] @@ -45,7 +52,12 @@ class ThreadCreateParams(TypedDict, total=False): """ -MessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] +class MessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +MessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, MessageAttachmentToolFileSearch] class MessageAttachment(TypedDict, total=False): @@ -72,12 +84,14 @@ class Message(TypedDict, total=False): attachments: Optional[Iterable[MessageAttachment]] """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. 
Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ @@ -90,7 +104,45 @@ class ToolResourcesCodeInterpreter(TypedDict, total=False): """ +class ToolResourcesFileSearchVectorStoreChunkingStrategyAuto(TypedDict, total=False): + type: Required[Literal["auto"]] + """Always `auto`.""" + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ + + +class ToolResourcesFileSearchVectorStoreChunkingStrategyStatic(TypedDict, total=False): + static: Required[ToolResourcesFileSearchVectorStoreChunkingStrategyStaticStatic] + + type: Required[Literal["static"]] + """Always `static`.""" + + +ToolResourcesFileSearchVectorStoreChunkingStrategy: TypeAlias = Union[ + ToolResourcesFileSearchVectorStoreChunkingStrategyAuto, ToolResourcesFileSearchVectorStoreChunkingStrategyStatic +] + + class ToolResourcesFileSearchVectorStore(TypedDict, total=False): + chunking_strategy: ToolResourcesFileSearchVectorStoreChunkingStrategy + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. + """ + file_ids: List[str] """ A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to @@ -98,12 +150,14 @@ class ToolResourcesFileSearchVectorStore(TypedDict, total=False): store. """ - metadata: object - """Set of 16 key-value pairs that can be attached to a vector store. + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. - This can be useful for storing additional information about the vector store in - a structured format. Keys can be a maximum of 64 characters long and values can - be a maxium of 512 characters long. + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ diff --git a/src/openai/types/beta/thread_update_params.py b/src/openai/types/beta/thread_update_params.py index 7210ab77c9..b47ea8f3b0 100644 --- a/src/openai/types/beta/thread_update_params.py +++ b/src/openai/types/beta/thread_update_params.py @@ -5,16 +5,20 @@ from typing import List, Optional from typing_extensions import TypedDict +from ..shared_params.metadata import Metadata + __all__ = ["ThreadUpdateParams", "ToolResources", "ToolResourcesCodeInterpreter", "ToolResourcesFileSearch"] class ThreadUpdateParams(TypedDict, total=False): - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. 
+ + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ tool_resources: Optional[ToolResources] diff --git a/src/openai/types/beta/threads/__init__.py b/src/openai/types/beta/threads/__init__.py index 023d76fc13..70853177bd 100644 --- a/src/openai/types/beta/threads/__init__.py +++ b/src/openai/types/beta/threads/__init__.py @@ -25,11 +25,13 @@ from .text_content_block import TextContentBlock as TextContentBlock from .message_delta_event import MessageDeltaEvent as MessageDeltaEvent from .message_list_params import MessageListParams as MessageListParams +from .refusal_delta_block import RefusalDeltaBlock as RefusalDeltaBlock from .file_path_annotation import FilePathAnnotation as FilePathAnnotation from .image_url_delta_block import ImageURLDeltaBlock as ImageURLDeltaBlock from .message_content_delta import MessageContentDelta as MessageContentDelta from .message_create_params import MessageCreateParams as MessageCreateParams from .message_update_params import MessageUpdateParams as MessageUpdateParams +from .refusal_content_block import RefusalContentBlock as RefusalContentBlock from .image_file_delta_block import ImageFileDeltaBlock as ImageFileDeltaBlock from .image_url_content_block import ImageURLContentBlock as ImageURLContentBlock from .file_citation_annotation import FileCitationAnnotation as FileCitationAnnotation diff --git a/src/openai/types/beta/threads/annotation.py b/src/openai/types/beta/threads/annotation.py index 31e228c831..13c10abf4d 100644 --- a/src/openai/types/beta/threads/annotation.py +++ b/src/openai/types/beta/threads/annotation.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ...._utils import PropertyInfo from .file_path_annotation import FilePathAnnotation @@ -9,4 +9,4 @@ __all__ = ["Annotation"] -Annotation = Annotated[Union[FileCitationAnnotation, FilePathAnnotation], PropertyInfo(discriminator="type")] +Annotation: TypeAlias = Annotated[Union[FileCitationAnnotation, FilePathAnnotation], PropertyInfo(discriminator="type")] diff --git a/src/openai/types/beta/threads/annotation_delta.py b/src/openai/types/beta/threads/annotation_delta.py index 912429672f..c7c6c89837 100644 --- a/src/openai/types/beta/threads/annotation_delta.py +++ b/src/openai/types/beta/threads/annotation_delta.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
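# --- Editorial sketch (not part of the generated diff) ---------------------
# The `metadata` fields above now use the shared `Metadata` alias (string
# keys and string values, up to 16 pairs). A hedged usage example; the client
# call shape is assumed from the existing SDK surface and the ID is
# illustrative.
from openai import OpenAI

client = OpenAI()

thread = client.beta.threads.update(
    "thread_abc123",  # hypothetical thread ID
    metadata={"user_id": "u_123", "purpose": "demo"},
)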
from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ...._utils import PropertyInfo from .file_path_delta_annotation import FilePathDeltaAnnotation @@ -9,6 +9,6 @@ __all__ = ["AnnotationDelta"] -AnnotationDelta = Annotated[ +AnnotationDelta: TypeAlias = Annotated[ Union[FileCitationDeltaAnnotation, FilePathDeltaAnnotation], PropertyInfo(discriminator="type") ] diff --git a/src/openai/types/beta/threads/file_citation_annotation.py b/src/openai/types/beta/threads/file_citation_annotation.py index 68571cd477..c3085aed9b 100644 --- a/src/openai/types/beta/threads/file_citation_annotation.py +++ b/src/openai/types/beta/threads/file_citation_annotation.py @@ -11,9 +11,6 @@ class FileCitation(BaseModel): file_id: str """The ID of the specific File the citation is from.""" - quote: str - """The specific quote in the file.""" - class FileCitationAnnotation(BaseModel): end_index: int diff --git a/src/openai/types/beta/threads/message.py b/src/openai/types/beta/threads/message.py index ebaabdb0f5..4a05a128eb 100644 --- a/src/openai/types/beta/threads/message.py +++ b/src/openai/types/beta/threads/message.py @@ -1,16 +1,28 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union, Optional -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from ...._models import BaseModel from .message_content import MessageContent -from ..file_search_tool import FileSearchTool +from ...shared.metadata import Metadata from ..code_interpreter_tool import CodeInterpreterTool -__all__ = ["Message", "Attachment", "AttachmentTool", "IncompleteDetails"] +__all__ = [ + "Message", + "Attachment", + "AttachmentTool", + "AttachmentToolAssistantToolsFileSearchTypeOnly", + "IncompleteDetails", +] -AttachmentTool = Union[CodeInterpreterTool, FileSearchTool] + +class AttachmentToolAssistantToolsFileSearchTypeOnly(BaseModel): + type: Literal["file_search"] + """The type of tool being defined: `file_search`""" + + +AttachmentTool: TypeAlias = Union[CodeInterpreterTool, AttachmentToolAssistantToolsFileSearchTypeOnly] class Attachment(BaseModel): @@ -55,12 +67,14 @@ class Message(BaseModel): incomplete_details: Optional[IncompleteDetails] = None """On an incomplete message, details about why the message is incomplete.""" - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ object: Literal["thread.message"] diff --git a/src/openai/types/beta/threads/message_content.py b/src/openai/types/beta/threads/message_content.py index 4f17d14786..b313d35af6 100644 --- a/src/openai/types/beta/threads/message_content.py +++ b/src/openai/types/beta/threads/message_content.py @@ -1,15 +1,17 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ...._utils import PropertyInfo from .text_content_block import TextContentBlock +from .refusal_content_block import RefusalContentBlock from .image_url_content_block import ImageURLContentBlock from .image_file_content_block import ImageFileContentBlock __all__ = ["MessageContent"] -MessageContent = Annotated[ - Union[ImageFileContentBlock, ImageURLContentBlock, TextContentBlock], PropertyInfo(discriminator="type") +MessageContent: TypeAlias = Annotated[ + Union[ImageFileContentBlock, ImageURLContentBlock, TextContentBlock, RefusalContentBlock], + PropertyInfo(discriminator="type"), ] diff --git a/src/openai/types/beta/threads/message_content_delta.py b/src/openai/types/beta/threads/message_content_delta.py index 6c5f732b12..b6e7dfa45a 100644 --- a/src/openai/types/beta/threads/message_content_delta.py +++ b/src/openai/types/beta/threads/message_content_delta.py @@ -1,15 +1,17 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ...._utils import PropertyInfo from .text_delta_block import TextDeltaBlock +from .refusal_delta_block import RefusalDeltaBlock from .image_url_delta_block import ImageURLDeltaBlock from .image_file_delta_block import ImageFileDeltaBlock __all__ = ["MessageContentDelta"] -MessageContentDelta = Annotated[ - Union[ImageFileDeltaBlock, TextDeltaBlock, ImageURLDeltaBlock], PropertyInfo(discriminator="type") +MessageContentDelta: TypeAlias = Annotated[ + Union[ImageFileDeltaBlock, TextDeltaBlock, RefusalDeltaBlock, ImageURLDeltaBlock], + PropertyInfo(discriminator="type"), ] diff --git a/src/openai/types/beta/threads/message_content_part_param.py b/src/openai/types/beta/threads/message_content_part_param.py index d11442a3a9..dc09a01c27 100644 --- a/src/openai/types/beta/threads/message_content_part_param.py +++ b/src/openai/types/beta/threads/message_content_part_param.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Union +from typing_extensions import TypeAlias from .text_content_block_param import TextContentBlockParam from .image_url_content_block_param import ImageURLContentBlockParam @@ -10,4 +11,4 @@ __all__ = ["MessageContentPartParam"] -MessageContentPartParam = Union[ImageFileContentBlockParam, ImageURLContentBlockParam, TextContentBlockParam] +MessageContentPartParam: TypeAlias = Union[ImageFileContentBlockParam, ImageURLContentBlockParam, TextContentBlockParam] diff --git a/src/openai/types/beta/threads/message_create_params.py b/src/openai/types/beta/threads/message_create_params.py index 3668df950d..b52386824a 100644 --- a/src/openai/types/beta/threads/message_create_params.py +++ b/src/openai/types/beta/threads/message_create_params.py @@ -3,13 +3,13 @@ from __future__ import annotations from typing import Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ..file_search_tool_param import FileSearchToolParam +from ...shared_params.metadata import Metadata from .message_content_part_param import MessageContentPartParam from ..code_interpreter_tool_param import CodeInterpreterToolParam -__all__ = ["MessageCreateParams", "Attachment", "AttachmentTool"] +__all__ = ["MessageCreateParams", "Attachment", "AttachmentTool", "AttachmentToolFileSearch"] 
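# --- Editorial sketch (not part of the generated diff) ---------------------
# A minimal example of consuming the widened MessageContent union above. The
# refusal branch relies on the RefusalContentBlock type added later in this
# diff; the `message` argument is assumed to come from the messages API.
from openai.types.beta.threads.message import Message


def render_message(message: Message) -> str:
    parts: list[str] = []
    for block in message.content:
        if block.type == "text":
            parts.append(block.text.value)
        elif block.type == "refusal":
            parts.append(f"[refusal] {block.refusal}")
        # image_file / image_url blocks carry no plain text to render here
    return "\n".join(parts)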
class MessageCreateParams(TypedDict, total=False): @@ -28,16 +28,23 @@ class MessageCreateParams(TypedDict, total=False): attachments: Optional[Iterable[Attachment]] """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ -AttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] +class AttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +AttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, AttachmentToolFileSearch] class Attachment(TypedDict, total=False): diff --git a/src/openai/types/beta/threads/message_list_params.py b/src/openai/types/beta/threads/message_list_params.py index 18c2442fb5..a7c22a66fb 100644 --- a/src/openai/types/beta/threads/message_list_params.py +++ b/src/openai/types/beta/threads/message_list_params.py @@ -21,7 +21,7 @@ class MessageListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/beta/threads/message_update_params.py b/src/openai/types/beta/threads/message_update_params.py index 7000f33122..bb078281e6 100644 --- a/src/openai/types/beta/threads/message_update_params.py +++ b/src/openai/types/beta/threads/message_update_params.py @@ -5,16 +5,20 @@ from typing import Optional from typing_extensions import Required, TypedDict +from ...shared_params.metadata import Metadata + __all__ = ["MessageUpdateParams"] class MessageUpdateParams(TypedDict, total=False): thread_id: Required[str] - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ diff --git a/src/openai/types/beta/threads/refusal_content_block.py b/src/openai/types/beta/threads/refusal_content_block.py new file mode 100644 index 0000000000..d54f948554 --- /dev/null +++ b/src/openai/types/beta/threads/refusal_content_block.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RefusalContentBlock"] + + +class RefusalContentBlock(BaseModel): + refusal: str + + type: Literal["refusal"] + """Always `refusal`.""" diff --git a/src/openai/types/beta/threads/refusal_delta_block.py b/src/openai/types/beta/threads/refusal_delta_block.py new file mode 100644 index 0000000000..dbd8e62697 --- /dev/null +++ b/src/openai/types/beta/threads/refusal_delta_block.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ...._models import BaseModel + +__all__ = ["RefusalDeltaBlock"] + + +class RefusalDeltaBlock(BaseModel): + index: int + """The index of the refusal part in the message.""" + + type: Literal["refusal"] + """Always `refusal`.""" + + refusal: Optional[str] = None diff --git a/src/openai/types/beta/threads/run.py b/src/openai/types/beta/threads/run.py index 8244ffd598..da9418d6f9 100644 --- a/src/openai/types/beta/threads/run.py +++ b/src/openai/types/beta/threads/run.py @@ -6,6 +6,7 @@ from ...._models import BaseModel from .run_status import RunStatus from ..assistant_tool import AssistantTool +from ...shared.metadata import Metadata from ..assistant_tool_choice_option import AssistantToolChoiceOption from ..assistant_response_format_option import AssistantResponseFormatOption from .required_action_function_tool_call import RequiredActionFunctionToolCall @@ -133,12 +134,14 @@ class Run(BaseModel): of the run. """ - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ model: str @@ -151,6 +154,13 @@ class Run(BaseModel): object: Literal["thread.run"] """The object type, which is always `thread.run`.""" + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + required_action: Optional[RequiredAction] = None """Details on the action required to continue the run. @@ -160,11 +170,16 @@ class Run(BaseModel): response_format: Optional[AssistantResponseFormatOption] = None """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. 
**Important:** when using JSON mode, you **must** also instruct the model to diff --git a/src/openai/types/beta/threads/run_create_params.py b/src/openai/types/beta/threads/run_create_params.py index 90c9708596..fc70227862 100644 --- a/src/openai/types/beta/threads/run_create_params.py +++ b/src/openai/types/beta/threads/run_create_params.py @@ -2,11 +2,14 @@ from __future__ import annotations -from typing import Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict +from ...shared.chat_model import ChatModel from ..assistant_tool_param import AssistantToolParam -from ..file_search_tool_param import FileSearchToolParam +from .runs.run_step_include import RunStepInclude +from ...shared_params.metadata import Metadata +from ...shared.reasoning_effort import ReasoningEffort from .message_content_part_param import MessageContentPartParam from ..code_interpreter_tool_param import CodeInterpreterToolParam from ..assistant_tool_choice_option_param import AssistantToolChoiceOptionParam @@ -17,6 +20,7 @@ "AdditionalMessage", "AdditionalMessageAttachment", "AdditionalMessageAttachmentTool", + "AdditionalMessageAttachmentToolFileSearch", "TruncationStrategy", "RunCreateParamsNonStreaming", "RunCreateParamsStreaming", @@ -31,6 +35,18 @@ class RunCreateParamsBase(TypedDict, total=False): execute this run. """ + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + additional_instructions: Optional[str] """Appends additional instructions at the end of the instructions for the run. @@ -66,40 +82,17 @@ class RunCreateParamsBase(TypedDict, total=False): `incomplete_details` for more info. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. - """ - - model: Union[ - str, - Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", - ], - None, - ] + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Union[str, ChatModel, None] """ The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the @@ -107,14 +100,35 @@ class RunCreateParamsBase(TypedDict, total=False): assistant will be used. 
""" + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + + reasoning_effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + response_format: Optional[AssistantResponseFormatOptionParam] """Specifies the format that the model must output. - Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), + Compatible with [GPT-4o](https://platform.openai.com/docs/models#gpt-4o), + [GPT-4 Turbo](https://platform.openai.com/docs/models#gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`. - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + Setting to `{ "type": "json_object" }` enables JSON mode, which ensures the message the model generates is valid JSON. **Important:** when using JSON mode, you **must** also instruct the model to @@ -166,7 +180,12 @@ class RunCreateParamsBase(TypedDict, total=False): """ -AdditionalMessageAttachmentTool = Union[CodeInterpreterToolParam, FileSearchToolParam] +class AdditionalMessageAttachmentToolFileSearch(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of tool being defined: `file_search`""" + + +AdditionalMessageAttachmentTool: TypeAlias = Union[CodeInterpreterToolParam, AdditionalMessageAttachmentToolFileSearch] class AdditionalMessageAttachment(TypedDict, total=False): @@ -193,12 +212,14 @@ class AdditionalMessage(TypedDict, total=False): attachments: Optional[Iterable[AdditionalMessageAttachment]] """A list of files attached to the message, and the tools they should be added to.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ @@ -219,7 +240,7 @@ class TruncationStrategy(TypedDict, total=False): """ -class RunCreateParamsNonStreaming(RunCreateParamsBase): +class RunCreateParamsNonStreaming(RunCreateParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/threads/run_list_params.py b/src/openai/types/beta/threads/run_list_params.py index 1e32bca4b4..fbea54f6f2 100644 --- a/src/openai/types/beta/threads/run_list_params.py +++ b/src/openai/types/beta/threads/run_list_params.py @@ -21,7 +21,7 @@ class RunListParams(TypedDict, total=False): """A cursor for use in pagination. 
`before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/beta/threads/run_status.py b/src/openai/types/beta/threads/run_status.py index 6666d00e5a..47c7cbd007 100644 --- a/src/openai/types/beta/threads/run_status.py +++ b/src/openai/types/beta/threads/run_status.py @@ -1,10 +1,10 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias __all__ = ["RunStatus"] -RunStatus = Literal[ +RunStatus: TypeAlias = Literal[ "queued", "in_progress", "requires_action", diff --git a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py index ccb5e5e97e..147728603a 100644 --- a/src/openai/types/beta/threads/run_submit_tool_outputs_params.py +++ b/src/openai/types/beta/threads/run_submit_tool_outputs_params.py @@ -31,7 +31,7 @@ class ToolOutput(TypedDict, total=False): """ -class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase): +class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase, total=False): stream: Optional[Literal[False]] """ If `true`, returns a stream of events that happen during the Run as server-sent diff --git a/src/openai/types/beta/threads/run_update_params.py b/src/openai/types/beta/threads/run_update_params.py index e595eac882..fbcbd3fb14 100644 --- a/src/openai/types/beta/threads/run_update_params.py +++ b/src/openai/types/beta/threads/run_update_params.py @@ -5,16 +5,20 @@ from typing import Optional from typing_extensions import Required, TypedDict +from ...shared_params.metadata import Metadata + __all__ = ["RunUpdateParams"] class RunUpdateParams(TypedDict, total=False): thread_id: Required[str] - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" diff --git a/src/openai/types/beta/threads/runs/__init__.py b/src/openai/types/beta/threads/runs/__init__.py index a312ce3df2..467d5d793d 100644 --- a/src/openai/types/beta/threads/runs/__init__.py +++ b/src/openai/types/beta/threads/runs/__init__.py @@ -6,9 +6,11 @@ from .tool_call import ToolCall as ToolCall from .run_step_delta import RunStepDelta as RunStepDelta from .tool_call_delta import ToolCallDelta as ToolCallDelta +from .run_step_include import RunStepInclude as RunStepInclude from .step_list_params import StepListParams as StepListParams from .function_tool_call import FunctionToolCall as FunctionToolCall from .run_step_delta_event import RunStepDeltaEvent as RunStepDeltaEvent +from .step_retrieve_params import StepRetrieveParams as StepRetrieveParams from .code_interpreter_logs import CodeInterpreterLogs as CodeInterpreterLogs from .file_search_tool_call import FileSearchToolCall as FileSearchToolCall from .tool_call_delta_object import ToolCallDeltaObject as ToolCallDeltaObject diff --git a/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py b/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py index 2f07243684..e7df4e19c4 100644 --- a/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py +++ b/src/openai/types/beta/threads/runs/code_interpreter_tool_call.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union -from typing_extensions import Literal, Annotated +from typing_extensions import Literal, Annotated, TypeAlias from ....._utils import PropertyInfo from ....._models import BaseModel @@ -39,7 +39,7 @@ class CodeInterpreterOutputImage(BaseModel): """Always `image`.""" -CodeInterpreterOutput = Annotated[ +CodeInterpreterOutput: TypeAlias = Annotated[ Union[CodeInterpreterOutputLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type") ] diff --git a/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py b/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py index eff76355b3..9d7a1563cd 100644 --- a/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py +++ b/src/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import List, Union, Optional -from typing_extensions import Literal, Annotated +from typing_extensions import Literal, Annotated, TypeAlias from ....._utils import PropertyInfo from ....._models import BaseModel @@ -10,7 +10,7 @@ __all__ = ["CodeInterpreterToolCallDelta", "CodeInterpreter", "CodeInterpreterOutput"] -CodeInterpreterOutput = Annotated[ +CodeInterpreterOutput: TypeAlias = Annotated[ Union[CodeInterpreterLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type") ] diff --git a/src/openai/types/beta/threads/runs/file_search_tool_call.py b/src/openai/types/beta/threads/runs/file_search_tool_call.py index 57c0ca9a90..a2068daad1 100644 --- a/src/openai/types/beta/threads/runs/file_search_tool_call.py +++ b/src/openai/types/beta/threads/runs/file_search_tool_call.py @@ -1,17 +1,74 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+from typing import List, Optional from typing_extensions import Literal from ....._models import BaseModel -__all__ = ["FileSearchToolCall"] +__all__ = [ + "FileSearchToolCall", + "FileSearch", + "FileSearchRankingOptions", + "FileSearchResult", + "FileSearchResultContent", +] + + +class FileSearchRankingOptions(BaseModel): + ranker: Literal["auto", "default_2024_08_21"] + """The ranker to use for the file search. + + If not specified will use the `auto` ranker. + """ + + score_threshold: float + """The score threshold for the file search. + + All values must be a floating point number between 0 and 1. + """ + + +class FileSearchResultContent(BaseModel): + text: Optional[str] = None + """The text content of the file.""" + + type: Optional[Literal["text"]] = None + """The type of the content.""" + + +class FileSearchResult(BaseModel): + file_id: str + """The ID of the file that result was found in.""" + + file_name: str + """The name of the file that result was found in.""" + + score: float + """The score of the result. + + All values must be a floating point number between 0 and 1. + """ + + content: Optional[List[FileSearchResultContent]] = None + """The content of the result that was found. + + The content is only included if requested via the include query parameter. + """ + + +class FileSearch(BaseModel): + ranking_options: Optional[FileSearchRankingOptions] = None + """The ranking options for the file search.""" + + results: Optional[List[FileSearchResult]] = None + """The results of the file search.""" class FileSearchToolCall(BaseModel): id: str """The ID of the tool call object.""" - file_search: object + file_search: FileSearch """For now, this is always going to be an empty object.""" type: Literal["file_search"] diff --git a/src/openai/types/beta/threads/runs/run_step.py b/src/openai/types/beta/threads/runs/run_step.py index 7c81dcac2b..b5f380c7b1 100644 --- a/src/openai/types/beta/threads/runs/run_step.py +++ b/src/openai/types/beta/threads/runs/run_step.py @@ -1,10 +1,11 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union, Optional -from typing_extensions import Literal, Annotated +from typing_extensions import Literal, Annotated, TypeAlias from ....._utils import PropertyInfo from ....._models import BaseModel +from ....shared.metadata import Metadata from .tool_calls_step_details import ToolCallsStepDetails from .message_creation_step_details import MessageCreationStepDetails @@ -19,7 +20,9 @@ class LastError(BaseModel): """A human-readable description of the error.""" -StepDetails = Annotated[Union[MessageCreationStepDetails, ToolCallsStepDetails], PropertyInfo(discriminator="type")] +StepDetails: TypeAlias = Annotated[ + Union[MessageCreationStepDetails, ToolCallsStepDetails], PropertyInfo(discriminator="type") +] class Usage(BaseModel): @@ -68,12 +71,14 @@ class RunStep(BaseModel): Will be `null` if there are no errors. """ - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" object: Literal["thread.run.step"] diff --git a/src/openai/types/beta/threads/runs/run_step_delta.py b/src/openai/types/beta/threads/runs/run_step_delta.py index d6b4aefeb9..1139088fb4 100644 --- a/src/openai/types/beta/threads/runs/run_step_delta.py +++ b/src/openai/types/beta/threads/runs/run_step_delta.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union, Optional -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ....._utils import PropertyInfo from ....._models import BaseModel @@ -10,7 +10,9 @@ __all__ = ["RunStepDelta", "StepDetails"] -StepDetails = Annotated[Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], PropertyInfo(discriminator="type")] +StepDetails: TypeAlias = Annotated[ + Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], PropertyInfo(discriminator="type") +] class RunStepDelta(BaseModel): diff --git a/src/openai/types/beta/threads/runs/run_step_include.py b/src/openai/types/beta/threads/runs/run_step_include.py new file mode 100644 index 0000000000..8e76c1b716 --- /dev/null +++ b/src/openai/types/beta/threads/runs/run_step_include.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["RunStepInclude"] + +RunStepInclude: TypeAlias = Literal["step_details.tool_calls[*].file_search.results[*].content"] diff --git a/src/openai/types/beta/threads/runs/step_list_params.py b/src/openai/types/beta/threads/runs/step_list_params.py index 606d444539..a6be771d9f 100644 --- a/src/openai/types/beta/threads/runs/step_list_params.py +++ b/src/openai/types/beta/threads/runs/step_list_params.py @@ -2,8 +2,11 @@ from __future__ import annotations +from typing import List from typing_extensions import Literal, Required, TypedDict +from .run_step_include import RunStepInclude + __all__ = ["StepListParams"] @@ -23,11 +26,23 @@ class StepListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ + limit: int """A limit on the number of objects to be returned. diff --git a/src/openai/types/beta/threads/runs/step_retrieve_params.py b/src/openai/types/beta/threads/runs/step_retrieve_params.py new file mode 100644 index 0000000000..ecbb72edbd --- /dev/null +++ b/src/openai/types/beta/threads/runs/step_retrieve_params.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +from .run_step_include import RunStepInclude + +__all__ = ["StepRetrieveParams"] + + +class StepRetrieveParams(TypedDict, total=False): + thread_id: Required[str] + + run_id: Required[str] + + include: List[RunStepInclude] + """A list of additional fields to include in the response. + + Currently the only supported value is + `step_details.tool_calls[*].file_search.results[*].content` to fetch the file + search result content. + + See the + [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) + for more information. + """ diff --git a/src/openai/types/beta/threads/runs/tool_call.py b/src/openai/types/beta/threads/runs/tool_call.py index 77d86b46d9..565e3109be 100644 --- a/src/openai/types/beta/threads/runs/tool_call.py +++ b/src/openai/types/beta/threads/runs/tool_call.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ....._utils import PropertyInfo from .function_tool_call import FunctionToolCall @@ -10,6 +10,6 @@ __all__ = ["ToolCall"] -ToolCall = Annotated[ +ToolCall: TypeAlias = Annotated[ Union[CodeInterpreterToolCall, FileSearchToolCall, FunctionToolCall], PropertyInfo(discriminator="type") ] diff --git a/src/openai/types/beta/threads/runs/tool_call_delta.py b/src/openai/types/beta/threads/runs/tool_call_delta.py index 90cfe0657e..f0b8070c97 100644 --- a/src/openai/types/beta/threads/runs/tool_call_delta.py +++ b/src/openai/types/beta/threads/runs/tool_call_delta.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Union -from typing_extensions import Annotated +from typing_extensions import Annotated, TypeAlias from ....._utils import PropertyInfo from .function_tool_call_delta import FunctionToolCallDelta @@ -10,7 +10,7 @@ __all__ = ["ToolCallDelta"] -ToolCallDelta = Annotated[ +ToolCallDelta: TypeAlias = Annotated[ Union[CodeInterpreterToolCallDelta, FileSearchToolCallDelta, FunctionToolCallDelta], PropertyInfo(discriminator="type"), ] diff --git a/src/openai/types/beta/vector_stores/file_batch_create_params.py b/src/openai/types/beta/vector_stores/file_batch_create_params.py deleted file mode 100644 index 0882829732..0000000000 --- a/src/openai/types/beta/vector_stores/file_batch_create_params.py +++ /dev/null @@ -1,17 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - -from __future__ import annotations - -from typing import List -from typing_extensions import Required, TypedDict - -__all__ = ["FileBatchCreateParams"] - - -class FileBatchCreateParams(TypedDict, total=False): - file_ids: Required[List[str]] - """ - A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that - the vector store should use. Useful for tools like `file_search` that can access - files. - """ diff --git a/src/openai/types/beta/vector_stores/file_create_params.py b/src/openai/types/beta/vector_stores/file_create_params.py deleted file mode 100644 index 2fee588abf..0000000000 --- a/src/openai/types/beta/vector_stores/file_create_params.py +++ /dev/null @@ -1,16 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
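The new `RunStepInclude` literal and the `include` fields on `StepListParams`/`StepRetrieveParams` pair with the expanded `FileSearchToolCall` model above. A minimal sketch of how they might be used together, assuming a configured `OpenAI` client and placeholder thread/run IDs:

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

# "thread_abc123" / "run_abc123" are placeholder IDs for illustration only.
steps = client.beta.threads.runs.steps.list(
    run_id="run_abc123",
    thread_id="thread_abc123",
    include=["step_details.tool_calls[*].file_search.results[*].content"],
)

for step in steps:
    details = step.step_details
    if details.type != "tool_calls":
        continue
    for tool_call in details.tool_calls:
        if tool_call.type == "file_search" and tool_call.file_search.results:
            for result in tool_call.file_search.results:
                print(result.file_name, result.score)
                # `content` is only populated when requested via `include`.
                for part in result.content or []:
                    print(part.text)
```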
- -from __future__ import annotations - -from typing_extensions import Required, TypedDict - -__all__ = ["FileCreateParams"] - - -class FileCreateParams(TypedDict, total=False): - file_id: Required[str] - """ - A [File](https://platform.openai.com/docs/api-reference/files) ID that the - vector store should use. Useful for tools like `file_search` that can access - files. - """ diff --git a/src/openai/types/chat/__init__.py b/src/openai/types/chat/__init__.py index 0ba812ff9b..e34e2a4177 100644 --- a/src/openai/types/chat/__init__.py +++ b/src/openai/types/chat/__init__.py @@ -4,12 +4,20 @@ from .chat_completion import ChatCompletion as ChatCompletion from .chat_completion_role import ChatCompletionRole as ChatCompletionRole +from .chat_completion_audio import ChatCompletionAudio as ChatCompletionAudio from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk +from .completion_list_params import CompletionListParams as CompletionListParams +from .chat_completion_deleted import ChatCompletionDeleted as ChatCompletionDeleted from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage +from .chat_completion_modality import ChatCompletionModality as ChatCompletionModality from .completion_create_params import CompletionCreateParams as CompletionCreateParams +from .completion_update_params import CompletionUpdateParams as CompletionUpdateParams from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam +from .chat_completion_audio_param import ChatCompletionAudioParam as ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam +from .chat_completion_store_message import ChatCompletionStoreMessage as ChatCompletionStoreMessage from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob +from .chat_completion_reasoning_effort import ChatCompletionReasoningEffort as ChatCompletionReasoningEffort from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam @@ -25,6 +33,9 @@ from .chat_completion_content_part_text_param import ( ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam, ) +from .chat_completion_developer_message_param import ( + ChatCompletionDeveloperMessageParam as ChatCompletionDeveloperMessageParam, +) from .chat_completion_message_tool_call_param import ( ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam, ) @@ -34,9 +45,18 @@ from .chat_completion_content_part_image_param import ( ChatCompletionContentPartImageParam as ChatCompletionContentPartImageParam, ) +from .chat_completion_prediction_content_param import ( + ChatCompletionPredictionContentParam as ChatCompletionPredictionContentParam, +) from .chat_completion_tool_choice_option_param import ( ChatCompletionToolChoiceOptionParam as ChatCompletionToolChoiceOptionParam, ) +from .chat_completion_content_part_refusal_param import ( + ChatCompletionContentPartRefusalParam as ChatCompletionContentPartRefusalParam, +) from .chat_completion_function_call_option_param import ( ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam, ) +from .chat_completion_content_part_input_audio_param import ( + 
ChatCompletionContentPartInputAudioParam as ChatCompletionContentPartInputAudioParam, +) diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py index 61a94a258e..cb812a2702 100644 --- a/src/openai/types/chat/chat_completion.py +++ b/src/openai/types/chat/chat_completion.py @@ -15,6 +15,9 @@ class ChoiceLogprobs(BaseModel): content: Optional[List[ChatCompletionTokenLogprob]] = None """A list of message content tokens with log probability information.""" + refusal: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message refusal tokens with log probability information.""" + class Choice(BaseModel): finish_reason: Literal["stop", "length", "tool_calls", "content_filter", "function_call"] @@ -56,6 +59,9 @@ class ChatCompletion(BaseModel): object: Literal["chat.completion"] """The object type, which is always `chat.completion`.""" + service_tier: Optional[Literal["scale", "default"]] = None + """The service tier used for processing the request.""" + system_fingerprint: Optional[str] = None """This fingerprint represents the backend configuration that the model runs with. diff --git a/src/openai/types/chat/chat_completion_assistant_message_param.py b/src/openai/types/chat/chat_completion_assistant_message_param.py index e1e399486e..35e3a3d784 100644 --- a/src/openai/types/chat/chat_completion_assistant_message_param.py +++ b/src/openai/types/chat/chat_completion_assistant_message_param.py @@ -2,12 +2,22 @@ from __future__ import annotations -from typing import Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing import Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam +from .chat_completion_content_part_refusal_param import ChatCompletionContentPartRefusalParam -__all__ = ["ChatCompletionAssistantMessageParam", "FunctionCall"] +__all__ = ["ChatCompletionAssistantMessageParam", "Audio", "ContentArrayOfContentPart", "FunctionCall"] + + +class Audio(TypedDict, total=False): + id: Required[str] + """Unique identifier for a previous audio response from the model.""" + + +ContentArrayOfContentPart: TypeAlias = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartRefusalParam] class FunctionCall(TypedDict, total=False): @@ -27,13 +37,19 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False): role: Required[Literal["assistant"]] """The role of the messages author, in this case `assistant`.""" - content: Optional[str] + audio: Optional[Audio] + """Data about a previous audio response from the model. + + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + + content: Union[str, Iterable[ContentArrayOfContentPart], None] """The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified. """ - function_call: FunctionCall + function_call: Optional[FunctionCall] """Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the @@ -47,5 +63,8 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False): role. 
""" + refusal: Optional[str] + """The refusal message by the assistant.""" + tool_calls: Iterable[ChatCompletionMessageToolCallParam] """The tool calls generated by the model, such as function calls.""" diff --git a/src/openai/types/chat/chat_completion_audio.py b/src/openai/types/chat/chat_completion_audio.py new file mode 100644 index 0000000000..dd15508ebb --- /dev/null +++ b/src/openai/types/chat/chat_completion_audio.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..._models import BaseModel + +__all__ = ["ChatCompletionAudio"] + + +class ChatCompletionAudio(BaseModel): + id: str + """Unique identifier for this audio response.""" + + data: str + """ + Base64 encoded audio bytes generated by the model, in the format specified in + the request. + """ + + expires_at: int + """ + The Unix timestamp (in seconds) for when this audio response will no longer be + accessible on the server for use in multi-turn conversations. + """ + + transcript: str + """Transcript of the audio generated by the model.""" diff --git a/src/openai/types/chat/chat_completion_audio_param.py b/src/openai/types/chat/chat_completion_audio_param.py new file mode 100644 index 0000000000..6321417826 --- /dev/null +++ b/src/openai/types/chat/chat_completion_audio_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionAudioParam"] + + +class ChatCompletionAudioParam(TypedDict, total=False): + format: Required[Literal["wav", "mp3", "flac", "opus", "pcm16"]] + """Specifies the output audio format. + + Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`. + """ + + voice: Required[Literal["alloy", "ash", "ballad", "coral", "echo", "sage", "shimmer", "verse"]] + """The voice the model uses to respond. + + Supported voices are `alloy`, `ash`, `ballad`, `coral`, `echo`, `sage`, and + `shimmer`. + """ diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py index 084a5fcc07..31b9cb5456 100644 --- a/src/openai/types/chat/chat_completion_chunk.py +++ b/src/openai/types/chat/chat_completion_chunk.py @@ -67,7 +67,10 @@ class ChoiceDelta(BaseModel): model. """ - role: Optional[Literal["system", "user", "assistant", "tool"]] = None + refusal: Optional[str] = None + """The refusal message generated by the model.""" + + role: Optional[Literal["developer", "system", "user", "assistant", "tool"]] = None """The role of the author of this message.""" tool_calls: Optional[List[ChoiceDeltaToolCall]] = None @@ -77,6 +80,9 @@ class ChoiceLogprobs(BaseModel): content: Optional[List[ChatCompletionTokenLogprob]] = None """A list of message content tokens with log probability information.""" + refusal: Optional[List[ChatCompletionTokenLogprob]] = None + """A list of message refusal tokens with log probability information.""" + class Choice(BaseModel): delta: ChoiceDelta @@ -122,6 +128,9 @@ class ChatCompletionChunk(BaseModel): object: Literal["chat.completion.chunk"] """The object type, which is always `chat.completion.chunk`.""" + service_tier: Optional[Literal["scale", "default"]] = None + """The service tier used for processing the request.""" + system_fingerprint: Optional[str] = None """ This fingerprint represents the backend configuration that the model runs with. 
@@ -133,6 +142,9 @@ class ChatCompletionChunk(BaseModel): """ An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request. When present, it - contains a null value except for the last chunk which contains the token usage - statistics for the entire request. + contains a null value **except for the last chunk** which contains the token + usage statistics for the entire request. + + **NOTE:** If the stream is interrupted or cancelled, you may not receive the + final usage chunk which contains the total token usage for the request. """ diff --git a/src/openai/types/chat/chat_completion_content_part_image_param.py b/src/openai/types/chat/chat_completion_content_part_image_param.py index b1a186aa6d..9d407324d0 100644 --- a/src/openai/types/chat/chat_completion_content_part_image_param.py +++ b/src/openai/types/chat/chat_completion_content_part_image_param.py @@ -15,7 +15,7 @@ class ImageURL(TypedDict, total=False): """Specifies the detail level of the image. Learn more in the - [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding). + [Vision guide](https://platform.openai.com/docs/guides/vision#low-or-high-fidelity-image-understanding). """ diff --git a/src/openai/types/chat/chat_completion_content_part_input_audio_param.py b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py new file mode 100644 index 0000000000..0b1b1a80b1 --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_input_audio_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartInputAudioParam", "InputAudio"] + + +class InputAudio(TypedDict, total=False): + data: Required[str] + """Base64 encoded audio data.""" + + format: Required[Literal["wav", "mp3"]] + """The format of the encoded audio data. Currently supports "wav" and "mp3".""" + + +class ChatCompletionContentPartInputAudioParam(TypedDict, total=False): + input_audio: Required[InputAudio] + + type: Required[Literal["input_audio"]] + """The type of the content part. Always `input_audio`.""" diff --git a/src/openai/types/chat/chat_completion_content_part_param.py b/src/openai/types/chat/chat_completion_content_part_param.py index f9b5f71e43..cbedc853ba 100644 --- a/src/openai/types/chat/chat_completion_content_part_param.py +++ b/src/openai/types/chat/chat_completion_content_part_param.py @@ -3,10 +3,39 @@ from __future__ import annotations from typing import Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam +from .chat_completion_content_part_input_audio_param import ChatCompletionContentPartInputAudioParam -__all__ = ["ChatCompletionContentPartParam"] +__all__ = ["ChatCompletionContentPartParam", "File", "FileFile"] -ChatCompletionContentPartParam = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam] + +class FileFile(TypedDict, total=False): + file_data: str + """ + The base64 encoded file data, used when passing the file to the model as a + string. 
+ """ + + file_id: str + """The ID of an uploaded file to use as input.""" + + filename: str + """The name of the file, used when passing the file to the model as a string.""" + + +class File(TypedDict, total=False): + file: Required[FileFile] + + type: Required[Literal["file"]] + """The type of the content part. Always `file`.""" + + +ChatCompletionContentPartParam: TypeAlias = Union[ + ChatCompletionContentPartTextParam, + ChatCompletionContentPartImageParam, + ChatCompletionContentPartInputAudioParam, + File, +] diff --git a/src/openai/types/chat/chat_completion_content_part_refusal_param.py b/src/openai/types/chat/chat_completion_content_part_refusal_param.py new file mode 100644 index 0000000000..c18c7db770 --- /dev/null +++ b/src/openai/types/chat/chat_completion_content_part_refusal_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ChatCompletionContentPartRefusalParam"] + + +class ChatCompletionContentPartRefusalParam(TypedDict, total=False): + refusal: Required[str] + """The refusal message generated by the model.""" + + type: Required[Literal["refusal"]] + """The type of the content part.""" diff --git a/src/openai/types/chat/chat_completion_deleted.py b/src/openai/types/chat/chat_completion_deleted.py new file mode 100644 index 0000000000..0a541cb23d --- /dev/null +++ b/src/openai/types/chat/chat_completion_deleted.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ChatCompletionDeleted"] + + +class ChatCompletionDeleted(BaseModel): + id: str + """The ID of the chat completion that was deleted.""" + + deleted: bool + """Whether the chat completion was deleted.""" + + object: Literal["chat.completion.deleted"] + """The type of object being deleted.""" diff --git a/src/openai/types/chat/chat_completion_developer_message_param.py b/src/openai/types/chat/chat_completion_developer_message_param.py new file mode 100644 index 0000000000..01e4fdb654 --- /dev/null +++ b/src/openai/types/chat/chat_completion_developer_message_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + +__all__ = ["ChatCompletionDeveloperMessageParam"] + + +class ChatCompletionDeveloperMessageParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] + """The contents of the developer message.""" + + role: Required[Literal["developer"]] + """The role of the messages author, in this case `developer`.""" + + name: str + """An optional name for the participant. + + Provides the model information to differentiate between participants of the same + role. 
+ """ diff --git a/src/openai/types/chat/chat_completion_message.py b/src/openai/types/chat/chat_completion_message.py index 8db7d17d24..c659ac3da0 100644 --- a/src/openai/types/chat/chat_completion_message.py +++ b/src/openai/types/chat/chat_completion_message.py @@ -4,9 +4,32 @@ from typing_extensions import Literal from ..._models import BaseModel +from .chat_completion_audio import ChatCompletionAudio from .chat_completion_message_tool_call import ChatCompletionMessageToolCall -__all__ = ["ChatCompletionMessage", "FunctionCall"] +__all__ = ["ChatCompletionMessage", "Annotation", "AnnotationURLCitation", "FunctionCall"] + + +class AnnotationURLCitation(BaseModel): + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + url: str + """The URL of the web resource.""" + + +class Annotation(BaseModel): + type: Literal["url_citation"] + """The type of the URL citation. Always `url_citation`.""" + + url_citation: AnnotationURLCitation + """A URL citation when using web search.""" class FunctionCall(BaseModel): @@ -26,9 +49,25 @@ class ChatCompletionMessage(BaseModel): content: Optional[str] = None """The contents of the message.""" + refusal: Optional[str] = None + """The refusal message generated by the model.""" + role: Literal["assistant"] """The role of the author of this message.""" + annotations: Optional[List[Annotation]] = None + """ + Annotations for the message, when applicable, as when using the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + """ + + audio: Optional[ChatCompletionAudio] = None + """ + If the audio output modality is requested, this object contains data about the + audio response from the model. + [Learn more](https://platform.openai.com/docs/guides/audio). + """ + function_call: Optional[FunctionCall] = None """Deprecated and replaced by `tool_calls`. diff --git a/src/openai/types/chat/chat_completion_message_param.py b/src/openai/types/chat/chat_completion_message_param.py index a3644a5310..942da24304 100644 --- a/src/openai/types/chat/chat_completion_message_param.py +++ b/src/openai/types/chat/chat_completion_message_param.py @@ -3,16 +3,19 @@ from __future__ import annotations from typing import Union +from typing_extensions import TypeAlias from .chat_completion_tool_message_param import ChatCompletionToolMessageParam from .chat_completion_user_message_param import ChatCompletionUserMessageParam from .chat_completion_system_message_param import ChatCompletionSystemMessageParam from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam +from .chat_completion_developer_message_param import ChatCompletionDeveloperMessageParam __all__ = ["ChatCompletionMessageParam"] -ChatCompletionMessageParam = Union[ +ChatCompletionMessageParam: TypeAlias = Union[ + ChatCompletionDeveloperMessageParam, ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam, ChatCompletionAssistantMessageParam, diff --git a/src/openai/types/chat/chat_completion_modality.py b/src/openai/types/chat/chat_completion_modality.py new file mode 100644 index 0000000000..8e3c145979 --- /dev/null +++ b/src/openai/types/chat/chat_completion_modality.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatCompletionModality"] + +ChatCompletionModality: TypeAlias = Literal["text", "audio"] diff --git a/src/openai/types/chat/chat_completion_prediction_content_param.py b/src/openai/types/chat/chat_completion_prediction_content_param.py new file mode 100644 index 0000000000..c44e6e3653 --- /dev/null +++ b/src/openai/types/chat/chat_completion_prediction_content_param.py @@ -0,0 +1,25 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypedDict + +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + +__all__ = ["ChatCompletionPredictionContentParam"] + + +class ChatCompletionPredictionContentParam(TypedDict, total=False): + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] + """ + The content that should be matched when generating a model response. If + generated tokens would match this content, the entire model response can be + returned much more quickly. + """ + + type: Required[Literal["content"]] + """The type of the predicted content you want to provide. + + This type is currently always `content`. + """ diff --git a/src/openai/types/chat/chat_completion_reasoning_effort.py b/src/openai/types/chat/chat_completion_reasoning_effort.py new file mode 100644 index 0000000000..e4785c90bf --- /dev/null +++ b/src/openai/types/chat/chat_completion_reasoning_effort.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..shared.reasoning_effort import ReasoningEffort + +__all__ = ["ChatCompletionReasoningEffort"] + +ChatCompletionReasoningEffort = ReasoningEffort diff --git a/src/openai/types/chat/chat_completion_role.py b/src/openai/types/chat/chat_completion_role.py index 1fd83888d3..3ec5e9ad87 100644 --- a/src/openai/types/chat/chat_completion_role.py +++ b/src/openai/types/chat/chat_completion_role.py @@ -1,7 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias __all__ = ["ChatCompletionRole"] -ChatCompletionRole = Literal["system", "user", "assistant", "tool", "function"] +ChatCompletionRole: TypeAlias = Literal["developer", "system", "user", "assistant", "tool", "function"] diff --git a/src/openai/types/chat/chat_completion_store_message.py b/src/openai/types/chat/chat_completion_store_message.py new file mode 100644 index 0000000000..95adc08af8 --- /dev/null +++ b/src/openai/types/chat/chat_completion_store_message.py @@ -0,0 +1,11 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
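The new `developer` role and the shared `ReasoningEffort` alias suggest requests shaped roughly like the following. A sketch under stated assumptions: the model name is illustrative, and it relies on the `reasoning_effort` request parameter added later in this diff.

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="o3-mini",  # assumption: any reasoning model that accepts developer messages
    reasoning_effort="low",
    messages=[
        # For o-series models, `developer` plays the role that `system` used to play.
        {"role": "developer", "content": "Answer in at most two sentences."},
        {"role": "user", "content": "Why is the sky blue?"},
    ],
)
print(completion.choices[0].message.content)
```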
+ + +from .chat_completion_message import ChatCompletionMessage + +__all__ = ["ChatCompletionStoreMessage"] + + +class ChatCompletionStoreMessage(ChatCompletionMessage): + id: str + """The identifier of the chat message.""" diff --git a/src/openai/types/chat/chat_completion_stream_options_param.py b/src/openai/types/chat/chat_completion_stream_options_param.py index fbf7291821..471e0eba98 100644 --- a/src/openai/types/chat/chat_completion_stream_options_param.py +++ b/src/openai/types/chat/chat_completion_stream_options_param.py @@ -12,6 +12,9 @@ class ChatCompletionStreamOptionsParam(TypedDict, total=False): """If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire - request, and the `choices` field will always be an empty array. All other chunks - will also include a `usage` field, but with a null value. + request, and the `choices` field will always be an empty array. + + All other chunks will also include a `usage` field, but with a null value. + **NOTE:** If the stream is interrupted, you may not receive the final usage + chunk which contains the total token usage for the request. """ diff --git a/src/openai/types/chat/chat_completion_system_message_param.py b/src/openai/types/chat/chat_completion_system_message_param.py index 94bb3f636c..172ccea09e 100644 --- a/src/openai/types/chat/chat_completion_system_message_param.py +++ b/src/openai/types/chat/chat_completion_system_message_param.py @@ -2,13 +2,16 @@ from __future__ import annotations +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + __all__ = ["ChatCompletionSystemMessageParam"] class ChatCompletionSystemMessageParam(TypedDict, total=False): - content: Required[str] + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] """The contents of the system message.""" role: Required[Literal["system"]] diff --git a/src/openai/types/chat/chat_completion_tool_choice_option_param.py b/src/openai/types/chat/chat_completion_tool_choice_option_param.py index 1d3c2506ab..7dedf041b7 100644 --- a/src/openai/types/chat/chat_completion_tool_choice_option_param.py +++ b/src/openai/types/chat/chat_completion_tool_choice_option_param.py @@ -3,10 +3,12 @@ from __future__ import annotations from typing import Union -from typing_extensions import Literal +from typing_extensions import Literal, TypeAlias from .chat_completion_named_tool_choice_param import ChatCompletionNamedToolChoiceParam __all__ = ["ChatCompletionToolChoiceOptionParam"] -ChatCompletionToolChoiceOptionParam = Union[Literal["none", "auto", "required"], ChatCompletionNamedToolChoiceParam] +ChatCompletionToolChoiceOptionParam: TypeAlias = Union[ + Literal["none", "auto", "required"], ChatCompletionNamedToolChoiceParam +] diff --git a/src/openai/types/chat/chat_completion_tool_message_param.py b/src/openai/types/chat/chat_completion_tool_message_param.py index 5c590e033f..eb5e270e47 100644 --- a/src/openai/types/chat/chat_completion_tool_message_param.py +++ b/src/openai/types/chat/chat_completion_tool_message_param.py @@ -2,13 +2,16 @@ from __future__ import annotations +from typing import Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam + __all__ = ["ChatCompletionToolMessageParam"] class 
ChatCompletionToolMessageParam(TypedDict, total=False): - content: Required[str] + content: Required[Union[str, Iterable[ChatCompletionContentPartTextParam]]] """The contents of the tool message.""" role: Required[Literal["tool"]] diff --git a/src/openai/types/chat/chat_completion_tool_param.py b/src/openai/types/chat/chat_completion_tool_param.py index 0cf6ea7268..6c2b1a36f0 100644 --- a/src/openai/types/chat/chat_completion_tool_param.py +++ b/src/openai/types/chat/chat_completion_tool_param.py @@ -4,13 +4,13 @@ from typing_extensions import Literal, Required, TypedDict -from ...types import shared_params +from ..shared_params.function_definition import FunctionDefinition __all__ = ["ChatCompletionToolParam"] class ChatCompletionToolParam(TypedDict, total=False): - function: Required[shared_params.FunctionDefinition] + function: Required[FunctionDefinition] type: Required[Literal["function"]] """The type of the tool. Currently, only `function` is supported.""" diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 226cf15882..05103fba91 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -3,14 +3,21 @@ from __future__ import annotations from typing import Dict, List, Union, Iterable, Optional -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Literal, Required, TypeAlias, TypedDict -from ...types import shared_params -from ..chat_model import ChatModel +from ..shared.chat_model import ChatModel +from ..shared_params.metadata import Metadata +from ..shared.reasoning_effort import ReasoningEffort from .chat_completion_tool_param import ChatCompletionToolParam +from .chat_completion_audio_param import ChatCompletionAudioParam from .chat_completion_message_param import ChatCompletionMessageParam +from ..shared_params.function_parameters import FunctionParameters +from ..shared_params.response_format_text import ResponseFormatText from .chat_completion_stream_options_param import ChatCompletionStreamOptionsParam +from .chat_completion_prediction_content_param import ChatCompletionPredictionContentParam from .chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from ..shared_params.response_format_json_schema import ResponseFormatJSONSchema from .chat_completion_function_call_option_param import ChatCompletionFunctionCallOptionParam __all__ = [ @@ -18,6 +25,9 @@ "FunctionCall", "Function", "ResponseFormat", + "WebSearchOptions", + "WebSearchOptionsUserLocation", + "WebSearchOptionsUserLocationApproximate", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming", ] @@ -27,15 +37,27 @@ class CompletionCreateParamsBase(TypedDict, total=False): messages: Required[Iterable[ChatCompletionMessageParam]] """A list of messages comprising the conversation so far. - [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models). + Depending on the [model](https://platform.openai.com/docs/models) you use, + different message types (modalities) are supported, like + [text](https://platform.openai.com/docs/guides/text-generation), + [images](https://platform.openai.com/docs/guides/vision), and + [audio](https://platform.openai.com/docs/guides/audio). """ model: Required[Union[str, ChatModel]] - """ID of the model to use. 
+ """Model ID used to generate the response, like `gpt-4o` or `o1`. - See the - [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility) - table for details on which models work with the Chat API. + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + audio: Optional[ChatCompletionAudioParam] + """Parameters for audio output. + + Required when audio output is requested with `modalities: ["audio"]`. + [Learn more](https://platform.openai.com/docs/guides/audio). """ frequency_penalty: Optional[float] @@ -43,19 +65,21 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) """ function_call: FunctionCall """Deprecated in favor of `tool_choice`. - Controls which (if any) function is called by the model. `none` means the model - will not call a function and instead generates a message. `auto` means the model - can pick between generating a message or calling a function. Specifying a - particular function via `{"name": "my_function"}` forces the model to call that + Controls which (if any) function is called by the model. + + `none` means the model will not call a function and instead generates a message. + + `auto` means the model can pick between generating a message or calling a function. + Specifying a particular function via `{"name": "my_function"}` forces the model + to call that function. + `none` is the default when no functions are present. `auto` is the default if functions are present. """ @@ -84,15 +108,46 @@ class CompletionCreateParamsBase(TypedDict, total=False): `content` of `message`. """ + max_completion_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a completion, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + max_tokens: Optional[int] """ The maximum number of [tokens](/tokenizer) that can be generated in the chat - completion. + completion. This value can be used to control + [costs](https://openai.com/api/pricing/) for text generated via API. + + This value is now deprecated in favor of `max_completion_tokens`, and is not + compatible with + [o1 series models](https://platform.openai.com/docs/guides/reasoning). + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ - The total length of input tokens and generated tokens is limited by the model's - context length. - [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + modalities: Optional[List[Literal["text", "audio"]]] + """ + Output types that you would like the model to generate. 
Most models are capable + of generating text, which is the default: + + `["text"]` + + The `gpt-4o-audio-preview` model can also be used to + [generate audio](https://platform.openai.com/docs/guides/audio). To request that + this model generate both text and audio responses, you can use: + + `["text", "audio"]` """ n: Optional[int] @@ -102,32 +157,46 @@ class CompletionCreateParamsBase(TypedDict, total=False): of the choices. Keep `n` as `1` to minimize costs. """ + parallel_tool_calls: bool + """ + Whether to enable + [parallel function calling](https://platform.openai.com/docs/guides/function-calling#configuring-parallel-function-calling) + during tool use. + """ + + prediction: Optional[ChatCompletionPredictionContentParam] + """ + Static predicted output content, such as the content of a text file that is + being regenerated. + """ + presence_penalty: Optional[float] """Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. + """ - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + reasoning_effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. """ response_format: ResponseFormat """An object specifying the format that the model must output. - Compatible with - [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and - all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`. - - Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the - message the model generates is valid JSON. + Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured + Outputs which ensures the model will match your supplied JSON schema. Learn more + in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). - **Important:** when using JSON mode, you **must** also instruct the model to - produce JSON yourself via a system or user message. Without this, the model may - generate an unending stream of whitespace until the generation reaches the token - limit, resulting in a long-running and seemingly "stuck" request. Also note that - the message content may be partially cut off if `finish_reason="length"`, which - indicates the generation exceeded `max_tokens` or the conversation exceeded the - max context length. + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. """ seed: Optional[int] @@ -139,8 +208,36 @@ class CompletionCreateParamsBase(TypedDict, total=False): in the backend. """ - stop: Union[Optional[str], List[str]] - """Up to 4 sequences where the API will stop generating further tokens.""" + service_tier: Optional[Literal["auto", "default"]] + """Specifies the latency tier to use for processing the request. + + This parameter is relevant for customers subscribed to the scale tier service: + + - If set to 'auto', and the Project is Scale tier enabled, the system will + utilize scale tier credits until they are exhausted. 
+ - If set to 'auto', and the Project is not Scale tier enabled, the request will + be processed using the default service tier with a lower uptime SLA and no + latency guarantee. + - If set to 'default', the request will be processed using the default service + tier with a lower uptime SLA and no + latency guarantee. + - When not set, the default behavior is 'auto'. + + When this parameter is set, the response body will include the `service_tier` + utilized. + """ + + stop: Union[Optional[str], List[str], None] + """Up to 4 sequences where the API will stop generating further tokens. + + The returned text will not contain the stop sequence. + """ + + store: Optional[bool] + """ + Whether or not to store the output of this chat completion request for use in + our [model distillation](https://platform.openai.com/docs/guides/distillation) + or [evals](https://platform.openai.com/docs/guides/evals) products. + """ stream_options: Optional[ChatCompletionStreamOptionsParam] """Options for streaming response. Only set this when you set `stream: true`.""" @@ -149,9 +246,8 @@ class CompletionCreateParamsBase(TypedDict, total=False): """What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like - 0.2 will make it more focused and deterministic. - - We generally recommend altering this or `top_p` but not both. + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. """ tool_choice: ChatCompletionToolChoiceOptionParam @@ -195,11 +291,18 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ + web_search_options: WebSearchOptions + """ + This tool searches the web for relevant results to use in a response. Learn more + about the + [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat). + """ -FunctionCall = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam] + +FunctionCall: TypeAlias = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam] class Function(TypedDict, total=False): @@ -216,12 +319,11 @@ class Function(TypedDict, total=False): how to call the function. """ - parameters: shared_params.FunctionParameters + parameters: FunctionParameters """The parameters the functions accepts, described as a JSON Schema object. - See the - [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. @@ -229,32 +331,73 @@ class Function(TypedDict, total=False): """ -class ResponseFormat(TypedDict, total=False): - type: Literal["text", "json_object"] - """Must be one of `text` or `json_object`.""" +ResponseFormat: TypeAlias = Union[ResponseFormatText, ResponseFormatJSONSchema, ResponseFormatJSONObject] -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): - stream: Optional[Literal[False]] - """If set, partial message deltas will be sent, like in ChatGPT.
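`ChatCompletionPredictionContentParam` together with the new `prediction`, `store`, and `metadata` fields above point at Predicted Outputs with stored completions. A hedged sketch of regenerating a mostly unchanged file; the inline code string stands in for a real file's contents:

```python
from openai import OpenAI

client = OpenAI()

existing_code = 'def greet():\n    print("hello")\n'  # stand-in for a real file

completion = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "user", "content": "Rename the function `greet` to `say_hello`."},
        {"role": "user", "content": existing_code},
    ],
    # Tokens that match the prediction can be returned much faster.
    prediction={"type": "content", "content": existing_code},
    # Also exercises the new `store` and `metadata` request fields.
    store=True,
    metadata={"task": "rename-refactor"},
)
print(completion.choices[0].message.content)
```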
+class WebSearchOptionsUserLocationApproximate(TypedDict, total=False): + city: str + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: str + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: str + """Free text input for the region of the user, e.g. `California`.""" + + timezone: str + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + +class WebSearchOptionsUserLocation(TypedDict, total=False): + approximate: Required[WebSearchOptionsUserLocationApproximate] + """Approximate location parameters for the search.""" + + type: Required[Literal["approximate"]] + """The type of location approximation. Always `approximate`.""" - Tokens will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + +class WebSearchOptions(TypedDict, total=False): + search_context_size: Literal["low", "medium", "high"] + """ + High level guidance for the amount of context window space to use for the + search. One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[WebSearchOptionsUserLocation] + """Approximate location parameters for the search.""" + + +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. """ class CompletionCreateParamsStreaming(CompletionCreateParamsBase): stream: Required[Literal[True]] - """If set, partial message deltas will be sent, like in ChatGPT. - - Tokens will be sent as data-only - [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) - as they become available, with the stream terminated by a `data: [DONE]` - message. - [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions). + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/chat/streaming) + for more information, along with the + [streaming responses](https://platform.openai.com/docs/guides/streaming-responses) + guide for more information on how to handle the streaming events. 
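The `WebSearchOptions` shapes above, combined with the `url_citation` annotations added to `ChatCompletionMessage`, suggest a request along these lines. A sketch with assumptions: the model name is illustrative of a search-enabled model, and the location values are placeholders.

```python
from openai import OpenAI

client = OpenAI()

completion = client.chat.completions.create(
    model="gpt-4o-search-preview",  # assumption: a model that supports the web search tool
    web_search_options={
        "search_context_size": "low",
        "user_location": {
            "type": "approximate",
            "approximate": {"city": "San Francisco", "country": "US"},
        },
    },
    messages=[{"role": "user", "content": "What changed in the latest Python release?"}],
)

message = completion.choices[0].message
print(message.content)
# Sources arrive as `url_citation` annotations on the message, when present.
for annotation in message.annotations or []:
    if annotation.type == "url_citation":
        print(annotation.url_citation.title, annotation.url_citation.url)
```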
""" diff --git a/src/openai/types/chat/completion_list_params.py b/src/openai/types/chat/completion_list_params.py new file mode 100644 index 0000000000..d93da834a3 --- /dev/null +++ b/src/openai/types/chat/completion_list_params.py @@ -0,0 +1,33 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypedDict + +from ..shared_params.metadata import Metadata + +__all__ = ["CompletionListParams"] + + +class CompletionListParams(TypedDict, total=False): + after: str + """Identifier for the last chat completion from the previous pagination request.""" + + limit: int + """Number of Chat Completions to retrieve.""" + + metadata: Optional[Metadata] + """A list of metadata keys to filter the Chat Completions by. Example: + + `metadata[key1]=value1&metadata[key2]=value2` + """ + + model: str + """The model used to generate the Chat Completions.""" + + order: Literal["asc", "desc"] + """Sort order for Chat Completions by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. + """ diff --git a/src/openai/types/chat/completion_update_params.py b/src/openai/types/chat/completion_update_params.py new file mode 100644 index 0000000000..fc71733f07 --- /dev/null +++ b/src/openai/types/chat/completion_update_params.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Required, TypedDict + +from ..shared_params.metadata import Metadata + +__all__ = ["CompletionUpdateParams"] + + +class CompletionUpdateParams(TypedDict, total=False): + metadata: Required[Optional[Metadata]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ diff --git a/src/openai/types/chat/completions/__init__.py b/src/openai/types/chat/completions/__init__.py new file mode 100644 index 0000000000..b8e62d6a64 --- /dev/null +++ b/src/openai/types/chat/completions/__init__.py @@ -0,0 +1,5 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .message_list_params import MessageListParams as MessageListParams diff --git a/src/openai/types/chat/completions/message_list_params.py b/src/openai/types/chat/completions/message_list_params.py new file mode 100644 index 0000000000..4e694e83ea --- /dev/null +++ b/src/openai/types/chat/completions/message_list_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["MessageListParams"] + + +class MessageListParams(TypedDict, total=False): + after: str + """Identifier for the last message from the previous pagination request.""" + + limit: int + """Number of messages to retrieve.""" + + order: Literal["asc", "desc"] + """Sort order for messages by timestamp. + + Use `asc` for ascending order or `desc` for descending order. Defaults to `asc`. 
+ """ diff --git a/src/openai/types/chat_model.py b/src/openai/types/chat_model.py index 0d2937ea32..9304d195d6 100644 --- a/src/openai/types/chat_model.py +++ b/src/openai/types/chat_model.py @@ -1,29 +1,8 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing_extensions import Literal + +from .shared import chat_model __all__ = ["ChatModel"] -ChatModel = Literal[ - "gpt-4o", - "gpt-4o-2024-05-13", - "gpt-4-turbo", - "gpt-4-turbo-2024-04-09", - "gpt-4-0125-preview", - "gpt-4-turbo-preview", - "gpt-4-1106-preview", - "gpt-4-vision-preview", - "gpt-4", - "gpt-4-0314", - "gpt-4-0613", - "gpt-4-32k", - "gpt-4-32k-0314", - "gpt-4-32k-0613", - "gpt-3.5-turbo", - "gpt-3.5-turbo-16k", - "gpt-3.5-turbo-0301", - "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-1106", - "gpt-3.5-turbo-0125", - "gpt-3.5-turbo-16k-0613", -] +ChatModel = chat_model.ChatModel diff --git a/src/openai/types/completion_create_params.py b/src/openai/types/completion_create_params.py index 9fe22fe3c9..fdb1680d26 100644 --- a/src/openai/types/completion_create_params.py +++ b/src/openai/types/completion_create_params.py @@ -17,8 +17,8 @@ class CompletionCreateParamsBase(TypedDict, total=False): You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]] @@ -53,7 +53,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) """ logit_bias: Optional[Dict[str, int]] @@ -106,7 +106,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. - [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details) + [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation) """ seed: Optional[int] @@ -156,11 +156,11 @@ class CompletionCreateParamsBase(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ -class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase): +class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False): stream: Optional[Literal[False]] """Whether to stream back partial progress. 
diff --git a/src/openai/types/completion_usage.py b/src/openai/types/completion_usage.py index 0d57b96595..d8c4e84cf7 100644 --- a/src/openai/types/completion_usage.py +++ b/src/openai/types/completion_usage.py @@ -1,10 +1,40 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. - +from typing import Optional from .._models import BaseModel -__all__ = ["CompletionUsage"] +__all__ = ["CompletionUsage", "CompletionTokensDetails", "PromptTokensDetails"] + + +class CompletionTokensDetails(BaseModel): + accepted_prediction_tokens: Optional[int] = None + """ + When using Predicted Outputs, the number of tokens in the prediction that + appeared in the completion. + """ + + audio_tokens: Optional[int] = None + """Audio input tokens generated by the model.""" + + reasoning_tokens: Optional[int] = None + """Tokens generated by the model for reasoning.""" + + rejected_prediction_tokens: Optional[int] = None + """ + When using Predicted Outputs, the number of tokens in the prediction that did + not appear in the completion. However, like reasoning tokens, these tokens are + still counted in the total completion tokens for purposes of billing, output, + and context window limits. + """ + + +class PromptTokensDetails(BaseModel): + audio_tokens: Optional[int] = None + """Audio input tokens present in the prompt.""" + + cached_tokens: Optional[int] = None + """Cached tokens present in the prompt.""" class CompletionUsage(BaseModel): @@ -16,3 +46,9 @@ class CompletionUsage(BaseModel): total_tokens: int """Total number of tokens used in the request (prompt + completion).""" + + completion_tokens_details: Optional[CompletionTokensDetails] = None + """Breakdown of tokens used in a completion.""" + + prompt_tokens_details: Optional[PromptTokensDetails] = None + """Breakdown of tokens used in the prompt.""" diff --git a/src/openai/types/embedding_create_params.py b/src/openai/types/embedding_create_params.py index 930b3b7914..a90566449b 100644 --- a/src/openai/types/embedding_create_params.py +++ b/src/openai/types/embedding_create_params.py @@ -5,6 +5,8 @@ from typing import List, Union, Iterable from typing_extensions import Literal, Required, TypedDict +from .embedding_model import EmbeddingModel + __all__ = ["EmbeddingCreateParams"] @@ -17,17 +19,18 @@ class EmbeddingCreateParams(TypedDict, total=False): (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) - for counting tokens. + for counting tokens. Some models may also impose a limit on total number of + tokens summed across inputs. """ - model: Required[Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]]] + model: Required[Union[str, EmbeddingModel]] """ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our - [Model overview](https://platform.openai.com/docs/models/overview) for - descriptions of them. + [Model overview](https://platform.openai.com/docs/models) for descriptions of + them. """ dimensions: int @@ -46,5 +49,5 @@ class EmbeddingCreateParams(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). 
+ [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/embedding_model.py b/src/openai/types/embedding_model.py new file mode 100644 index 0000000000..075ff97644 --- /dev/null +++ b/src/openai/types/embedding_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["EmbeddingModel"] + +EmbeddingModel: TypeAlias = Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"] diff --git a/src/openai/types/file_chunking_strategy.py b/src/openai/types/file_chunking_strategy.py new file mode 100644 index 0000000000..ee96bd7884 --- /dev/null +++ b/src/openai/types/file_chunking_strategy.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from .._utils import PropertyInfo +from .other_file_chunking_strategy_object import OtherFileChunkingStrategyObject +from .static_file_chunking_strategy_object import StaticFileChunkingStrategyObject + +__all__ = ["FileChunkingStrategy"] + +FileChunkingStrategy: TypeAlias = Annotated[ + Union[StaticFileChunkingStrategyObject, OtherFileChunkingStrategyObject], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/file_chunking_strategy_param.py b/src/openai/types/file_chunking_strategy_param.py new file mode 100644 index 0000000000..25d94286d8 --- /dev/null +++ b/src/openai/types/file_chunking_strategy_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .auto_file_chunking_strategy_param import AutoFileChunkingStrategyParam +from .static_file_chunking_strategy_object_param import StaticFileChunkingStrategyObjectParam + +__all__ = ["FileChunkingStrategyParam"] + +FileChunkingStrategyParam: TypeAlias = Union[AutoFileChunkingStrategyParam, StaticFileChunkingStrategyObjectParam] diff --git a/src/openai/types/file_content.py b/src/openai/types/file_content.py index b4aa08a9a3..d89eee623e 100644 --- a/src/openai/types/file_content.py +++ b/src/openai/types/file_content.py @@ -1,6 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing_extensions import TypeAlias __all__ = ["FileContent"] -FileContent = str +FileContent: TypeAlias = str diff --git a/src/openai/types/file_create_params.py b/src/openai/types/file_create_params.py index caa913d4d2..728dfd350f 100644 --- a/src/openai/types/file_create_params.py +++ b/src/openai/types/file_create_params.py @@ -2,9 +2,10 @@ from __future__ import annotations -from typing_extensions import Literal, Required, TypedDict +from typing_extensions import Required, TypedDict from .._types import FileTypes +from .file_purpose import FilePurpose __all__ = ["FileCreateParams"] @@ -13,13 +14,11 @@ class FileCreateParams(TypedDict, total=False): file: Required[FileTypes] """The File object (not file name) to be uploaded.""" - purpose: Required[Literal["assistants", "batch", "fine-tune"]] + purpose: Required[FilePurpose] """The intended purpose of the uploaded file. 
- Use "assistants" for - [Assistants](https://platform.openai.com/docs/api-reference/assistants) and - [Message](https://platform.openai.com/docs/api-reference/messages) files, - "vision" for Assistants image file inputs, "batch" for - [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for - [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning). + One of: - `assistants`: Used in the Assistants API - `batch`: Used in the Batch + API - `fine-tune`: Used for fine-tuning - `vision`: Images used for vision + fine-tuning - `user_data`: Flexible file type for any purpose - `evals`: Used + for eval data sets """ diff --git a/src/openai/types/file_list_params.py b/src/openai/types/file_list_params.py index 212eca13c0..058d874c29 100644 --- a/src/openai/types/file_list_params.py +++ b/src/openai/types/file_list_params.py @@ -2,11 +2,32 @@ from __future__ import annotations -from typing_extensions import TypedDict +from typing_extensions import Literal, TypedDict __all__ = ["FileListParams"] class FileListParams(TypedDict, total=False): + after: str + """A cursor for use in pagination. + + `after` is an object ID that defines your place in the list. For instance, if + you make a list request and receive 100 objects, ending with obj_foo, your + subsequent call can include after=obj_foo in order to fetch the next page of the + list. + """ + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 10,000, and the default is 10,000. + """ + + order: Literal["asc", "desc"] + """Sort order by the `created_at` timestamp of the objects. + + `asc` for ascending order and `desc` for descending order. + """ + purpose: str """Only return files with the given purpose.""" diff --git a/src/openai/types/file_object.py b/src/openai/types/file_object.py index 6e2bf310a4..1d65e6987d 100644 --- a/src/openai/types/file_object.py +++ b/src/openai/types/file_object.py @@ -40,6 +40,9 @@ class FileObject(BaseModel): `error`. """ + expires_at: Optional[int] = None + """The Unix timestamp (in seconds) for when the file will expire.""" + status_details: Optional[str] = None """Deprecated. diff --git a/src/openai/types/file_purpose.py b/src/openai/types/file_purpose.py new file mode 100644 index 0000000000..b2c2d5f9fc --- /dev/null +++ b/src/openai/types/file_purpose.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["FilePurpose"] + +FilePurpose: TypeAlias = Literal["assistants", "batch", "fine-tune", "vision", "user_data", "evals"] diff --git a/src/openai/types/fine_tuning/fine_tuning_job.py b/src/openai/types/fine_tuning/fine_tuning_job.py index 7ac8792787..c7fff2b7b1 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job.py +++ b/src/openai/types/fine_tuning/fine_tuning_job.py @@ -4,9 +4,19 @@ from typing_extensions import Literal from ..._models import BaseModel +from ..shared.metadata import Metadata from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject -__all__ = ["FineTuningJob", "Error", "Hyperparameters"] +__all__ = [ + "FineTuningJob", + "Error", + "Hyperparameters", + "Method", + "MethodDpo", + "MethodDpoHyperparameters", + "MethodSupervised", + "MethodSupervisedHyperparameters", +] class Error(BaseModel): @@ -24,15 +34,96 @@ class Error(BaseModel): class Hyperparameters(BaseModel): - n_epochs: Union[Literal["auto"], int] + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodDpoHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float, None] = None + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodDpo(BaseModel): + hyperparameters: Optional[MethodDpoHyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" + + +class MethodSupervisedHyperparameters(BaseModel): + batch_size: Union[Literal["auto"], int, None] = None + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float, None] = None + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int, None] = None """The number of epochs to train the model for. - An epoch refers to one full cycle through the training dataset. "auto" decides - the optimal number of epochs based on the size of the dataset. If setting the - number manually, we support any number between 1 and 50 epochs. + An epoch refers to one full cycle through the training dataset. 
""" +class MethodSupervised(BaseModel): + hyperparameters: Optional[MethodSupervisedHyperparameters] = None + """The hyperparameters used for the fine-tuning job.""" + + +class Method(BaseModel): + dpo: Optional[MethodDpo] = None + """Configuration for the DPO fine-tuning method.""" + + supervised: Optional[MethodSupervised] = None + """Configuration for the supervised fine-tuning method.""" + + type: Optional[Literal["supervised", "dpo"]] = None + """The type of method. Is either `supervised` or `dpo`.""" + + class FineTuningJob(BaseModel): id: str """The object identifier, which can be referenced in the API endpoints.""" @@ -61,8 +152,7 @@ class FineTuningJob(BaseModel): hyperparameters: Hyperparameters """The hyperparameters used for the fine-tuning job. - See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) - for more details. + This value will only be returned when running `supervised` jobs. """ model: str @@ -118,3 +208,16 @@ class FineTuningJob(BaseModel): integrations: Optional[List[FineTuningJobWandbIntegrationObject]] = None """A list of integrations to enable for this fine-tuning job.""" + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + method: Optional[Method] = None + """The method used for fine-tuning.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_event.py b/src/openai/types/fine_tuning/fine_tuning_job_event.py index 2d204bb980..1d728bd765 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_event.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_event.py @@ -1,5 +1,7 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +import builtins +from typing import Optional from typing_extensions import Literal from ..._models import BaseModel @@ -9,11 +11,22 @@ class FineTuningJobEvent(BaseModel): id: str + """The object identifier.""" created_at: int + """The Unix timestamp (in seconds) for when the fine-tuning job was created.""" level: Literal["info", "warn", "error"] + """The log level of the event.""" message: str + """The message of the event.""" object: Literal["fine_tuning.job.event"] + """The object type, which is always "fine_tuning.job.event".""" + + data: Optional[builtins.object] = None + """The data associated with the event.""" + + type: Optional[Literal["message", "metrics"]] = None + """The type of event.""" diff --git a/src/openai/types/fine_tuning/fine_tuning_job_integration.py b/src/openai/types/fine_tuning/fine_tuning_job_integration.py index 8076313cae..8ac55a0b44 100644 --- a/src/openai/types/fine_tuning/fine_tuning_job_integration.py +++ b/src/openai/types/fine_tuning/fine_tuning_job_integration.py @@ -1,7 +1,8 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- from .fine_tuning_job_wandb_integration_object import FineTuningJobWandbIntegrationObject +__all__ = ["FineTuningJobIntegration"] + FineTuningJobIntegration = FineTuningJobWandbIntegrationObject diff --git a/src/openai/types/fine_tuning/job_create_params.py b/src/openai/types/fine_tuning/job_create_params.py index 1925f90d12..f4cf980b08 100644 --- a/src/openai/types/fine_tuning/job_create_params.py +++ b/src/openai/types/fine_tuning/job_create_params.py @@ -5,15 +5,27 @@ from typing import List, Union, Iterable, Optional from typing_extensions import Literal, Required, TypedDict -__all__ = ["JobCreateParams", "Hyperparameters", "Integration", "IntegrationWandb"] +from ..shared_params.metadata import Metadata + +__all__ = [ + "JobCreateParams", + "Hyperparameters", + "Integration", + "IntegrationWandb", + "Method", + "MethodDpo", + "MethodDpoHyperparameters", + "MethodSupervised", + "MethodSupervisedHyperparameters", +] class JobCreateParams(TypedDict, total=False): - model: Required[Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]]] + model: Required[Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo", "gpt-4o-mini"]]] """The name of the model to fine-tune. You can select one of the - [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned). + [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned). """ training_file: Required[str] @@ -25,16 +37,39 @@ class JobCreateParams(TypedDict, total=False): Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`. + The contents of the file should differ depending on if the model uses the + [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), + [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) + format, or if the fine-tuning method uses the + [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) + format. + See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details. """ hyperparameters: Hyperparameters - """The hyperparameters used for the fine-tuning job.""" + """ + The hyperparameters used for the fine-tuning job. This value is now deprecated + in favor of `method`, and should be passed in under the `method` parameter. + """ integrations: Optional[Iterable[Integration]] """A list of integrations to enable for your fine-tuning job.""" + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + method: Method + """The method used for fine-tuning.""" + seed: Optional[int] """The seed controls the reproducibility of the job. @@ -45,11 +80,11 @@ class JobCreateParams(TypedDict, total=False): suffix: Optional[str] """ - A string of up to 18 characters that will be added to your fine-tuned model + A string of up to 64 characters that will be added to your fine-tuned model name. For example, a `suffix` of "custom-model-name" would produce a model name like - `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`. + `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`. 
""" validation_file: Optional[str] @@ -129,3 +164,73 @@ class Integration(TypedDict, total=False): can set an explicit display name for your run, add tags to your run, and set a default entity (team, username, etc) to be associated with your run. """ + + +class MethodDpoHyperparameters(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + beta: Union[Literal["auto"], float] + """The beta value for the DPO method. + + A higher beta value will increase the weight of the penalty between the policy + and reference model. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodDpo(TypedDict, total=False): + hyperparameters: MethodDpoHyperparameters + """The hyperparameters used for the fine-tuning job.""" + + +class MethodSupervisedHyperparameters(TypedDict, total=False): + batch_size: Union[Literal["auto"], int] + """Number of examples in each batch. + + A larger batch size means that model parameters are updated less frequently, but + with lower variance. + """ + + learning_rate_multiplier: Union[Literal["auto"], float] + """Scaling factor for the learning rate. + + A smaller learning rate may be useful to avoid overfitting. + """ + + n_epochs: Union[Literal["auto"], int] + """The number of epochs to train the model for. + + An epoch refers to one full cycle through the training dataset. + """ + + +class MethodSupervised(TypedDict, total=False): + hyperparameters: MethodSupervisedHyperparameters + """The hyperparameters used for the fine-tuning job.""" + + +class Method(TypedDict, total=False): + dpo: MethodDpo + """Configuration for the DPO fine-tuning method.""" + + supervised: MethodSupervised + """Configuration for the supervised fine-tuning method.""" + + type: Literal["supervised", "dpo"] + """The type of method. Is either `supervised` or `dpo`.""" diff --git a/src/openai/types/fine_tuning/job_list_params.py b/src/openai/types/fine_tuning/job_list_params.py index 5c075ca33f..b79f3ce86a 100644 --- a/src/openai/types/fine_tuning/job_list_params.py +++ b/src/openai/types/fine_tuning/job_list_params.py @@ -2,6 +2,7 @@ from __future__ import annotations +from typing import Dict, Optional from typing_extensions import TypedDict __all__ = ["JobListParams"] @@ -13,3 +14,10 @@ class JobListParams(TypedDict, total=False): limit: int """Number of fine-tuning jobs to retrieve.""" + + metadata: Optional[Dict[str, str]] + """Optional metadata filter. + + To filter, use the syntax `metadata[k]=v`. Alternatively, set `metadata=null` to + indicate no metadata. + """ diff --git a/src/openai/types/image_create_variation_params.py b/src/openai/types/image_create_variation_params.py index 2549307372..d20f672912 100644 --- a/src/openai/types/image_create_variation_params.py +++ b/src/openai/types/image_create_variation_params.py @@ -6,6 +6,7 @@ from typing_extensions import Literal, Required, TypedDict from .._types import FileTypes +from .image_model import ImageModel __all__ = ["ImageCreateVariationParams"] @@ -17,7 +18,7 @@ class ImageCreateVariationParams(TypedDict, total=False): Must be a valid PNG file, less than 4MB, and square. 
""" - model: Union[str, Literal["dall-e-2"], None] + model: Union[str, ImageModel, None] """The model to use for image generation. Only `dall-e-2` is supported at this time. @@ -46,5 +47,5 @@ class ImageCreateVariationParams(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/image_edit_params.py b/src/openai/types/image_edit_params.py index 073456e349..1cb10611f3 100644 --- a/src/openai/types/image_edit_params.py +++ b/src/openai/types/image_edit_params.py @@ -6,6 +6,7 @@ from typing_extensions import Literal, Required, TypedDict from .._types import FileTypes +from .image_model import ImageModel __all__ = ["ImageEditParams"] @@ -31,7 +32,7 @@ class ImageEditParams(TypedDict, total=False): PNG file, less than 4MB, and have the same dimensions as `image`. """ - model: Union[str, Literal["dall-e-2"], None] + model: Union[str, ImageModel, None] """The model to use for image generation. Only `dall-e-2` is supported at this time. @@ -57,5 +58,5 @@ class ImageEditParams(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/image_generate_params.py b/src/openai/types/image_generate_params.py index 18c56f8ed6..c88c45f518 100644 --- a/src/openai/types/image_generate_params.py +++ b/src/openai/types/image_generate_params.py @@ -5,6 +5,8 @@ from typing import Union, Optional from typing_extensions import Literal, Required, TypedDict +from .image_model import ImageModel + __all__ = ["ImageGenerateParams"] @@ -16,7 +18,7 @@ class ImageGenerateParams(TypedDict, total=False): `dall-e-3`. """ - model: Union[str, Literal["dall-e-2", "dall-e-3"], None] + model: Union[str, ImageModel, None] """The model to use for image generation.""" n: Optional[int] @@ -59,5 +61,5 @@ class ImageGenerateParams(TypedDict, total=False): """ A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. - [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids). + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). """ diff --git a/src/openai/types/image_model.py b/src/openai/types/image_model.py new file mode 100644 index 0000000000..1672369bea --- /dev/null +++ b/src/openai/types/image_model.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ImageModel"] + +ImageModel: TypeAlias = Literal["dall-e-2", "dall-e-3"] diff --git a/src/openai/types/model_deleted.py b/src/openai/types/model_deleted.py index d9a48bb1b5..7f81e1b380 100644 --- a/src/openai/types/model_deleted.py +++ b/src/openai/types/model_deleted.py @@ -1,7 +1,6 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- from .._models import BaseModel __all__ = ["ModelDeleted"] diff --git a/src/openai/types/moderation.py b/src/openai/types/moderation.py index 5aa691823a..608f562218 100644 --- a/src/openai/types/moderation.py +++ b/src/openai/types/moderation.py @@ -1,11 +1,13 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from typing import List, Optional +from typing_extensions import Literal from pydantic import Field as FieldInfo from .._models import BaseModel -__all__ = ["Moderation", "Categories", "CategoryScores"] +__all__ = ["Moderation", "Categories", "CategoryAppliedInputTypes", "CategoryScores"] class Categories(BaseModel): @@ -36,6 +38,20 @@ class Categories(BaseModel): orientation, disability status, or caste. """ + illicit: Optional[bool] = None + """ + Content that includes instructions or advice that facilitate the planning or + execution of wrongdoing, or that gives advice or instruction on how to commit + illicit acts. For example, "how to shoplift" would fit this category. + """ + + illicit_violent: Optional[bool] = FieldInfo(alias="illicit/violent", default=None) + """ + Content that includes instructions or advice that facilitate the planning or + execution of wrongdoing that also includes violence, or that gives advice or + instruction on the procurement of any weapon. + """ + self_harm: bool = FieldInfo(alias="self-harm") """ Content that promotes, encourages, or depicts acts of self-harm, such as @@ -72,6 +88,47 @@ class Categories(BaseModel): """Content that depicts death, violence, or physical injury in graphic detail.""" +class CategoryAppliedInputTypes(BaseModel): + harassment: List[Literal["text"]] + """The applied input type(s) for the category 'harassment'.""" + + harassment_threatening: List[Literal["text"]] = FieldInfo(alias="harassment/threatening") + """The applied input type(s) for the category 'harassment/threatening'.""" + + hate: List[Literal["text"]] + """The applied input type(s) for the category 'hate'.""" + + hate_threatening: List[Literal["text"]] = FieldInfo(alias="hate/threatening") + """The applied input type(s) for the category 'hate/threatening'.""" + + illicit: List[Literal["text"]] + """The applied input type(s) for the category 'illicit'.""" + + illicit_violent: List[Literal["text"]] = FieldInfo(alias="illicit/violent") + """The applied input type(s) for the category 'illicit/violent'.""" + + self_harm: List[Literal["text", "image"]] = FieldInfo(alias="self-harm") + """The applied input type(s) for the category 'self-harm'.""" + + self_harm_instructions: List[Literal["text", "image"]] = FieldInfo(alias="self-harm/instructions") + """The applied input type(s) for the category 'self-harm/instructions'.""" + + self_harm_intent: List[Literal["text", "image"]] = FieldInfo(alias="self-harm/intent") + """The applied input type(s) for the category 'self-harm/intent'.""" + + sexual: List[Literal["text", "image"]] + """The applied input type(s) for the category 'sexual'.""" + + sexual_minors: List[Literal["text"]] = FieldInfo(alias="sexual/minors") + """The applied input type(s) for the category 'sexual/minors'.""" + + violence: List[Literal["text", "image"]] + """The applied input type(s) for the category 'violence'.""" + + violence_graphic: List[Literal["text", "image"]] = FieldInfo(alias="violence/graphic") + """The applied input type(s) for the category 'violence/graphic'.""" + + class CategoryScores(BaseModel): harassment: float """The score for the category 'harassment'.""" @@ -85,6 +142,12 @@ class 
CategoryScores(BaseModel): hate_threatening: float = FieldInfo(alias="hate/threatening") """The score for the category 'hate/threatening'.""" + illicit: float + """The score for the category 'illicit'.""" + + illicit_violent: float = FieldInfo(alias="illicit/violent") + """The score for the category 'illicit/violent'.""" + self_harm: float = FieldInfo(alias="self-harm") """The score for the category 'self-harm'.""" @@ -111,6 +174,11 @@ class Moderation(BaseModel): categories: Categories """A list of the categories, and whether they are flagged or not.""" + category_applied_input_types: CategoryAppliedInputTypes + """ + A list of the categories along with the input type(s) that the score applies to. + """ + category_scores: CategoryScores """A list of the categories along with their scores as predicted by model.""" diff --git a/src/openai/types/moderation_create_params.py b/src/openai/types/moderation_create_params.py index d4608def54..3ea2f3cd88 100644 --- a/src/openai/types/moderation_create_params.py +++ b/src/openai/types/moderation_create_params.py @@ -2,24 +2,28 @@ from __future__ import annotations -from typing import List, Union -from typing_extensions import Literal, Required, TypedDict +from typing import List, Union, Iterable +from typing_extensions import Required, TypedDict + +from .moderation_model import ModerationModel +from .moderation_multi_modal_input_param import ModerationMultiModalInputParam __all__ = ["ModerationCreateParams"] class ModerationCreateParams(TypedDict, total=False): - input: Required[Union[str, List[str]]] - """The input text to classify""" + input: Required[Union[str, List[str], Iterable[ModerationMultiModalInputParam]]] + """Input (or inputs) to classify. - model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] + Can be a single string, an array of strings, or an array of multi-modal input + objects similar to other models. """ - Two content moderations models are available: `text-moderation-stable` and - `text-moderation-latest`. - - The default is `text-moderation-latest` which will be automatically upgraded - over time. This ensures you are always using our most accurate model. If you use - `text-moderation-stable`, we will provide advanced notice before updating the - model. Accuracy of `text-moderation-stable` may be slightly lower than for - `text-moderation-latest`. + + model: Union[str, ModerationModel] + """The content moderation model you would like to use. + + Learn more in + [the moderation guide](https://platform.openai.com/docs/guides/moderation), and + learn about available models + [here](https://platform.openai.com/docs/models#moderation). """ diff --git a/src/openai/types/moderation_image_url_input_param.py b/src/openai/types/moderation_image_url_input_param.py new file mode 100644 index 0000000000..9a69a6a257 --- /dev/null +++ b/src/openai/types/moderation_image_url_input_param.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ModerationImageURLInputParam", "ImageURL"] + + +class ImageURL(TypedDict, total=False): + url: Required[str] + """Either a URL of the image or the base64 encoded image data.""" + + +class ModerationImageURLInputParam(TypedDict, total=False): + image_url: Required[ImageURL] + """Contains either an image URL or a data URL for a base64 encoded image.""" + + type: Required[Literal["image_url"]] + """Always `image_url`.""" diff --git a/src/openai/types/moderation_model.py b/src/openai/types/moderation_model.py new file mode 100644 index 0000000000..64954c4547 --- /dev/null +++ b/src/openai/types/moderation_model.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ModerationModel"] + +ModerationModel: TypeAlias = Literal[ + "omni-moderation-latest", "omni-moderation-2024-09-26", "text-moderation-latest", "text-moderation-stable" +] diff --git a/src/openai/types/moderation_multi_modal_input_param.py b/src/openai/types/moderation_multi_modal_input_param.py new file mode 100644 index 0000000000..4314e7b031 --- /dev/null +++ b/src/openai/types/moderation_multi_modal_input_param.py @@ -0,0 +1,13 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .moderation_text_input_param import ModerationTextInputParam +from .moderation_image_url_input_param import ModerationImageURLInputParam + +__all__ = ["ModerationMultiModalInputParam"] + +ModerationMultiModalInputParam: TypeAlias = Union[ModerationImageURLInputParam, ModerationTextInputParam] diff --git a/src/openai/types/moderation_text_input_param.py b/src/openai/types/moderation_text_input_param.py new file mode 100644 index 0000000000..e5da53337b --- /dev/null +++ b/src/openai/types/moderation_text_input_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ModerationTextInputParam"] + + +class ModerationTextInputParam(TypedDict, total=False): + text: Required[str] + """A string of text to classify.""" + + type: Required[Literal["text"]] + """Always `text`.""" diff --git a/src/openai/types/other_file_chunking_strategy_object.py b/src/openai/types/other_file_chunking_strategy_object.py new file mode 100644 index 0000000000..e4cd61a8fc --- /dev/null +++ b/src/openai/types/other_file_chunking_strategy_object.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["OtherFileChunkingStrategyObject"] + + +class OtherFileChunkingStrategyObject(BaseModel): + type: Literal["other"] + """Always `other`.""" diff --git a/src/openai/types/responses/__init__.py b/src/openai/types/responses/__init__.py new file mode 100644 index 0000000000..db7ecabfcf --- /dev/null +++ b/src/openai/types/responses/__init__.py @@ -0,0 +1,132 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from .tool import Tool as Tool +from .response import Response as Response +from .tool_param import ToolParam as ToolParam +from .computer_tool import ComputerTool as ComputerTool +from .function_tool import FunctionTool as FunctionTool +from .response_error import ResponseError as ResponseError +from .response_usage import ResponseUsage as ResponseUsage +from .response_status import ResponseStatus as ResponseStatus +from .web_search_tool import WebSearchTool as WebSearchTool +from .file_search_tool import FileSearchTool as FileSearchTool +from .tool_choice_types import ToolChoiceTypes as ToolChoiceTypes +from .response_item_list import ResponseItemList as ResponseItemList +from .computer_tool_param import ComputerToolParam as ComputerToolParam +from .function_tool_param import FunctionToolParam as FunctionToolParam +from .response_includable import ResponseIncludable as ResponseIncludable +from .response_input_file import ResponseInputFile as ResponseInputFile +from .response_input_text import ResponseInputText as ResponseInputText +from .tool_choice_options import ToolChoiceOptions as ToolChoiceOptions +from .response_error_event import ResponseErrorEvent as ResponseErrorEvent +from .response_input_image import ResponseInputImage as ResponseInputImage +from .response_input_param import ResponseInputParam as ResponseInputParam +from .response_output_item import ResponseOutputItem as ResponseOutputItem +from .response_output_text import ResponseOutputText as ResponseOutputText +from .response_text_config import ResponseTextConfig as ResponseTextConfig +from .tool_choice_function import ToolChoiceFunction as ToolChoiceFunction +from .response_failed_event import ResponseFailedEvent as ResponseFailedEvent +from .response_stream_event import ResponseStreamEvent as ResponseStreamEvent +from .web_search_tool_param import WebSearchToolParam as WebSearchToolParam +from .file_search_tool_param import FileSearchToolParam as FileSearchToolParam +from .input_item_list_params import InputItemListParams as InputItemListParams +from .response_create_params import ResponseCreateParams as ResponseCreateParams +from .response_created_event import ResponseCreatedEvent as ResponseCreatedEvent +from .response_input_content import ResponseInputContent as ResponseInputContent +from .response_output_message import ResponseOutputMessage as ResponseOutputMessage +from .response_output_refusal import ResponseOutputRefusal as ResponseOutputRefusal +from .response_reasoning_item import ResponseReasoningItem as ResponseReasoningItem +from .tool_choice_types_param import ToolChoiceTypesParam as ToolChoiceTypesParam +from .easy_input_message_param import EasyInputMessageParam as EasyInputMessageParam +from .response_completed_event import ResponseCompletedEvent as ResponseCompletedEvent +from .response_retrieve_params import ResponseRetrieveParams as ResponseRetrieveParams +from .response_text_done_event import ResponseTextDoneEvent as ResponseTextDoneEvent +from .response_audio_done_event import ResponseAudioDoneEvent as ResponseAudioDoneEvent +from .response_incomplete_event import ResponseIncompleteEvent as ResponseIncompleteEvent +from .response_input_file_param import ResponseInputFileParam as ResponseInputFileParam +from .response_input_item_param import ResponseInputItemParam as ResponseInputItemParam +from .response_input_text_param import ResponseInputTextParam as ResponseInputTextParam +from .response_text_delta_event import ResponseTextDeltaEvent as 
ResponseTextDeltaEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent as ResponseAudioDeltaEvent +from .response_in_progress_event import ResponseInProgressEvent as ResponseInProgressEvent +from .response_input_image_param import ResponseInputImageParam as ResponseInputImageParam +from .response_output_text_param import ResponseOutputTextParam as ResponseOutputTextParam +from .response_text_config_param import ResponseTextConfigParam as ResponseTextConfigParam +from .tool_choice_function_param import ToolChoiceFunctionParam as ToolChoiceFunctionParam +from .response_computer_tool_call import ResponseComputerToolCall as ResponseComputerToolCall +from .response_format_text_config import ResponseFormatTextConfig as ResponseFormatTextConfig +from .response_function_tool_call import ResponseFunctionToolCall as ResponseFunctionToolCall +from .response_refusal_done_event import ResponseRefusalDoneEvent as ResponseRefusalDoneEvent +from .response_function_web_search import ResponseFunctionWebSearch as ResponseFunctionWebSearch +from .response_input_content_param import ResponseInputContentParam as ResponseInputContentParam +from .response_refusal_delta_event import ResponseRefusalDeltaEvent as ResponseRefusalDeltaEvent +from .response_output_message_param import ResponseOutputMessageParam as ResponseOutputMessageParam +from .response_output_refusal_param import ResponseOutputRefusalParam as ResponseOutputRefusalParam +from .response_reasoning_item_param import ResponseReasoningItemParam as ResponseReasoningItemParam +from .response_file_search_tool_call import ResponseFileSearchToolCall as ResponseFileSearchToolCall +from .response_output_item_done_event import ResponseOutputItemDoneEvent as ResponseOutputItemDoneEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent as ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent as ResponseOutputItemAddedEvent +from .response_computer_tool_call_param import ResponseComputerToolCallParam as ResponseComputerToolCallParam +from .response_content_part_added_event import ResponseContentPartAddedEvent as ResponseContentPartAddedEvent +from .response_format_text_config_param import ResponseFormatTextConfigParam as ResponseFormatTextConfigParam +from .response_function_tool_call_param import ResponseFunctionToolCallParam as ResponseFunctionToolCallParam +from .response_function_web_search_param import ResponseFunctionWebSearchParam as ResponseFunctionWebSearchParam +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall as ResponseCodeInterpreterToolCall +from .response_input_message_content_list import ResponseInputMessageContentList as ResponseInputMessageContentList +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent as ResponseAudioTranscriptDoneEvent +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam as ResponseFileSearchToolCallParam +from .response_text_annotation_delta_event import ResponseTextAnnotationDeltaEvent as ResponseTextAnnotationDeltaEvent +from .response_audio_transcript_delta_event import ( + ResponseAudioTranscriptDeltaEvent as ResponseAudioTranscriptDeltaEvent, +) +from .response_format_text_json_schema_config import ( + ResponseFormatTextJSONSchemaConfig as ResponseFormatTextJSONSchemaConfig, +) +from .response_web_search_call_completed_event import ( + ResponseWebSearchCallCompletedEvent as ResponseWebSearchCallCompletedEvent, +) +from 
.response_web_search_call_searching_event import ( + ResponseWebSearchCallSearchingEvent as ResponseWebSearchCallSearchingEvent, +) +from .response_file_search_call_completed_event import ( + ResponseFileSearchCallCompletedEvent as ResponseFileSearchCallCompletedEvent, +) +from .response_file_search_call_searching_event import ( + ResponseFileSearchCallSearchingEvent as ResponseFileSearchCallSearchingEvent, +) +from .response_input_message_content_list_param import ( + ResponseInputMessageContentListParam as ResponseInputMessageContentListParam, +) +from .response_web_search_call_in_progress_event import ( + ResponseWebSearchCallInProgressEvent as ResponseWebSearchCallInProgressEvent, +) +from .response_file_search_call_in_progress_event import ( + ResponseFileSearchCallInProgressEvent as ResponseFileSearchCallInProgressEvent, +) +from .response_function_call_arguments_done_event import ( + ResponseFunctionCallArgumentsDoneEvent as ResponseFunctionCallArgumentsDoneEvent, +) +from .response_function_call_arguments_delta_event import ( + ResponseFunctionCallArgumentsDeltaEvent as ResponseFunctionCallArgumentsDeltaEvent, +) +from .response_format_text_json_schema_config_param import ( + ResponseFormatTextJSONSchemaConfigParam as ResponseFormatTextJSONSchemaConfigParam, +) +from .response_code_interpreter_call_code_done_event import ( + ResponseCodeInterpreterCallCodeDoneEvent as ResponseCodeInterpreterCallCodeDoneEvent, +) +from .response_code_interpreter_call_completed_event import ( + ResponseCodeInterpreterCallCompletedEvent as ResponseCodeInterpreterCallCompletedEvent, +) +from .response_code_interpreter_call_code_delta_event import ( + ResponseCodeInterpreterCallCodeDeltaEvent as ResponseCodeInterpreterCallCodeDeltaEvent, +) +from .response_code_interpreter_call_in_progress_event import ( + ResponseCodeInterpreterCallInProgressEvent as ResponseCodeInterpreterCallInProgressEvent, +) +from .response_code_interpreter_call_interpreting_event import ( + ResponseCodeInterpreterCallInterpretingEvent as ResponseCodeInterpreterCallInterpretingEvent, +) diff --git a/src/openai/types/responses/computer_tool.py b/src/openai/types/responses/computer_tool.py new file mode 100644 index 0000000000..dffb7af7b7 --- /dev/null +++ b/src/openai/types/responses/computer_tool.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ComputerTool"] + + +class ComputerTool(BaseModel): + display_height: float + """The height of the computer display.""" + + display_width: float + """The width of the computer display.""" + + environment: Literal["mac", "windows", "ubuntu", "browser"] + """The type of computer environment to control.""" + + type: Literal["computer_use_preview"] + """The type of the computer use tool. Always `computer_use_preview`.""" diff --git a/src/openai/types/responses/computer_tool_param.py b/src/openai/types/responses/computer_tool_param.py new file mode 100644 index 0000000000..6b1072ffd2 --- /dev/null +++ b/src/openai/types/responses/computer_tool_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
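# Editor's usage sketch (not part of the diff): a computer-use tool definition as
# a plain dict typed against ComputerToolParam (the TypedDict defined immediately
# below mirrors the ComputerTool model above). Where the dict is passed, e.g. in a
# `tools` list on a Responses create call, is assumed and not shown in this hunk.
from openai.types.responses import ComputerToolParam

computer_tool: ComputerToolParam = {
    "type": "computer_use_preview",
    "display_width": 1024,
    "display_height": 768,
    "environment": "browser",
}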
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ComputerToolParam"] + + +class ComputerToolParam(TypedDict, total=False): + display_height: Required[float] + """The height of the computer display.""" + + display_width: Required[float] + """The width of the computer display.""" + + environment: Required[Literal["mac", "windows", "ubuntu", "browser"]] + """The type of computer environment to control.""" + + type: Required[Literal["computer_use_preview"]] + """The type of the computer use tool. Always `computer_use_preview`.""" diff --git a/src/openai/types/responses/easy_input_message_param.py b/src/openai/types/responses/easy_input_message_param.py new file mode 100644 index 0000000000..ef2f1c5f37 --- /dev/null +++ b/src/openai/types/responses/easy_input_message_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +from .response_input_message_content_list_param import ResponseInputMessageContentListParam + +__all__ = ["EasyInputMessageParam"] + + +class EasyInputMessageParam(TypedDict, total=False): + content: Required[Union[str, ResponseInputMessageContentListParam]] + """ + Text, image, or audio input to the model, used to generate a response. Can also + contain previous assistant responses. + """ + + role: Required[Literal["user", "assistant", "system", "developer"]] + """The role of the message input. + + One of `user`, `assistant`, `system`, or `developer`. + """ + + type: Literal["message"] + """The type of the message input. Always `message`.""" diff --git a/src/openai/types/responses/file_search_tool.py b/src/openai/types/responses/file_search_tool.py new file mode 100644 index 0000000000..683fc533fe --- /dev/null +++ b/src/openai/types/responses/file_search_tool.py @@ -0,0 +1,44 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from ..shared.compound_filter import CompoundFilter +from ..shared.comparison_filter import ComparisonFilter + +__all__ = ["FileSearchTool", "Filters", "RankingOptions"] + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] + + +class RankingOptions(BaseModel): + ranker: Optional[Literal["auto", "default-2024-11-15"]] = None + """The ranker to use for the file search.""" + + score_threshold: Optional[float] = None + """ + The score threshold for the file search, a number between 0 and 1. Numbers + closer to 1 will attempt to return only the most relevant results, but may + return fewer results. + """ + + +class FileSearchTool(BaseModel): + type: Literal["file_search"] + """The type of the file search tool. Always `file_search`.""" + + vector_store_ids: List[str] + """The IDs of the vector stores to search.""" + + filters: Optional[Filters] = None + """A filter to apply based on file attributes.""" + + max_num_results: Optional[int] = None + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. 
+ """ + + ranking_options: Optional[RankingOptions] = None + """Ranking options for search.""" diff --git a/src/openai/types/responses/file_search_tool_param.py b/src/openai/types/responses/file_search_tool_param.py new file mode 100644 index 0000000000..2d6af8536b --- /dev/null +++ b/src/openai/types/responses/file_search_tool_param.py @@ -0,0 +1,45 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from ..shared_params.compound_filter import CompoundFilter +from ..shared_params.comparison_filter import ComparisonFilter + +__all__ = ["FileSearchToolParam", "Filters", "RankingOptions"] + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] + + +class RankingOptions(TypedDict, total=False): + ranker: Literal["auto", "default-2024-11-15"] + """The ranker to use for the file search.""" + + score_threshold: float + """ + The score threshold for the file search, a number between 0 and 1. Numbers + closer to 1 will attempt to return only the most relevant results, but may + return fewer results. + """ + + +class FileSearchToolParam(TypedDict, total=False): + type: Required[Literal["file_search"]] + """The type of the file search tool. Always `file_search`.""" + + vector_store_ids: Required[List[str]] + """The IDs of the vector stores to search.""" + + filters: Filters + """A filter to apply based on file attributes.""" + + max_num_results: int + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. + """ + + ranking_options: RankingOptions + """Ranking options for search.""" diff --git a/src/openai/types/responses/function_tool.py b/src/openai/types/responses/function_tool.py new file mode 100644 index 0000000000..236a2c7c63 --- /dev/null +++ b/src/openai/types/responses/function_tool.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["FunctionTool"] + + +class FunctionTool(BaseModel): + name: str + """The name of the function to call.""" + + parameters: Dict[str, object] + """A JSON schema object describing the parameters of the function.""" + + strict: bool + """Whether to enforce strict parameter validation. Default `true`.""" + + type: Literal["function"] + """The type of the function tool. Always `function`.""" + + description: Optional[str] = None + """A description of the function. + + Used by the model to determine whether or not to call the function. + """ diff --git a/src/openai/types/responses/function_tool_param.py b/src/openai/types/responses/function_tool_param.py new file mode 100644 index 0000000000..774a22e336 --- /dev/null +++ b/src/openai/types/responses/function_tool_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["FunctionToolParam"] + + +class FunctionToolParam(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" + + parameters: Required[Dict[str, object]] + """A JSON schema object describing the parameters of the function.""" + + strict: Required[bool] + """Whether to enforce strict parameter validation. 
Default `true`.""" + + type: Required[Literal["function"]] + """The type of the function tool. Always `function`.""" + + description: Optional[str] + """A description of the function. + + Used by the model to determine whether or not to call the function. + """ diff --git a/src/openai/types/responses/input_item_list_params.py b/src/openai/types/responses/input_item_list_params.py new file mode 100644 index 0000000000..e0b71f1ac5 --- /dev/null +++ b/src/openai/types/responses/input_item_list_params.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, TypedDict + +__all__ = ["InputItemListParams"] + + +class InputItemListParams(TypedDict, total=False): + after: str + """An item ID to list items after, used in pagination.""" + + before: str + """An item ID to list items before, used in pagination.""" + + limit: int + """A limit on the number of objects to be returned. + + Limit can range between 1 and 100, and the default is 20. + """ + + order: Literal["asc", "desc"] + """The order to return the input items in. Default is `asc`. + + - `asc`: Return the input items in ascending order. + - `desc`: Return the input items in descending order. + """ diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py new file mode 100644 index 0000000000..ec1b199f64 --- /dev/null +++ b/src/openai/types/responses/response.py @@ -0,0 +1,188 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, TypeAlias + +from .tool import Tool +from ..._models import BaseModel +from .response_error import ResponseError +from .response_usage import ResponseUsage +from .response_status import ResponseStatus +from ..shared.metadata import Metadata +from ..shared.reasoning import Reasoning +from .tool_choice_types import ToolChoiceTypes +from ..shared.chat_model import ChatModel +from .tool_choice_options import ToolChoiceOptions +from .response_output_item import ResponseOutputItem +from .response_text_config import ResponseTextConfig +from .tool_choice_function import ToolChoiceFunction + +__all__ = ["Response", "IncompleteDetails", "ToolChoice"] + + +class IncompleteDetails(BaseModel): + reason: Optional[Literal["max_output_tokens", "content_filter"]] = None + """The reason why the response is incomplete.""" + + +ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypes, ToolChoiceFunction] + + +class Response(BaseModel): + id: str + """Unique identifier for this Response.""" + + created_at: float + """Unix timestamp (in seconds) of when this Response was created.""" + + error: Optional[ResponseError] = None + """An error object returned when the model fails to generate a Response.""" + + incomplete_details: Optional[IncompleteDetails] = None + """Details about why the response is incomplete.""" + + instructions: Optional[str] = None + """ + Inserts a system (or developer) message as the first item in the model's + context. + + When using along with `previous_response_id`, the instructions from a previous + response will be not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + """ + + metadata: Optional[Metadata] = None + """Set of 16 key-value pairs that can be attached to an object. 
+ + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + model: Union[str, ChatModel] + """Model ID used to generate the response, like `gpt-4o` or `o1`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + object: Literal["response"] + """The object type of this resource - always set to `response`.""" + + output: List[ResponseOutputItem] + """An array of content items generated by the model. + + - The length and order of items in the `output` array is dependent on the + model's response. + - Rather than accessing the first item in the `output` array and assuming it's + an `assistant` message with the content generated by the model, you might + consider using the `output_text` property where supported in SDKs. + """ + + parallel_tool_calls: bool + """Whether to allow the model to run tool calls in parallel.""" + + temperature: Optional[float] = None + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. + """ + + tool_choice: ToolChoice + """ + How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + """ + + tools: List[Tool] + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + top_p: Optional[float] = None + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. + + We generally recommend altering this or `temperature` but not both. + """ + + max_output_tokens: Optional[int] = None + """ + An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + + previous_response_id: Optional[str] = None + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). 
+ """ + + reasoning: Optional[Reasoning] = None + """**o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + """ + + status: Optional[ResponseStatus] = None + """The status of the response generation. + + One of `completed`, `failed`, `in_progress`, or `incomplete`. + """ + + text: Optional[ResponseTextConfig] = None + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + truncation: Optional[Literal["auto", "disabled"]] = None + """The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + """ + + usage: Optional[ResponseUsage] = None + """ + Represents token usage details including input tokens, output tokens, a + breakdown of output tokens, and the total tokens used. + """ + + user: Optional[str] = None + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + """ diff --git a/src/openai/types/responses/response_audio_delta_event.py b/src/openai/types/responses/response_audio_delta_event.py new file mode 100644 index 0000000000..f3d77fac52 --- /dev/null +++ b/src/openai/types/responses/response_audio_delta_event.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioDeltaEvent"] + + +class ResponseAudioDeltaEvent(BaseModel): + delta: str + """A chunk of Base64 encoded response audio bytes.""" + + type: Literal["response.audio.delta"] + """The type of the event. Always `response.audio.delta`.""" diff --git a/src/openai/types/responses/response_audio_done_event.py b/src/openai/types/responses/response_audio_done_event.py new file mode 100644 index 0000000000..5654f8e398 --- /dev/null +++ b/src/openai/types/responses/response_audio_done_event.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioDoneEvent"] + + +class ResponseAudioDoneEvent(BaseModel): + type: Literal["response.audio.done"] + """The type of the event. Always `response.audio.done`.""" diff --git a/src/openai/types/responses/response_audio_transcript_delta_event.py b/src/openai/types/responses/response_audio_transcript_delta_event.py new file mode 100644 index 0000000000..69b6660f3f --- /dev/null +++ b/src/openai/types/responses/response_audio_transcript_delta_event.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
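Taken together, the fields above make a finished `Response` easy to inspect. The sketch below is illustrative only and not part of the generated code; it assumes the `openai` package with these new modules is importable and shows how `status`, `error`, and `incomplete_details` relate.

```python
from openai.types.responses.response import Response


def summarize(resp: Response) -> str:
    """Build a one-line, human-readable summary of a Response."""
    if resp.error is not None:
        # `code` and `message` come from ResponseError.
        return f"{resp.id}: error {resp.error.code} - {resp.error.message}"
    if resp.incomplete_details is not None:
        return f"{resp.id}: incomplete ({resp.incomplete_details.reason})"
    return f"{resp.id}: {resp.status} with {len(resp.output)} output item(s)"
```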
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDeltaEvent"] + + +class ResponseAudioTranscriptDeltaEvent(BaseModel): + delta: str + """The partial transcript of the audio response.""" + + type: Literal["response.audio.transcript.delta"] + """The type of the event. Always `response.audio.transcript.delta`.""" diff --git a/src/openai/types/responses/response_audio_transcript_done_event.py b/src/openai/types/responses/response_audio_transcript_done_event.py new file mode 100644 index 0000000000..1a20319f83 --- /dev/null +++ b/src/openai/types/responses/response_audio_transcript_done_event.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseAudioTranscriptDoneEvent"] + + +class ResponseAudioTranscriptDoneEvent(BaseModel): + type: Literal["response.audio.transcript.done"] + """The type of the event. Always `response.audio.transcript.done`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py b/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py new file mode 100644 index 0000000000..7527238d06 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_code_delta_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallCodeDeltaEvent"] + + +class ResponseCodeInterpreterCallCodeDeltaEvent(BaseModel): + delta: str + """The partial code snippet added by the code interpreter.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.code.delta"] + """The type of the event. Always `response.code_interpreter_call.code.delta`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_code_done_event.py b/src/openai/types/responses/response_code_interpreter_call_code_done_event.py new file mode 100644 index 0000000000..f84d4cf3e8 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_code_done_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterCallCodeDoneEvent"] + + +class ResponseCodeInterpreterCallCodeDoneEvent(BaseModel): + code: str + """The final code snippet output by the code interpreter.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.code.done"] + """The type of the event. Always `response.code_interpreter_call.code.done`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_completed_event.py b/src/openai/types/responses/response_code_interpreter_call_completed_event.py new file mode 100644 index 0000000000..b0cb73fb72 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_completed_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
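The `*.delta` / `*.done` event pairs above follow a common streaming pattern: partial payloads arrive first, then a terminal event. A minimal sketch of consuming them, assuming an iterable of already-parsed event objects (how those events are obtained is not shown here):

```python
from typing import Iterable

from openai.types.responses.response_audio_transcript_delta_event import (
    ResponseAudioTranscriptDeltaEvent,
)
from openai.types.responses.response_audio_transcript_done_event import (
    ResponseAudioTranscriptDoneEvent,
)


def collect_transcript(events: Iterable[object]) -> str:
    """Concatenate transcript deltas until the corresponding done event arrives."""
    parts = []
    for event in events:
        if isinstance(event, ResponseAudioTranscriptDeltaEvent):
            parts.append(event.delta)
        elif isinstance(event, ResponseAudioTranscriptDoneEvent):
            break
    return "".join(parts)
```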
+ +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall + +__all__ = ["ResponseCodeInterpreterCallCompletedEvent"] + + +class ResponseCodeInterpreterCallCompletedEvent(BaseModel): + code_interpreter_call: ResponseCodeInterpreterToolCall + """A tool call to run code.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.completed"] + """The type of the event. Always `response.code_interpreter_call.completed`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py b/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py new file mode 100644 index 0000000000..64b739f308 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_in_progress_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall + +__all__ = ["ResponseCodeInterpreterCallInProgressEvent"] + + +class ResponseCodeInterpreterCallInProgressEvent(BaseModel): + code_interpreter_call: ResponseCodeInterpreterToolCall + """A tool call to run code.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.in_progress"] + """The type of the event. Always `response.code_interpreter_call.in_progress`.""" diff --git a/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py b/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py new file mode 100644 index 0000000000..3100eac175 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_call_interpreting_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_code_interpreter_tool_call import ResponseCodeInterpreterToolCall + +__all__ = ["ResponseCodeInterpreterCallInterpretingEvent"] + + +class ResponseCodeInterpreterCallInterpretingEvent(BaseModel): + code_interpreter_call: ResponseCodeInterpreterToolCall + """A tool call to run code.""" + + output_index: int + """The index of the output item that the code interpreter call is in progress.""" + + type: Literal["response.code_interpreter_call.interpreting"] + """The type of the event. Always `response.code_interpreter_call.interpreting`.""" diff --git a/src/openai/types/responses/response_code_interpreter_tool_call.py b/src/openai/types/responses/response_code_interpreter_tool_call.py new file mode 100644 index 0000000000..d5a5057074 --- /dev/null +++ b/src/openai/types/responses/response_code_interpreter_tool_call.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
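The three code interpreter call lifecycle events differ only in their `type` tag; each carries the same `code_interpreter_call` snapshot and `output_index`. A small, illustrative logging helper (the helper and alias names are my own, not part of the generated code):

```python
from typing import Union

from openai.types.responses.response_code_interpreter_call_completed_event import (
    ResponseCodeInterpreterCallCompletedEvent,
)
from openai.types.responses.response_code_interpreter_call_in_progress_event import (
    ResponseCodeInterpreterCallInProgressEvent,
)
from openai.types.responses.response_code_interpreter_call_interpreting_event import (
    ResponseCodeInterpreterCallInterpretingEvent,
)

CodeInterpreterEvent = Union[
    ResponseCodeInterpreterCallInProgressEvent,
    ResponseCodeInterpreterCallInterpretingEvent,
    ResponseCodeInterpreterCallCompletedEvent,
]


def log_code_interpreter_event(event: CodeInterpreterEvent) -> None:
    """Print a one-line status update for a code interpreter lifecycle event."""
    call = event.code_interpreter_call
    print(f"output #{event.output_index}: {event.type} (call {call.id}, status {call.status})")
```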
+ +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["ResponseCodeInterpreterToolCall", "Result", "ResultLogs", "ResultFiles", "ResultFilesFile"] + + +class ResultLogs(BaseModel): + logs: str + """The logs of the code interpreter tool call.""" + + type: Literal["logs"] + """The type of the code interpreter text output. Always `logs`.""" + + +class ResultFilesFile(BaseModel): + file_id: str + """The ID of the file.""" + + mime_type: str + """The MIME type of the file.""" + + +class ResultFiles(BaseModel): + files: List[ResultFilesFile] + + type: Literal["files"] + """The type of the code interpreter file output. Always `files`.""" + + +Result: TypeAlias = Annotated[Union[ResultLogs, ResultFiles], PropertyInfo(discriminator="type")] + + +class ResponseCodeInterpreterToolCall(BaseModel): + id: str + """The unique ID of the code interpreter tool call.""" + + code: str + """The code to run.""" + + results: List[Result] + """The results of the code interpreter tool call.""" + + status: Literal["in_progress", "interpreting", "completed"] + """The status of the code interpreter tool call.""" + + type: Literal["code_interpreter_call"] + """The type of the code interpreter tool call. Always `code_interpreter_call`.""" diff --git a/src/openai/types/responses/response_completed_event.py b/src/openai/types/responses/response_completed_event.py new file mode 100644 index 0000000000..a944f248ef --- /dev/null +++ b/src/openai/types/responses/response_completed_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseCompletedEvent"] + + +class ResponseCompletedEvent(BaseModel): + response: Response + """Properties of the completed response.""" + + type: Literal["response.completed"] + """The type of the event. Always `response.completed`.""" diff --git a/src/openai/types/responses/response_computer_tool_call.py b/src/openai/types/responses/response_computer_tool_call.py new file mode 100644 index 0000000000..994837567a --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call.py @@ -0,0 +1,212 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "ResponseComputerToolCall", + "Action", + "ActionClick", + "ActionDoubleClick", + "ActionDrag", + "ActionDragPath", + "ActionKeypress", + "ActionMove", + "ActionScreenshot", + "ActionScroll", + "ActionType", + "ActionWait", + "PendingSafetyCheck", +] + + +class ActionClick(BaseModel): + button: Literal["left", "right", "wheel", "back", "forward"] + """Indicates which mouse button was pressed during the click. + + One of `left`, `right`, `wheel`, `back`, or `forward`. + """ + + type: Literal["click"] + """Specifies the event type. + + For a click action, this property is always set to `click`. + """ + + x: int + """The x-coordinate where the click occurred.""" + + y: int + """The y-coordinate where the click occurred.""" + + +class ActionDoubleClick(BaseModel): + type: Literal["double_click"] + """Specifies the event type. + + For a double click action, this property is always set to `double_click`. 
+ """ + + x: int + """The x-coordinate where the double click occurred.""" + + y: int + """The y-coordinate where the double click occurred.""" + + +class ActionDragPath(BaseModel): + x: int + """The x-coordinate.""" + + y: int + """The y-coordinate.""" + + +class ActionDrag(BaseModel): + path: List[ActionDragPath] + """An array of coordinates representing the path of the drag action. + + Coordinates will appear as an array of objects, eg + + ``` + [ + { x: 100, y: 200 }, + { x: 200, y: 300 } + ] + ``` + """ + + type: Literal["drag"] + """Specifies the event type. + + For a drag action, this property is always set to `drag`. + """ + + +class ActionKeypress(BaseModel): + keys: List[str] + """The combination of keys the model is requesting to be pressed. + + This is an array of strings, each representing a key. + """ + + type: Literal["keypress"] + """Specifies the event type. + + For a keypress action, this property is always set to `keypress`. + """ + + +class ActionMove(BaseModel): + type: Literal["move"] + """Specifies the event type. + + For a move action, this property is always set to `move`. + """ + + x: int + """The x-coordinate to move to.""" + + y: int + """The y-coordinate to move to.""" + + +class ActionScreenshot(BaseModel): + type: Literal["screenshot"] + """Specifies the event type. + + For a screenshot action, this property is always set to `screenshot`. + """ + + +class ActionScroll(BaseModel): + scroll_x: int + """The horizontal scroll distance.""" + + scroll_y: int + """The vertical scroll distance.""" + + type: Literal["scroll"] + """Specifies the event type. + + For a scroll action, this property is always set to `scroll`. + """ + + x: int + """The x-coordinate where the scroll occurred.""" + + y: int + """The y-coordinate where the scroll occurred.""" + + +class ActionType(BaseModel): + text: str + """The text to type.""" + + type: Literal["type"] + """Specifies the event type. + + For a type action, this property is always set to `type`. + """ + + +class ActionWait(BaseModel): + type: Literal["wait"] + """Specifies the event type. + + For a wait action, this property is always set to `wait`. + """ + + +Action: TypeAlias = Annotated[ + Union[ + ActionClick, + ActionDoubleClick, + ActionDrag, + ActionKeypress, + ActionMove, + ActionScreenshot, + ActionScroll, + ActionType, + ActionWait, + ], + PropertyInfo(discriminator="type"), +] + + +class PendingSafetyCheck(BaseModel): + id: str + """The ID of the pending safety check.""" + + code: str + """The type of the pending safety check.""" + + message: str + """Details about the pending safety check.""" + + +class ResponseComputerToolCall(BaseModel): + id: str + """The unique ID of the computer call.""" + + action: Action + """A click action.""" + + call_id: str + """An identifier used when responding to the tool call with output.""" + + pending_safety_checks: List[PendingSafetyCheck] + """The pending safety checks for the computer call.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["computer_call"] + """The type of the computer call. 
Always `computer_call`.""" diff --git a/src/openai/types/responses/response_computer_tool_call_param.py b/src/openai/types/responses/response_computer_tool_call_param.py new file mode 100644 index 0000000000..d4ef56ab5c --- /dev/null +++ b/src/openai/types/responses/response_computer_tool_call_param.py @@ -0,0 +1,208 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "ResponseComputerToolCallParam", + "Action", + "ActionClick", + "ActionDoubleClick", + "ActionDrag", + "ActionDragPath", + "ActionKeypress", + "ActionMove", + "ActionScreenshot", + "ActionScroll", + "ActionType", + "ActionWait", + "PendingSafetyCheck", +] + + +class ActionClick(TypedDict, total=False): + button: Required[Literal["left", "right", "wheel", "back", "forward"]] + """Indicates which mouse button was pressed during the click. + + One of `left`, `right`, `wheel`, `back`, or `forward`. + """ + + type: Required[Literal["click"]] + """Specifies the event type. + + For a click action, this property is always set to `click`. + """ + + x: Required[int] + """The x-coordinate where the click occurred.""" + + y: Required[int] + """The y-coordinate where the click occurred.""" + + +class ActionDoubleClick(TypedDict, total=False): + type: Required[Literal["double_click"]] + """Specifies the event type. + + For a double click action, this property is always set to `double_click`. + """ + + x: Required[int] + """The x-coordinate where the double click occurred.""" + + y: Required[int] + """The y-coordinate where the double click occurred.""" + + +class ActionDragPath(TypedDict, total=False): + x: Required[int] + """The x-coordinate.""" + + y: Required[int] + """The y-coordinate.""" + + +class ActionDrag(TypedDict, total=False): + path: Required[Iterable[ActionDragPath]] + """An array of coordinates representing the path of the drag action. + + Coordinates will appear as an array of objects, eg + + ``` + [ + { x: 100, y: 200 }, + { x: 200, y: 300 } + ] + ``` + """ + + type: Required[Literal["drag"]] + """Specifies the event type. + + For a drag action, this property is always set to `drag`. + """ + + +class ActionKeypress(TypedDict, total=False): + keys: Required[List[str]] + """The combination of keys the model is requesting to be pressed. + + This is an array of strings, each representing a key. + """ + + type: Required[Literal["keypress"]] + """Specifies the event type. + + For a keypress action, this property is always set to `keypress`. + """ + + +class ActionMove(TypedDict, total=False): + type: Required[Literal["move"]] + """Specifies the event type. + + For a move action, this property is always set to `move`. + """ + + x: Required[int] + """The x-coordinate to move to.""" + + y: Required[int] + """The y-coordinate to move to.""" + + +class ActionScreenshot(TypedDict, total=False): + type: Required[Literal["screenshot"]] + """Specifies the event type. + + For a screenshot action, this property is always set to `screenshot`. + """ + + +class ActionScroll(TypedDict, total=False): + scroll_x: Required[int] + """The horizontal scroll distance.""" + + scroll_y: Required[int] + """The vertical scroll distance.""" + + type: Required[Literal["scroll"]] + """Specifies the event type. + + For a scroll action, this property is always set to `scroll`. 
+ """ + + x: Required[int] + """The x-coordinate where the scroll occurred.""" + + y: Required[int] + """The y-coordinate where the scroll occurred.""" + + +class ActionType(TypedDict, total=False): + text: Required[str] + """The text to type.""" + + type: Required[Literal["type"]] + """Specifies the event type. + + For a type action, this property is always set to `type`. + """ + + +class ActionWait(TypedDict, total=False): + type: Required[Literal["wait"]] + """Specifies the event type. + + For a wait action, this property is always set to `wait`. + """ + + +Action: TypeAlias = Union[ + ActionClick, + ActionDoubleClick, + ActionDrag, + ActionKeypress, + ActionMove, + ActionScreenshot, + ActionScroll, + ActionType, + ActionWait, +] + + +class PendingSafetyCheck(TypedDict, total=False): + id: Required[str] + """The ID of the pending safety check.""" + + code: Required[str] + """The type of the pending safety check.""" + + message: Required[str] + """Details about the pending safety check.""" + + +class ResponseComputerToolCallParam(TypedDict, total=False): + id: Required[str] + """The unique ID of the computer call.""" + + action: Required[Action] + """A click action.""" + + call_id: Required[str] + """An identifier used when responding to the tool call with output.""" + + pending_safety_checks: Required[Iterable[PendingSafetyCheck]] + """The pending safety checks for the computer call.""" + + status: Required[Literal["in_progress", "completed", "incomplete"]] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Required[Literal["computer_call"]] + """The type of the computer call. Always `computer_call`.""" diff --git a/src/openai/types/responses/response_content_part_added_event.py b/src/openai/types/responses/response_content_part_added_event.py new file mode 100644 index 0000000000..93f5ec4b0c --- /dev/null +++ b/src/openai/types/responses/response_content_part_added_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseContentPartAddedEvent", "Part"] + +Part: TypeAlias = Annotated[Union[ResponseOutputText, ResponseOutputRefusal], PropertyInfo(discriminator="type")] + + +class ResponseContentPartAddedEvent(BaseModel): + content_index: int + """The index of the content part that was added.""" + + item_id: str + """The ID of the output item that the content part was added to.""" + + output_index: int + """The index of the output item that the content part was added to.""" + + part: Part + """The content part that was added.""" + + type: Literal["response.content_part.added"] + """The type of the event. Always `response.content_part.added`.""" diff --git a/src/openai/types/responses/response_content_part_done_event.py b/src/openai/types/responses/response_content_part_done_event.py new file mode 100644 index 0000000000..4ec0739877 --- /dev/null +++ b/src/openai/types/responses/response_content_part_done_event.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
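Because the `*Param` types are `TypedDict`s, request-side items are written as plain dictionaries. A hedged example of a finished `computer_call` item with a single click action; the IDs are placeholders, not values the API would actually produce:

```python
from openai.types.responses.response_computer_tool_call_param import (
    ActionClick,
    ResponseComputerToolCallParam,
)

# Placeholder IDs for illustration only.
click: ActionClick = {"type": "click", "button": "left", "x": 100, "y": 200}

computer_call: ResponseComputerToolCallParam = {
    "id": "cu_example",
    "call_id": "call_example",
    "action": click,
    "pending_safety_checks": [],
    "status": "completed",
    "type": "computer_call",
}
```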
+ +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseContentPartDoneEvent", "Part"] + +Part: TypeAlias = Annotated[Union[ResponseOutputText, ResponseOutputRefusal], PropertyInfo(discriminator="type")] + + +class ResponseContentPartDoneEvent(BaseModel): + content_index: int + """The index of the content part that is done.""" + + item_id: str + """The ID of the output item that the content part was added to.""" + + output_index: int + """The index of the output item that the content part was added to.""" + + part: Part + """The content part that is done.""" + + type: Literal["response.content_part.done"] + """The type of the event. Always `response.content_part.done`.""" diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py new file mode 100644 index 0000000000..d5b2fdeb1a --- /dev/null +++ b/src/openai/types/responses/response_create_params.py @@ -0,0 +1,204 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .tool_param import ToolParam +from ..shared.chat_model import ChatModel +from .response_includable import ResponseIncludable +from .tool_choice_options import ToolChoiceOptions +from .response_input_param import ResponseInputParam +from ..shared_params.metadata import Metadata +from .tool_choice_types_param import ToolChoiceTypesParam +from ..shared_params.reasoning import Reasoning +from .response_text_config_param import ResponseTextConfigParam +from .tool_choice_function_param import ToolChoiceFunctionParam + +__all__ = [ + "ResponseCreateParamsBase", + "ToolChoice", + "ResponseCreateParamsNonStreaming", + "ResponseCreateParamsStreaming", +] + + +class ResponseCreateParamsBase(TypedDict, total=False): + input: Required[Union[str, ResponseInputParam]] + """Text, image, or file inputs to the model, used to generate a response. + + Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Image inputs](https://platform.openai.com/docs/guides/images) + - [File inputs](https://platform.openai.com/docs/guides/pdf-files) + - [Conversation state](https://platform.openai.com/docs/guides/conversation-state) + - [Function calling](https://platform.openai.com/docs/guides/function-calling) + """ + + model: Required[Union[str, ChatModel]] + """Model ID used to generate the response, like `gpt-4o` or `o1`. + + OpenAI offers a wide range of models with different capabilities, performance + characteristics, and price points. Refer to the + [model guide](https://platform.openai.com/docs/models) to browse and compare + available models. + """ + + include: Optional[List[ResponseIncludable]] + """Specify additional output data to include in the model response. + + Currently supported values are: + + - `file_search_call.results`: Include the search results of the file search tool + call. + - `message.input_image.image_url`: Include image urls from the input message. + - `computer_call_output.output.image_url`: Include image urls from the computer + call output. 
+ """ + + instructions: Optional[str] + """ + Inserts a system (or developer) message as the first item in the model's + context. + + When used along with `previous_response_id`, the instructions from a previous + response will not be carried over to the next response. This makes it simple + to swap out system (or developer) messages in new responses. + """ + + max_output_tokens: Optional[int] + """ + An upper bound for the number of tokens that can be generated for a response, + including visible output tokens and + [reasoning tokens](https://platform.openai.com/docs/guides/reasoning). + """ + + metadata: Optional[Metadata] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. + """ + + parallel_tool_calls: Optional[bool] + """Whether to allow the model to run tool calls in parallel.""" + + previous_response_id: Optional[str] + """The unique ID of the previous response to the model. + + Use this to create multi-turn conversations. Learn more about + [conversation state](https://platform.openai.com/docs/guides/conversation-state). + """ + + reasoning: Optional[Reasoning] + """**o-series models only** + + Configuration options for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). + """ + + store: Optional[bool] + """Whether to store the generated model response for later retrieval via API.""" + + temperature: Optional[float] + """What sampling temperature to use, between 0 and 2. + + Higher values like 0.8 will make the output more random, while lower values like + 0.2 will make it more focused and deterministic. We generally recommend altering + this or `top_p` but not both. + """ + + text: ResponseTextConfigParam + """Configuration options for a text response from the model. + + Can be plain text or structured JSON data. Learn more: + + - [Text inputs and outputs](https://platform.openai.com/docs/guides/text) + - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs) + """ + + tool_choice: ToolChoice + """ + How the model should select which tool (or tools) to use when generating a + response. See the `tools` parameter to see how to specify which tools the model + can call. + """ + + tools: Iterable[ToolParam] + """An array of tools the model may call while generating a response. + + You can specify which tool to use by setting the `tool_choice` parameter. + + The two categories of tools you can provide the model are: + + - **Built-in tools**: Tools that are provided by OpenAI that extend the model's + capabilities, like + [web search](https://platform.openai.com/docs/guides/tools-web-search) or + [file search](https://platform.openai.com/docs/guides/tools-file-search). + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + - **Function calls (custom tools)**: Functions that are defined by you, enabling + the model to call your own code. Learn more about + [function calling](https://platform.openai.com/docs/guides/function-calling). + """ + + top_p: Optional[float] + """ + An alternative to sampling with temperature, called nucleus sampling, where the + model considers the results of the tokens with top_p probability mass. So 0.1 + means only the tokens comprising the top 10% probability mass are considered. 
+ + We generally recommend altering this or `temperature` but not both. + """ + + truncation: Optional[Literal["auto", "disabled"]] + """The truncation strategy to use for the model response. + + - `auto`: If the context of this response and previous ones exceeds the model's + context window size, the model will truncate the response to fit the context + window by dropping input items in the middle of the conversation. + - `disabled` (default): If a model response will exceed the context window size + for a model, the request will fail with a 400 error. + """ + + user: str + """ + A unique identifier representing your end-user, which can help OpenAI to monitor + and detect abuse. + [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids). + """ + + +ToolChoice: TypeAlias = Union[ToolChoiceOptions, ToolChoiceTypesParam, ToolChoiceFunctionParam] + + +class ResponseCreateParamsNonStreaming(ResponseCreateParamsBase, total=False): + stream: Optional[Literal[False]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + """ + + +class ResponseCreateParamsStreaming(ResponseCreateParamsBase): + stream: Required[Literal[True]] + """ + If set to true, the model response data will be streamed to the client as it is + generated using + [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format). + See the + [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming) + for more information. + """ + + +ResponseCreateParams = Union[ResponseCreateParamsNonStreaming, ResponseCreateParamsStreaming] diff --git a/src/openai/types/responses/response_created_event.py b/src/openai/types/responses/response_created_event.py new file mode 100644 index 0000000000..7a524cec87 --- /dev/null +++ b/src/openai/types/responses/response_created_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseCreatedEvent"] + + +class ResponseCreatedEvent(BaseModel): + response: Response + """The response that was created.""" + + type: Literal["response.created"] + """The type of the event. Always `response.created`.""" diff --git a/src/openai/types/responses/response_error.py b/src/openai/types/responses/response_error.py new file mode 100644 index 0000000000..90f1fcf5da --- /dev/null +++ b/src/openai/types/responses/response_error.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
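`ResponseCreateParamsNonStreaming` above is also a `TypedDict`, so a request body can be sketched as a literal dict. This is only an illustration of the shape; the model ID and prompt are placeholders, and the client call that would consume the dict is not shown here.

```python
from openai.types.responses.response_create_params import ResponseCreateParamsNonStreaming

params: ResponseCreateParamsNonStreaming = {
    "model": "gpt-4o",  # placeholder model ID
    "input": "Summarize the plot of Hamlet in two sentences.",
    "temperature": 0.2,
    "max_output_tokens": 256,
    "truncation": "auto",
}
```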
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseError"] + + +class ResponseError(BaseModel): + code: Literal[ + "server_error", + "rate_limit_exceeded", + "invalid_prompt", + "vector_store_timeout", + "invalid_image", + "invalid_image_format", + "invalid_base64_image", + "invalid_image_url", + "image_too_large", + "image_too_small", + "image_parse_error", + "image_content_policy_violation", + "invalid_image_mode", + "image_file_too_large", + "unsupported_image_media_type", + "empty_image_file", + "failed_to_download_image", + "image_file_not_found", + ] + """The error code for the response.""" + + message: str + """A human-readable description of the error.""" diff --git a/src/openai/types/responses/response_error_event.py b/src/openai/types/responses/response_error_event.py new file mode 100644 index 0000000000..1b7e605d02 --- /dev/null +++ b/src/openai/types/responses/response_error_event.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseErrorEvent"] + + +class ResponseErrorEvent(BaseModel): + code: Optional[str] = None + """The error code.""" + + message: str + """The error message.""" + + param: Optional[str] = None + """The error parameter.""" + + type: Literal["error"] + """The type of the event. Always `error`.""" diff --git a/src/openai/types/responses/response_failed_event.py b/src/openai/types/responses/response_failed_event.py new file mode 100644 index 0000000000..3e8f75d8c4 --- /dev/null +++ b/src/openai/types/responses/response_failed_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseFailedEvent"] + + +class ResponseFailedEvent(BaseModel): + response: Response + """The response that failed.""" + + type: Literal["response.failed"] + """The type of the event. Always `response.failed`.""" diff --git a/src/openai/types/responses/response_file_search_call_completed_event.py b/src/openai/types/responses/response_file_search_call_completed_event.py new file mode 100644 index 0000000000..4b86083369 --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_completed_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallCompletedEvent"] + + +class ResponseFileSearchCallCompletedEvent(BaseModel): + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is initiated.""" + + type: Literal["response.file_search_call.completed"] + """The type of the event. Always `response.file_search_call.completed`.""" diff --git a/src/openai/types/responses/response_file_search_call_in_progress_event.py b/src/openai/types/responses/response_file_search_call_in_progress_event.py new file mode 100644 index 0000000000..eb42e3dad6 --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_in_progress_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
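The error surface comes in two flavors: a stream-level `error` event and a terminal `response.failed` event whose `response.error` uses the `ResponseError` codes listed above. A small, illustrative helper for turning either into a log line (the helper name is my own):

```python
from openai.types.responses.response_error_event import ResponseErrorEvent
from openai.types.responses.response_failed_event import ResponseFailedEvent


def describe_failure(event: object) -> str:
    """Render an error or failed event as a log-friendly string."""
    if isinstance(event, ResponseErrorEvent):
        return f"stream error {event.code or 'unknown'}: {event.message}"
    if isinstance(event, ResponseFailedEvent):
        err = event.response.error
        detail = f"{err.code}: {err.message}" if err is not None else "no error details"
        return f"response {event.response.id} failed ({detail})"
    return "not a failure event"
```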
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallInProgressEvent"] + + +class ResponseFileSearchCallInProgressEvent(BaseModel): + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is initiated.""" + + type: Literal["response.file_search_call.in_progress"] + """The type of the event. Always `response.file_search_call.in_progress`.""" diff --git a/src/openai/types/responses/response_file_search_call_searching_event.py b/src/openai/types/responses/response_file_search_call_searching_event.py new file mode 100644 index 0000000000..3cd8905de6 --- /dev/null +++ b/src/openai/types/responses/response_file_search_call_searching_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchCallSearchingEvent"] + + +class ResponseFileSearchCallSearchingEvent(BaseModel): + item_id: str + """The ID of the output item that the file search call is initiated.""" + + output_index: int + """The index of the output item that the file search call is searching.""" + + type: Literal["response.file_search_call.searching"] + """The type of the event. Always `response.file_search_call.searching`.""" diff --git a/src/openai/types/responses/response_file_search_tool_call.py b/src/openai/types/responses/response_file_search_tool_call.py new file mode 100644 index 0000000000..ef1c6a5608 --- /dev/null +++ b/src/openai/types/responses/response_file_search_tool_call.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFileSearchToolCall", "Result"] + + +class Result(BaseModel): + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + file_id: Optional[str] = None + """The unique ID of the file.""" + + filename: Optional[str] = None + """The name of the file.""" + + score: Optional[float] = None + """The relevance score of the file - a value between 0 and 1.""" + + text: Optional[str] = None + """The text that was retrieved from the file.""" + + +class ResponseFileSearchToolCall(BaseModel): + id: str + """The unique ID of the file search tool call.""" + + queries: List[str] + """The queries used to search for files.""" + + status: Literal["in_progress", "searching", "completed", "incomplete", "failed"] + """The status of the file search tool call. + + One of `in_progress`, `searching`, `completed`, `incomplete`, or `failed`. + """ + + type: Literal["file_search_call"] + """The type of the file search tool call. 
Always `file_search_call`.""" + + results: Optional[List[Result]] = None + """The results of the file search tool call.""" diff --git a/src/openai/types/responses/response_file_search_tool_call_param.py b/src/openai/types/responses/response_file_search_tool_call_param.py new file mode 100644 index 0000000000..9a4177cf81 --- /dev/null +++ b/src/openai/types/responses/response_file_search_tool_call_param.py @@ -0,0 +1,51 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Iterable, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFileSearchToolCallParam", "Result"] + + +class Result(TypedDict, total=False): + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + file_id: str + """The unique ID of the file.""" + + filename: str + """The name of the file.""" + + score: float + """The relevance score of the file - a value between 0 and 1.""" + + text: str + """The text that was retrieved from the file.""" + + +class ResponseFileSearchToolCallParam(TypedDict, total=False): + id: Required[str] + """The unique ID of the file search tool call.""" + + queries: Required[List[str]] + """The queries used to search for files.""" + + status: Required[Literal["in_progress", "searching", "completed", "incomplete", "failed"]] + """The status of the file search tool call. + + One of `in_progress`, `searching`, `completed`, `incomplete`, or `failed`. + """ + + type: Required[Literal["file_search_call"]] + """The type of the file search tool call. Always `file_search_call`.""" + + results: Optional[Iterable[Result]] + """The results of the file search tool call.""" diff --git a/src/openai/types/responses/response_format_text_config.py b/src/openai/types/responses/response_format_text_config.py new file mode 100644 index 0000000000..a4896bf9fe --- /dev/null +++ b/src/openai/types/responses/response_format_text_config.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..shared.response_format_text import ResponseFormatText +from ..shared.response_format_json_object import ResponseFormatJSONObject +from .response_format_text_json_schema_config import ResponseFormatTextJSONSchemaConfig + +__all__ = ["ResponseFormatTextConfig"] + +ResponseFormatTextConfig: TypeAlias = Annotated[ + Union[ResponseFormatText, ResponseFormatTextJSONSchemaConfig, ResponseFormatJSONObject], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_format_text_config_param.py b/src/openai/types/responses/response_format_text_config_param.py new file mode 100644 index 0000000000..fcaf8f3fb6 --- /dev/null +++ b/src/openai/types/responses/response_format_text_config_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
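Since `results` on `ResponseFileSearchToolCall` above is optional, and each `Result` field may itself be `None`, downstream code should guard accordingly. A hedged sketch that ranks results by score (the function name is my own):

```python
from typing import List

from openai.types.responses.response_file_search_tool_call import ResponseFileSearchToolCall


def top_result_names(call: ResponseFileSearchToolCall, limit: int = 3) -> List[str]:
    """Return the filenames (or file IDs) of the highest-scoring file search results."""
    results = call.results or []
    ranked = sorted(results, key=lambda r: r.score or 0.0, reverse=True)
    return [r.filename or r.file_id or "<unknown>" for r in ranked[:limit]]
```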
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from ..shared_params.response_format_text import ResponseFormatText +from ..shared_params.response_format_json_object import ResponseFormatJSONObject +from .response_format_text_json_schema_config_param import ResponseFormatTextJSONSchemaConfigParam + +__all__ = ["ResponseFormatTextConfigParam"] + +ResponseFormatTextConfigParam: TypeAlias = Union[ + ResponseFormatText, ResponseFormatTextJSONSchemaConfigParam, ResponseFormatJSONObject +] diff --git a/src/openai/types/responses/response_format_text_json_schema_config.py b/src/openai/types/responses/response_format_text_json_schema_config.py new file mode 100644 index 0000000000..3cf066370f --- /dev/null +++ b/src/openai/types/responses/response_format_text_json_schema_config.py @@ -0,0 +1,43 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ..._models import BaseModel + +__all__ = ["ResponseFormatTextJSONSchemaConfig"] + + +class ResponseFormatTextJSONSchemaConfig(BaseModel): + schema_: Dict[str, object] = FieldInfo(alias="schema") + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + type: Literal["json_schema"] + """The type of response format being defined. Always `json_schema`.""" + + description: Optional[str] = None + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + name: Optional[str] = None + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + strict: Optional[bool] = None + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ diff --git a/src/openai/types/responses/response_format_text_json_schema_config_param.py b/src/openai/types/responses/response_format_text_json_schema_config_param.py new file mode 100644 index 0000000000..211c5d1eff --- /dev/null +++ b/src/openai/types/responses/response_format_text_json_schema_config_param.py @@ -0,0 +1,41 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatTextJSONSchemaConfigParam"] + + +class ResponseFormatTextJSONSchemaConfigParam(TypedDict, total=False): + schema: Required[Dict[str, object]] + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + type: Required[Literal["json_schema"]] + """The type of response format being defined. Always `json_schema`.""" + + description: str + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + name: str + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. 
+ """ + + strict: Optional[bool] + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ diff --git a/src/openai/types/responses/response_function_call_arguments_delta_event.py b/src/openai/types/responses/response_function_call_arguments_delta_event.py new file mode 100644 index 0000000000..0989b7caeb --- /dev/null +++ b/src/openai/types/responses/response_function_call_arguments_delta_event.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDeltaEvent"] + + +class ResponseFunctionCallArgumentsDeltaEvent(BaseModel): + delta: str + """The function-call arguments delta that is added.""" + + item_id: str + """The ID of the output item that the function-call arguments delta is added to.""" + + output_index: int + """ + The index of the output item that the function-call arguments delta is added to. + """ + + type: Literal["response.function_call_arguments.delta"] + """The type of the event. Always `response.function_call_arguments.delta`.""" diff --git a/src/openai/types/responses/response_function_call_arguments_done_event.py b/src/openai/types/responses/response_function_call_arguments_done_event.py new file mode 100644 index 0000000000..1d805a57c6 --- /dev/null +++ b/src/openai/types/responses/response_function_call_arguments_done_event.py @@ -0,0 +1,20 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionCallArgumentsDoneEvent"] + + +class ResponseFunctionCallArgumentsDoneEvent(BaseModel): + arguments: str + """The function-call arguments.""" + + item_id: str + """The ID of the item.""" + + output_index: int + """The index of the output item.""" + + type: Literal["response.function_call_arguments.done"] diff --git a/src/openai/types/responses/response_function_tool_call.py b/src/openai/types/responses/response_function_tool_call.py new file mode 100644 index 0000000000..2a8482204e --- /dev/null +++ b/src/openai/types/responses/response_function_tool_call.py @@ -0,0 +1,32 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionToolCall"] + + +class ResponseFunctionToolCall(BaseModel): + arguments: str + """A JSON string of the arguments to pass to the function.""" + + call_id: str + """The unique ID of the function tool call generated by the model.""" + + name: str + """The name of the function to run.""" + + type: Literal["function_call"] + """The type of the function tool call. Always `function_call`.""" + + id: Optional[str] = None + """The unique ID of the function tool call.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. 
+ """ diff --git a/src/openai/types/responses/response_function_tool_call_param.py b/src/openai/types/responses/response_function_tool_call_param.py new file mode 100644 index 0000000000..eaa263cf67 --- /dev/null +++ b/src/openai/types/responses/response_function_tool_call_param.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFunctionToolCallParam"] + + +class ResponseFunctionToolCallParam(TypedDict, total=False): + arguments: Required[str] + """A JSON string of the arguments to pass to the function.""" + + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + name: Required[str] + """The name of the function to run.""" + + type: Required[Literal["function_call"]] + """The type of the function tool call. Always `function_call`.""" + + id: str + """The unique ID of the function tool call.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_function_web_search.py b/src/openai/types/responses/response_function_web_search.py new file mode 100644 index 0000000000..44734b681f --- /dev/null +++ b/src/openai/types/responses/response_function_web_search.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFunctionWebSearch"] + + +class ResponseFunctionWebSearch(BaseModel): + id: str + """The unique ID of the web search tool call.""" + + status: Literal["in_progress", "searching", "completed", "failed"] + """The status of the web search tool call.""" + + type: Literal["web_search_call"] + """The type of the web search tool call. Always `web_search_call`.""" diff --git a/src/openai/types/responses/response_function_web_search_param.py b/src/openai/types/responses/response_function_web_search_param.py new file mode 100644 index 0000000000..d413e60b12 --- /dev/null +++ b/src/openai/types/responses/response_function_web_search_param.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFunctionWebSearchParam"] + + +class ResponseFunctionWebSearchParam(TypedDict, total=False): + id: Required[str] + """The unique ID of the web search tool call.""" + + status: Required[Literal["in_progress", "searching", "completed", "failed"]] + """The status of the web search tool call.""" + + type: Required[Literal["web_search_call"]] + """The type of the web search tool call. Always `web_search_call`.""" diff --git a/src/openai/types/responses/response_in_progress_event.py b/src/openai/types/responses/response_in_progress_event.py new file mode 100644 index 0000000000..7d96cbb8ad --- /dev/null +++ b/src/openai/types/responses/response_in_progress_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
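`ResponseFunctionToolCall.arguments` above is a JSON string rather than a parsed object, so callers decode it themselves. An illustrative round-trip with placeholder IDs and a made-up function name; in practice these objects arrive as output items on a `Response` rather than being constructed by hand:

```python
import json

from openai.types.responses.response_function_tool_call import ResponseFunctionToolCall

call = ResponseFunctionToolCall(
    type="function_call",
    call_id="call_example",        # placeholder ID
    name="get_weather",            # hypothetical function name
    arguments='{"city": "Oslo"}',
)

args = json.loads(call.arguments)
print(f"model wants {call.name}({args!r})")
```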
+ +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseInProgressEvent"] + + +class ResponseInProgressEvent(BaseModel): + response: Response + """The response that is in progress.""" + + type: Literal["response.in_progress"] + """The type of the event. Always `response.in_progress`.""" diff --git a/src/openai/types/responses/response_includable.py b/src/openai/types/responses/response_includable.py new file mode 100644 index 0000000000..83489fa7f1 --- /dev/null +++ b/src/openai/types/responses/response_includable.py @@ -0,0 +1,9 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ResponseIncludable"] + +ResponseIncludable: TypeAlias = Literal[ + "file_search_call.results", "message.input_image.image_url", "computer_call_output.output.image_url" +] diff --git a/src/openai/types/responses/response_incomplete_event.py b/src/openai/types/responses/response_incomplete_event.py new file mode 100644 index 0000000000..742b789c7e --- /dev/null +++ b/src/openai/types/responses/response_incomplete_event.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .response import Response +from ..._models import BaseModel + +__all__ = ["ResponseIncompleteEvent"] + + +class ResponseIncompleteEvent(BaseModel): + response: Response + """The response that was incomplete.""" + + type: Literal["response.incomplete"] + """The type of the event. Always `response.incomplete`.""" diff --git a/src/openai/types/responses/response_input_content.py b/src/openai/types/responses/response_input_content.py new file mode 100644 index 0000000000..1726909a17 --- /dev/null +++ b/src/openai/types/responses/response_input_content.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_input_file import ResponseInputFile +from .response_input_text import ResponseInputText +from .response_input_image import ResponseInputImage + +__all__ = ["ResponseInputContent"] + +ResponseInputContent: TypeAlias = Annotated[ + Union[ResponseInputText, ResponseInputImage, ResponseInputFile], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/responses/response_input_content_param.py b/src/openai/types/responses/response_input_content_param.py new file mode 100644 index 0000000000..7791cdfd8e --- /dev/null +++ b/src/openai/types/responses/response_input_content_param.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
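`ResponseCreatedEvent`, `ResponseInProgressEvent`, `ResponseCompletedEvent`, `ResponseFailedEvent`, and `ResponseIncompleteEvent` all carry a full `Response` snapshot; the last three describe a final state. A small sketch of that distinction (the grouping and names are mine, not something the generated code defines):

```python
from openai.types.responses.response_completed_event import ResponseCompletedEvent
from openai.types.responses.response_failed_event import ResponseFailedEvent
from openai.types.responses.response_incomplete_event import ResponseIncompleteEvent

# Events whose `response` field represents a final Response state.
TERMINAL_EVENT_TYPES = (
    ResponseCompletedEvent,
    ResponseFailedEvent,
    ResponseIncompleteEvent,
)


def is_terminal(event: object) -> bool:
    """True once an event carries the final state of the Response."""
    return isinstance(event, TERMINAL_EVENT_TYPES)
```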
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .response_input_file_param import ResponseInputFileParam +from .response_input_text_param import ResponseInputTextParam +from .response_input_image_param import ResponseInputImageParam + +__all__ = ["ResponseInputContentParam"] + +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] diff --git a/src/openai/types/responses/response_input_file.py b/src/openai/types/responses/response_input_file.py new file mode 100644 index 0000000000..00b35dc844 --- /dev/null +++ b/src/openai/types/responses/response_input_file.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputFile"] + + +class ResponseInputFile(BaseModel): + type: Literal["input_file"] + """The type of the input item. Always `input_file`.""" + + file_data: Optional[str] = None + """The content of the file to be sent to the model.""" + + file_id: Optional[str] = None + """The ID of the file to be sent to the model.""" + + filename: Optional[str] = None + """The name of the file to be sent to the model.""" diff --git a/src/openai/types/responses/response_input_file_param.py b/src/openai/types/responses/response_input_file_param.py new file mode 100644 index 0000000000..dc06a4ea2d --- /dev/null +++ b/src/openai/types/responses/response_input_file_param.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputFileParam"] + + +class ResponseInputFileParam(TypedDict, total=False): + type: Required[Literal["input_file"]] + """The type of the input item. Always `input_file`.""" + + file_data: str + """The content of the file to be sent to the model.""" + + file_id: str + """The ID of the file to be sent to the model.""" + + filename: str + """The name of the file to be sent to the model.""" diff --git a/src/openai/types/responses/response_input_image.py b/src/openai/types/responses/response_input_image.py new file mode 100644 index 0000000000..d719f44e9b --- /dev/null +++ b/src/openai/types/responses/response_input_image.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputImage"] + + +class ResponseInputImage(BaseModel): + detail: Literal["high", "low", "auto"] + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + type: Literal["input_image"] + """The type of the input item. Always `input_image`.""" + + file_id: Optional[str] = None + """The ID of the file to be sent to the model.""" + + image_url: Optional[str] = None + """The URL of the image to be sent to the model. + + A fully qualified URL or base64 encoded image in a data URL. 
+ """ diff --git a/src/openai/types/responses/response_input_image_param.py b/src/openai/types/responses/response_input_image_param.py new file mode 100644 index 0000000000..5dd4db2b5d --- /dev/null +++ b/src/openai/types/responses/response_input_image_param.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputImageParam"] + + +class ResponseInputImageParam(TypedDict, total=False): + detail: Required[Literal["high", "low", "auto"]] + """The detail level of the image to be sent to the model. + + One of `high`, `low`, or `auto`. Defaults to `auto`. + """ + + type: Required[Literal["input_image"]] + """The type of the input item. Always `input_image`.""" + + file_id: Optional[str] + """The ID of the file to be sent to the model.""" + + image_url: Optional[str] + """The URL of the image to be sent to the model. + + A fully qualified URL or base64 encoded image in a data URL. + """ diff --git a/src/openai/types/responses/response_input_item_param.py b/src/openai/types/responses/response_input_item_param.py new file mode 100644 index 0000000000..32ac13cabb --- /dev/null +++ b/src/openai/types/responses/response_input_item_param.py @@ -0,0 +1,145 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .easy_input_message_param import EasyInputMessageParam +from .response_output_message_param import ResponseOutputMessageParam +from .response_reasoning_item_param import ResponseReasoningItemParam +from .response_computer_tool_call_param import ResponseComputerToolCallParam +from .response_function_tool_call_param import ResponseFunctionToolCallParam +from .response_function_web_search_param import ResponseFunctionWebSearchParam +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam +from .response_input_message_content_list_param import ResponseInputMessageContentListParam + +__all__ = [ + "ResponseInputItemParam", + "Message", + "ComputerCallOutput", + "ComputerCallOutputOutput", + "ComputerCallOutputAcknowledgedSafetyCheck", + "FunctionCallOutput", + "ItemReference", +] + + +class Message(TypedDict, total=False): + content: Required[ResponseInputMessageContentListParam] + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Required[Literal["user", "system", "developer"]] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["message"] + """The type of the message input. Always set to `message`.""" + + +class ComputerCallOutputOutput(TypedDict, total=False): + type: Required[Literal["computer_screenshot"]] + """Specifies the event type. + + For a computer screenshot, this property is always set to `computer_screenshot`. 
+ """ + + file_id: str + """The identifier of an uploaded file that contains the screenshot.""" + + image_url: str + """The URL of the screenshot image.""" + + +class ComputerCallOutputAcknowledgedSafetyCheck(TypedDict, total=False): + id: Required[str] + """The ID of the pending safety check.""" + + code: Required[str] + """The type of the pending safety check.""" + + message: Required[str] + """Details about the pending safety check.""" + + +class ComputerCallOutput(TypedDict, total=False): + call_id: Required[str] + """The ID of the computer tool call that produced the output.""" + + output: Required[ComputerCallOutputOutput] + """A computer screenshot image used with the computer use tool.""" + + type: Required[Literal["computer_call_output"]] + """The type of the computer tool call output. Always `computer_call_output`.""" + + id: str + """The ID of the computer tool call output.""" + + acknowledged_safety_checks: Iterable[ComputerCallOutputAcknowledgedSafetyCheck] + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + +class FunctionCallOutput(TypedDict, total=False): + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + output: Required[str] + """A JSON string of the output of the function tool call.""" + + type: Required[Literal["function_call_output"]] + """The type of the function tool call output. Always `function_call_output`.""" + + id: str + """The unique ID of the function tool call output. + + Populated when this item is returned via API. + """ + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + +class ItemReference(TypedDict, total=False): + id: Required[str] + """The ID of the item to reference.""" + + type: Required[Literal["item_reference"]] + """The type of item to reference. Always `item_reference`.""" + + +ResponseInputItemParam: TypeAlias = Union[ + EasyInputMessageParam, + Message, + ResponseOutputMessageParam, + ResponseFileSearchToolCallParam, + ResponseComputerToolCallParam, + ComputerCallOutput, + ResponseFunctionWebSearchParam, + ResponseFunctionToolCallParam, + FunctionCallOutput, + ResponseReasoningItemParam, + ItemReference, +] diff --git a/src/openai/types/responses/response_input_message_content_list.py b/src/openai/types/responses/response_input_message_content_list.py new file mode 100644 index 0000000000..99b7c10f12 --- /dev/null +++ b/src/openai/types/responses/response_input_message_content_list.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List +from typing_extensions import TypeAlias + +from .response_input_content import ResponseInputContent + +__all__ = ["ResponseInputMessageContentList"] + +ResponseInputMessageContentList: TypeAlias = List[ResponseInputContent] diff --git a/src/openai/types/responses/response_input_message_content_list_param.py b/src/openai/types/responses/response_input_message_content_list_param.py new file mode 100644 index 0000000000..080613df0d --- /dev/null +++ b/src/openai/types/responses/response_input_message_content_list_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union +from typing_extensions import TypeAlias + +from .response_input_file_param import ResponseInputFileParam +from .response_input_text_param import ResponseInputTextParam +from .response_input_image_param import ResponseInputImageParam + +__all__ = ["ResponseInputMessageContentListParam", "ResponseInputContentParam"] + +ResponseInputContentParam: TypeAlias = Union[ResponseInputTextParam, ResponseInputImageParam, ResponseInputFileParam] + +ResponseInputMessageContentListParam: TypeAlias = List[ResponseInputContentParam] diff --git a/src/openai/types/responses/response_input_param.py b/src/openai/types/responses/response_input_param.py new file mode 100644 index 0000000000..b942f4868a --- /dev/null +++ b/src/openai/types/responses/response_input_param.py @@ -0,0 +1,148 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List, Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .easy_input_message_param import EasyInputMessageParam +from .response_output_message_param import ResponseOutputMessageParam +from .response_reasoning_item_param import ResponseReasoningItemParam +from .response_computer_tool_call_param import ResponseComputerToolCallParam +from .response_function_tool_call_param import ResponseFunctionToolCallParam +from .response_function_web_search_param import ResponseFunctionWebSearchParam +from .response_file_search_tool_call_param import ResponseFileSearchToolCallParam +from .response_input_message_content_list_param import ResponseInputMessageContentListParam + +__all__ = [ + "ResponseInputParam", + "ResponseInputItemParam", + "Message", + "ComputerCallOutput", + "ComputerCallOutputOutput", + "ComputerCallOutputAcknowledgedSafetyCheck", + "FunctionCallOutput", + "ItemReference", +] + + +class Message(TypedDict, total=False): + content: Required[ResponseInputMessageContentListParam] + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Required[Literal["user", "system", "developer"]] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Literal["message"] + """The type of the message input. Always set to `message`.""" + + +class ComputerCallOutputOutput(TypedDict, total=False): + type: Required[Literal["computer_screenshot"]] + """Specifies the event type. + + For a computer screenshot, this property is always set to `computer_screenshot`. 
+ """ + + file_id: str + """The identifier of an uploaded file that contains the screenshot.""" + + image_url: str + """The URL of the screenshot image.""" + + +class ComputerCallOutputAcknowledgedSafetyCheck(TypedDict, total=False): + id: Required[str] + """The ID of the pending safety check.""" + + code: Required[str] + """The type of the pending safety check.""" + + message: Required[str] + """Details about the pending safety check.""" + + +class ComputerCallOutput(TypedDict, total=False): + call_id: Required[str] + """The ID of the computer tool call that produced the output.""" + + output: Required[ComputerCallOutputOutput] + """A computer screenshot image used with the computer use tool.""" + + type: Required[Literal["computer_call_output"]] + """The type of the computer tool call output. Always `computer_call_output`.""" + + id: str + """The ID of the computer tool call output.""" + + acknowledged_safety_checks: Iterable[ComputerCallOutputAcknowledgedSafetyCheck] + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + +class FunctionCallOutput(TypedDict, total=False): + call_id: Required[str] + """The unique ID of the function tool call generated by the model.""" + + output: Required[str] + """A JSON string of the output of the function tool call.""" + + type: Required[Literal["function_call_output"]] + """The type of the function tool call output. Always `function_call_output`.""" + + id: str + """The unique ID of the function tool call output. + + Populated when this item is returned via API. + """ + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + +class ItemReference(TypedDict, total=False): + id: Required[str] + """The ID of the item to reference.""" + + type: Required[Literal["item_reference"]] + """The type of item to reference. Always `item_reference`.""" + + +ResponseInputItemParam: TypeAlias = Union[ + EasyInputMessageParam, + Message, + ResponseOutputMessageParam, + ResponseFileSearchToolCallParam, + ResponseComputerToolCallParam, + ComputerCallOutput, + ResponseFunctionWebSearchParam, + ResponseFunctionToolCallParam, + FunctionCallOutput, + ResponseReasoningItemParam, + ItemReference, +] + +ResponseInputParam: TypeAlias = List[ResponseInputItemParam] diff --git a/src/openai/types/responses/response_input_text.py b/src/openai/types/responses/response_input_text.py new file mode 100644 index 0000000000..ba8d1ea18b --- /dev/null +++ b/src/openai/types/responses/response_input_text.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseInputText"] + + +class ResponseInputText(BaseModel): + text: str + """The text input to the model.""" + + type: Literal["input_text"] + """The type of the input item. 
Always `input_text`.""" diff --git a/src/openai/types/responses/response_input_text_param.py b/src/openai/types/responses/response_input_text_param.py new file mode 100644 index 0000000000..f2ba834082 --- /dev/null +++ b/src/openai/types/responses/response_input_text_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseInputTextParam"] + + +class ResponseInputTextParam(TypedDict, total=False): + text: Required[str] + """The text input to the model.""" + + type: Required[Literal["input_text"]] + """The type of the input item. Always `input_text`.""" diff --git a/src/openai/types/responses/response_item_list.py b/src/openai/types/responses/response_item_list.py new file mode 100644 index 0000000000..7c3e4d7f82 --- /dev/null +++ b/src/openai/types/responses/response_item_list.py @@ -0,0 +1,152 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union, Optional +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_message import ResponseOutputMessage +from .response_computer_tool_call import ResponseComputerToolCall +from .response_function_tool_call import ResponseFunctionToolCall +from .response_function_web_search import ResponseFunctionWebSearch +from .response_file_search_tool_call import ResponseFileSearchToolCall +from .response_input_message_content_list import ResponseInputMessageContentList + +__all__ = [ + "ResponseItemList", + "Data", + "DataMessage", + "DataComputerCallOutput", + "DataComputerCallOutputOutput", + "DataComputerCallOutputAcknowledgedSafetyCheck", + "DataFunctionCallOutput", +] + + +class DataMessage(BaseModel): + id: str + """The unique ID of the message input.""" + + content: ResponseInputMessageContentList + """ + A list of one or many input items to the model, containing different content + types. + """ + + role: Literal["user", "system", "developer"] + """The role of the message input. One of `user`, `system`, or `developer`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + type: Optional[Literal["message"]] = None + """The type of the message input. Always set to `message`.""" + + +class DataComputerCallOutputOutput(BaseModel): + type: Literal["computer_screenshot"] + """Specifies the event type. + + For a computer screenshot, this property is always set to `computer_screenshot`. 
+ """ + + file_id: Optional[str] = None + """The identifier of an uploaded file that contains the screenshot.""" + + image_url: Optional[str] = None + """The URL of the screenshot image.""" + + +class DataComputerCallOutputAcknowledgedSafetyCheck(BaseModel): + id: str + """The ID of the pending safety check.""" + + code: str + """The type of the pending safety check.""" + + message: str + """Details about the pending safety check.""" + + +class DataComputerCallOutput(BaseModel): + id: str + """The unique ID of the computer call tool output.""" + + call_id: str + """The ID of the computer tool call that produced the output.""" + + output: DataComputerCallOutputOutput + """A computer screenshot image used with the computer use tool.""" + + type: Literal["computer_call_output"] + """The type of the computer tool call output. Always `computer_call_output`.""" + + acknowledged_safety_checks: Optional[List[DataComputerCallOutputAcknowledgedSafetyCheck]] = None + """ + The safety checks reported by the API that have been acknowledged by the + developer. + """ + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + +class DataFunctionCallOutput(BaseModel): + id: str + """The unique ID of the function call tool output.""" + + call_id: str + """The unique ID of the function tool call generated by the model.""" + + output: str + """A JSON string of the output of the function tool call.""" + + type: Literal["function_call_output"] + """The type of the function tool call output. Always `function_call_output`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ + + +Data: TypeAlias = Annotated[ + Union[ + DataMessage, + ResponseOutputMessage, + ResponseFileSearchToolCall, + ResponseComputerToolCall, + DataComputerCallOutput, + ResponseFunctionWebSearch, + ResponseFunctionToolCall, + DataFunctionCallOutput, + ], + PropertyInfo(discriminator="type"), +] + + +class ResponseItemList(BaseModel): + data: List[Data] + """A list of items used to generate this response.""" + + first_id: str + """The ID of the first item in the list.""" + + has_more: bool + """Whether there are more items available.""" + + last_id: str + """The ID of the last item in the list.""" + + object: Literal["list"] + """The type of object returned, must be `list`.""" diff --git a/src/openai/types/responses/response_output_item.py b/src/openai/types/responses/response_output_item.py new file mode 100644 index 0000000000..f1e9693195 --- /dev/null +++ b/src/openai/types/responses/response_output_item.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_output_message import ResponseOutputMessage +from .response_reasoning_item import ResponseReasoningItem +from .response_computer_tool_call import ResponseComputerToolCall +from .response_function_tool_call import ResponseFunctionToolCall +from .response_function_web_search import ResponseFunctionWebSearch +from .response_file_search_tool_call import ResponseFileSearchToolCall + +__all__ = ["ResponseOutputItem"] + +ResponseOutputItem: TypeAlias = Annotated[ + Union[ + ResponseOutputMessage, + ResponseFileSearchToolCall, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseComputerToolCall, + ResponseReasoningItem, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_output_item_added_event.py b/src/openai/types/responses/response_output_item_added_event.py new file mode 100644 index 0000000000..7344fb9a6c --- /dev/null +++ b/src/openai/types/responses/response_output_item_added_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_output_item import ResponseOutputItem + +__all__ = ["ResponseOutputItemAddedEvent"] + + +class ResponseOutputItemAddedEvent(BaseModel): + item: ResponseOutputItem + """The output item that was added.""" + + output_index: int + """The index of the output item that was added.""" + + type: Literal["response.output_item.added"] + """The type of the event. Always `response.output_item.added`.""" diff --git a/src/openai/types/responses/response_output_item_done_event.py b/src/openai/types/responses/response_output_item_done_event.py new file mode 100644 index 0000000000..a0a871a019 --- /dev/null +++ b/src/openai/types/responses/response_output_item_done_event.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel +from .response_output_item import ResponseOutputItem + +__all__ = ["ResponseOutputItemDoneEvent"] + + +class ResponseOutputItemDoneEvent(BaseModel): + item: ResponseOutputItem + """The output item that was marked done.""" + + output_index: int + """The index of the output item that was marked done.""" + + type: Literal["response.output_item.done"] + """The type of the event. Always `response.output_item.done`.""" diff --git a/src/openai/types/responses/response_output_message.py b/src/openai/types/responses/response_output_message.py new file mode 100644 index 0000000000..3864aa2111 --- /dev/null +++ b/src/openai/types/responses/response_output_message.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel +from .response_output_text import ResponseOutputText +from .response_output_refusal import ResponseOutputRefusal + +__all__ = ["ResponseOutputMessage", "Content"] + +Content: TypeAlias = Annotated[Union[ResponseOutputText, ResponseOutputRefusal], PropertyInfo(discriminator="type")] + + +class ResponseOutputMessage(BaseModel): + id: str + """The unique ID of the output message.""" + + content: List[Content] + """The content of the output message.""" + + role: Literal["assistant"] + """The role of the output message. Always `assistant`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + type: Literal["message"] + """The type of the output message. Always `message`.""" diff --git a/src/openai/types/responses/response_output_message_param.py b/src/openai/types/responses/response_output_message_param.py new file mode 100644 index 0000000000..46cbbd20de --- /dev/null +++ b/src/openai/types/responses/response_output_message_param.py @@ -0,0 +1,34 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .response_output_text_param import ResponseOutputTextParam +from .response_output_refusal_param import ResponseOutputRefusalParam + +__all__ = ["ResponseOutputMessageParam", "Content"] + +Content: TypeAlias = Union[ResponseOutputTextParam, ResponseOutputRefusalParam] + + +class ResponseOutputMessageParam(TypedDict, total=False): + id: Required[str] + """The unique ID of the output message.""" + + content: Required[Iterable[Content]] + """The content of the output message.""" + + role: Required[Literal["assistant"]] + """The role of the output message. Always `assistant`.""" + + status: Required[Literal["in_progress", "completed", "incomplete"]] + """The status of the message input. + + One of `in_progress`, `completed`, or `incomplete`. Populated when input items + are returned via API. + """ + + type: Required[Literal["message"]] + """The type of the output message. Always `message`.""" diff --git a/src/openai/types/responses/response_output_refusal.py b/src/openai/types/responses/response_output_refusal.py new file mode 100644 index 0000000000..eba581070d --- /dev/null +++ b/src/openai/types/responses/response_output_refusal.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseOutputRefusal"] + + +class ResponseOutputRefusal(BaseModel): + refusal: str + """The refusal explanationfrom the model.""" + + type: Literal["refusal"] + """The type of the refusal. Always `refusal`.""" diff --git a/src/openai/types/responses/response_output_refusal_param.py b/src/openai/types/responses/response_output_refusal_param.py new file mode 100644 index 0000000000..53140a6080 --- /dev/null +++ b/src/openai/types/responses/response_output_refusal_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseOutputRefusalParam"] + + +class ResponseOutputRefusalParam(TypedDict, total=False): + refusal: Required[str] + """The refusal explanationfrom the model.""" + + type: Required[Literal["refusal"]] + """The type of the refusal. Always `refusal`.""" diff --git a/src/openai/types/responses/response_output_text.py b/src/openai/types/responses/response_output_text.py new file mode 100644 index 0000000000..fa653cd1af --- /dev/null +++ b/src/openai/types/responses/response_output_text.py @@ -0,0 +1,64 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = ["ResponseOutputText", "Annotation", "AnnotationFileCitation", "AnnotationURLCitation", "AnnotationFilePath"] + + +class AnnotationFileCitation(BaseModel): + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_citation"] + """The type of the file citation. Always `file_citation`.""" + + +class AnnotationURLCitation(BaseModel): + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + type: Literal["url_citation"] + """The type of the URL citation. Always `url_citation`.""" + + url: str + """The URL of the web resource.""" + + +class AnnotationFilePath(BaseModel): + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_path"] + """The type of the file path. Always `file_path`.""" + + +Annotation: TypeAlias = Annotated[ + Union[AnnotationFileCitation, AnnotationURLCitation, AnnotationFilePath], PropertyInfo(discriminator="type") +] + + +class ResponseOutputText(BaseModel): + annotations: List[Annotation] + """The annotations of the text output.""" + + text: str + """The text output from the model.""" + + type: Literal["output_text"] + """The type of the output text. Always `output_text`.""" diff --git a/src/openai/types/responses/response_output_text_param.py b/src/openai/types/responses/response_output_text_param.py new file mode 100644 index 0000000000..1f0967285f --- /dev/null +++ b/src/openai/types/responses/response_output_text_param.py @@ -0,0 +1,67 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +__all__ = [ + "ResponseOutputTextParam", + "Annotation", + "AnnotationFileCitation", + "AnnotationURLCitation", + "AnnotationFilePath", +] + + +class AnnotationFileCitation(TypedDict, total=False): + file_id: Required[str] + """The ID of the file.""" + + index: Required[int] + """The index of the file in the list of files.""" + + type: Required[Literal["file_citation"]] + """The type of the file citation. 
Always `file_citation`.""" + + +class AnnotationURLCitation(TypedDict, total=False): + end_index: Required[int] + """The index of the last character of the URL citation in the message.""" + + start_index: Required[int] + """The index of the first character of the URL citation in the message.""" + + title: Required[str] + """The title of the web resource.""" + + type: Required[Literal["url_citation"]] + """The type of the URL citation. Always `url_citation`.""" + + url: Required[str] + """The URL of the web resource.""" + + +class AnnotationFilePath(TypedDict, total=False): + file_id: Required[str] + """The ID of the file.""" + + index: Required[int] + """The index of the file in the list of files.""" + + type: Required[Literal["file_path"]] + """The type of the file path. Always `file_path`.""" + + +Annotation: TypeAlias = Union[AnnotationFileCitation, AnnotationURLCitation, AnnotationFilePath] + + +class ResponseOutputTextParam(TypedDict, total=False): + annotations: Required[Iterable[Annotation]] + """The annotations of the text output.""" + + text: Required[str] + """The text output from the model.""" + + type: Required[Literal["output_text"]] + """The type of the output text. Always `output_text`.""" diff --git a/src/openai/types/responses/response_reasoning_item.py b/src/openai/types/responses/response_reasoning_item.py new file mode 100644 index 0000000000..57e5fbfe6d --- /dev/null +++ b/src/openai/types/responses/response_reasoning_item.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseReasoningItem", "Summary"] + + +class Summary(BaseModel): + text: str + """ + A short summary of the reasoning used by the model when generating the response. + """ + + type: Literal["summary_text"] + """The type of the object. Always `summary_text`.""" + + +class ResponseReasoningItem(BaseModel): + id: str + """The unique identifier of the reasoning content.""" + + summary: List[Summary] + """Reasoning text contents.""" + + type: Literal["reasoning"] + """The type of the object. Always `reasoning`.""" + + status: Optional[Literal["in_progress", "completed", "incomplete"]] = None + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_reasoning_item_param.py b/src/openai/types/responses/response_reasoning_item_param.py new file mode 100644 index 0000000000..adb49d6402 --- /dev/null +++ b/src/openai/types/responses/response_reasoning_item_param.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Iterable +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseReasoningItemParam", "Summary"] + + +class Summary(TypedDict, total=False): + text: Required[str] + """ + A short summary of the reasoning used by the model when generating the response. + """ + + type: Required[Literal["summary_text"]] + """The type of the object. Always `summary_text`.""" + + +class ResponseReasoningItemParam(TypedDict, total=False): + id: Required[str] + """The unique identifier of the reasoning content.""" + + summary: Required[Iterable[Summary]] + """Reasoning text contents.""" + + type: Required[Literal["reasoning"]] + """The type of the object. 
Always `reasoning`.""" + + status: Literal["in_progress", "completed", "incomplete"] + """The status of the item. + + One of `in_progress`, `completed`, or `incomplete`. Populated when items are + returned via API. + """ diff --git a/src/openai/types/responses/response_refusal_delta_event.py b/src/openai/types/responses/response_refusal_delta_event.py new file mode 100644 index 0000000000..04dcdf1c8c --- /dev/null +++ b/src/openai/types/responses/response_refusal_delta_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseRefusalDeltaEvent"] + + +class ResponseRefusalDeltaEvent(BaseModel): + content_index: int + """The index of the content part that the refusal text is added to.""" + + delta: str + """The refusal text that is added.""" + + item_id: str + """The ID of the output item that the refusal text is added to.""" + + output_index: int + """The index of the output item that the refusal text is added to.""" + + type: Literal["response.refusal.delta"] + """The type of the event. Always `response.refusal.delta`.""" diff --git a/src/openai/types/responses/response_refusal_done_event.py b/src/openai/types/responses/response_refusal_done_event.py new file mode 100644 index 0000000000..a9b6f4b055 --- /dev/null +++ b/src/openai/types/responses/response_refusal_done_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseRefusalDoneEvent"] + + +class ResponseRefusalDoneEvent(BaseModel): + content_index: int + """The index of the content part that the refusal text is finalized.""" + + item_id: str + """The ID of the output item that the refusal text is finalized.""" + + output_index: int + """The index of the output item that the refusal text is finalized.""" + + refusal: str + """The refusal text that is finalized.""" + + type: Literal["response.refusal.done"] + """The type of the event. Always `response.refusal.done`.""" diff --git a/src/openai/types/responses/response_retrieve_params.py b/src/openai/types/responses/response_retrieve_params.py new file mode 100644 index 0000000000..137bf4dcee --- /dev/null +++ b/src/openai/types/responses/response_retrieve_params.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import TypedDict + +from .response_includable import ResponseIncludable + +__all__ = ["ResponseRetrieveParams"] + + +class ResponseRetrieveParams(TypedDict, total=False): + include: List[ResponseIncludable] + """Additional fields to include in the response. + + See the `include` parameter for Response creation above for more information. + """ diff --git a/src/openai/types/responses/response_status.py b/src/openai/types/responses/response_status.py new file mode 100644 index 0000000000..934d17cda3 --- /dev/null +++ b/src/openai/types/responses/response_status.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
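(Illustrative note, not part of the patch: a minimal retrieve-params payload using the ResponseIncludable values introduced earlier in this diff; the import path mirrors the file added above.)

from openai.types.responses.response_retrieve_params import ResponseRetrieveParams

# Hypothetical request for extra fields on a retrieved response.
params: ResponseRetrieveParams = {
    "include": ["file_search_call.results", "message.input_image.image_url"],
}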
+ +from typing_extensions import Literal, TypeAlias + +__all__ = ["ResponseStatus"] + +ResponseStatus: TypeAlias = Literal["completed", "failed", "in_progress", "incomplete"] diff --git a/src/openai/types/responses/response_stream_event.py b/src/openai/types/responses/response_stream_event.py new file mode 100644 index 0000000000..446863b175 --- /dev/null +++ b/src/openai/types/responses/response_stream_event.py @@ -0,0 +1,78 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .response_error_event import ResponseErrorEvent +from .response_failed_event import ResponseFailedEvent +from .response_created_event import ResponseCreatedEvent +from .response_completed_event import ResponseCompletedEvent +from .response_text_done_event import ResponseTextDoneEvent +from .response_audio_done_event import ResponseAudioDoneEvent +from .response_incomplete_event import ResponseIncompleteEvent +from .response_text_delta_event import ResponseTextDeltaEvent +from .response_audio_delta_event import ResponseAudioDeltaEvent +from .response_in_progress_event import ResponseInProgressEvent +from .response_refusal_done_event import ResponseRefusalDoneEvent +from .response_refusal_delta_event import ResponseRefusalDeltaEvent +from .response_output_item_done_event import ResponseOutputItemDoneEvent +from .response_content_part_done_event import ResponseContentPartDoneEvent +from .response_output_item_added_event import ResponseOutputItemAddedEvent +from .response_content_part_added_event import ResponseContentPartAddedEvent +from .response_audio_transcript_done_event import ResponseAudioTranscriptDoneEvent +from .response_text_annotation_delta_event import ResponseTextAnnotationDeltaEvent +from .response_audio_transcript_delta_event import ResponseAudioTranscriptDeltaEvent +from .response_web_search_call_completed_event import ResponseWebSearchCallCompletedEvent +from .response_web_search_call_searching_event import ResponseWebSearchCallSearchingEvent +from .response_file_search_call_completed_event import ResponseFileSearchCallCompletedEvent +from .response_file_search_call_searching_event import ResponseFileSearchCallSearchingEvent +from .response_web_search_call_in_progress_event import ResponseWebSearchCallInProgressEvent +from .response_file_search_call_in_progress_event import ResponseFileSearchCallInProgressEvent +from .response_function_call_arguments_done_event import ResponseFunctionCallArgumentsDoneEvent +from .response_function_call_arguments_delta_event import ResponseFunctionCallArgumentsDeltaEvent +from .response_code_interpreter_call_code_done_event import ResponseCodeInterpreterCallCodeDoneEvent +from .response_code_interpreter_call_completed_event import ResponseCodeInterpreterCallCompletedEvent +from .response_code_interpreter_call_code_delta_event import ResponseCodeInterpreterCallCodeDeltaEvent +from .response_code_interpreter_call_in_progress_event import ResponseCodeInterpreterCallInProgressEvent +from .response_code_interpreter_call_interpreting_event import ResponseCodeInterpreterCallInterpretingEvent + +__all__ = ["ResponseStreamEvent"] + +ResponseStreamEvent: TypeAlias = Annotated[ + Union[ + ResponseAudioDeltaEvent, + ResponseAudioDoneEvent, + ResponseAudioTranscriptDeltaEvent, + ResponseAudioTranscriptDoneEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + 
ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCompletedEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseCreatedEvent, + ResponseErrorEvent, + ResponseFileSearchCallCompletedEvent, + ResponseFileSearchCallInProgressEvent, + ResponseFileSearchCallSearchingEvent, + ResponseFunctionCallArgumentsDeltaEvent, + ResponseFunctionCallArgumentsDoneEvent, + ResponseInProgressEvent, + ResponseFailedEvent, + ResponseIncompleteEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseRefusalDeltaEvent, + ResponseRefusalDoneEvent, + ResponseTextAnnotationDeltaEvent, + ResponseTextDeltaEvent, + ResponseTextDoneEvent, + ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + ], + PropertyInfo(discriminator="type"), +] diff --git a/src/openai/types/responses/response_text_annotation_delta_event.py b/src/openai/types/responses/response_text_annotation_delta_event.py new file mode 100644 index 0000000000..4f2582282a --- /dev/null +++ b/src/openai/types/responses/response_text_annotation_delta_event.py @@ -0,0 +1,79 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Literal, Annotated, TypeAlias + +from ..._utils import PropertyInfo +from ..._models import BaseModel + +__all__ = [ + "ResponseTextAnnotationDeltaEvent", + "Annotation", + "AnnotationFileCitation", + "AnnotationURLCitation", + "AnnotationFilePath", +] + + +class AnnotationFileCitation(BaseModel): + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_citation"] + """The type of the file citation. Always `file_citation`.""" + + +class AnnotationURLCitation(BaseModel): + end_index: int + """The index of the last character of the URL citation in the message.""" + + start_index: int + """The index of the first character of the URL citation in the message.""" + + title: str + """The title of the web resource.""" + + type: Literal["url_citation"] + """The type of the URL citation. Always `url_citation`.""" + + url: str + """The URL of the web resource.""" + + +class AnnotationFilePath(BaseModel): + file_id: str + """The ID of the file.""" + + index: int + """The index of the file in the list of files.""" + + type: Literal["file_path"] + """The type of the file path. Always `file_path`.""" + + +Annotation: TypeAlias = Annotated[ + Union[AnnotationFileCitation, AnnotationURLCitation, AnnotationFilePath], PropertyInfo(discriminator="type") +] + + +class ResponseTextAnnotationDeltaEvent(BaseModel): + annotation: Annotation + """A citation to a file.""" + + annotation_index: int + """The index of the annotation that was added.""" + + content_index: int + """The index of the content part that the text annotation was added to.""" + + item_id: str + """The ID of the output item that the text annotation was added to.""" + + output_index: int + """The index of the output item that the text annotation was added to.""" + + type: Literal["response.output_text.annotation.added"] + """The type of the event. 
Always `response.output_text.annotation.added`.""" diff --git a/src/openai/types/responses/response_text_config.py b/src/openai/types/responses/response_text_config.py new file mode 100644 index 0000000000..a1894a9176 --- /dev/null +++ b/src/openai/types/responses/response_text_config.py @@ -0,0 +1,26 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel +from .response_format_text_config import ResponseFormatTextConfig + +__all__ = ["ResponseTextConfig"] + + +class ResponseTextConfig(BaseModel): + format: Optional[ResponseFormatTextConfig] = None + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ diff --git a/src/openai/types/responses/response_text_config_param.py b/src/openai/types/responses/response_text_config_param.py new file mode 100644 index 0000000000..aec064bf89 --- /dev/null +++ b/src/openai/types/responses/response_text_config_param.py @@ -0,0 +1,27 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import TypedDict + +from .response_format_text_config_param import ResponseFormatTextConfigParam + +__all__ = ["ResponseTextConfigParam"] + + +class ResponseTextConfigParam(TypedDict, total=False): + format: ResponseFormatTextConfigParam + """An object specifying the format that the model must output. + + Configuring `{ "type": "json_schema" }` enables Structured Outputs, which + ensures the model will match your supplied JSON schema. Learn more in the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + + The default format is `{ "type": "text" }` with no additional options. + + **Not recommended for gpt-4o and newer models:** + + Setting to `{ "type": "json_object" }` enables the older JSON mode, which + ensures the message the model generates is valid JSON. Using `json_schema` is + preferred for models that support it. + """ diff --git a/src/openai/types/responses/response_text_delta_event.py b/src/openai/types/responses/response_text_delta_event.py new file mode 100644 index 0000000000..751a5e2a19 --- /dev/null +++ b/src/openai/types/responses/response_text_delta_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseTextDeltaEvent"] + + +class ResponseTextDeltaEvent(BaseModel): + content_index: int + """The index of the content part that the text delta was added to.""" + + delta: str + """The text delta that was added.""" + + item_id: str + """The ID of the output item that the text delta was added to.""" + + output_index: int + """The index of the output item that the text delta was added to.""" + + type: Literal["response.output_text.delta"] + """The type of the event. 
Always `response.output_text.delta`.""" diff --git a/src/openai/types/responses/response_text_done_event.py b/src/openai/types/responses/response_text_done_event.py new file mode 100644 index 0000000000..9b5c5e020c --- /dev/null +++ b/src/openai/types/responses/response_text_done_event.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseTextDoneEvent"] + + +class ResponseTextDoneEvent(BaseModel): + content_index: int + """The index of the content part that the text content is finalized.""" + + item_id: str + """The ID of the output item that the text content is finalized.""" + + output_index: int + """The index of the output item that the text content is finalized.""" + + text: str + """The text content that is finalized.""" + + type: Literal["response.output_text.done"] + """The type of the event. Always `response.output_text.done`.""" diff --git a/src/openai/types/responses/response_usage.py b/src/openai/types/responses/response_usage.py new file mode 100644 index 0000000000..9ad36bd326 --- /dev/null +++ b/src/openai/types/responses/response_usage.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from ..._models import BaseModel + +__all__ = ["ResponseUsage", "InputTokensDetails", "OutputTokensDetails"] + + +class InputTokensDetails(BaseModel): + cached_tokens: int + """The number of tokens that were retrieved from the cache. + + [More on prompt caching](https://platform.openai.com/docs/guides/prompt-caching). + """ + + +class OutputTokensDetails(BaseModel): + reasoning_tokens: int + """The number of reasoning tokens.""" + + +class ResponseUsage(BaseModel): + input_tokens: int + """The number of input tokens.""" + + input_tokens_details: InputTokensDetails + """A detailed breakdown of the input tokens.""" + + output_tokens: int + """The number of output tokens.""" + + output_tokens_details: OutputTokensDetails + """A detailed breakdown of the output tokens.""" + + total_tokens: int + """The total number of tokens used.""" diff --git a/src/openai/types/responses/response_web_search_call_completed_event.py b/src/openai/types/responses/response_web_search_call_completed_event.py new file mode 100644 index 0000000000..76f26766a1 --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_completed_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallCompletedEvent"] + + +class ResponseWebSearchCallCompletedEvent(BaseModel): + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + type: Literal["response.web_search_call.completed"] + """The type of the event. Always `response.web_search_call.completed`.""" diff --git a/src/openai/types/responses/response_web_search_call_in_progress_event.py b/src/openai/types/responses/response_web_search_call_in_progress_event.py new file mode 100644 index 0000000000..681ce6d94b --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_in_progress_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
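(Illustrative note, not part of the patch: a rough sketch of accumulating streamed output text from the ResponseStreamEvent union above; `events` is assumed to be an iterable of stream events obtained elsewhere.)

def collect_output_text(events) -> str:
    chunks: list[str] = []
    for event in events:
        if event.type == "response.output_text.delta":
            # Incremental text for one content part of one output item.
            chunks.append(event.delta)
        elif event.type == "response.output_text.done":
            print(f"text finalized for item {event.item_id}")
    return "".join(chunks)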
+ +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallInProgressEvent"] + + +class ResponseWebSearchCallInProgressEvent(BaseModel): + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + type: Literal["response.web_search_call.in_progress"] + """The type of the event. Always `response.web_search_call.in_progress`.""" diff --git a/src/openai/types/responses/response_web_search_call_searching_event.py b/src/openai/types/responses/response_web_search_call_searching_event.py new file mode 100644 index 0000000000..c885d98918 --- /dev/null +++ b/src/openai/types/responses/response_web_search_call_searching_event.py @@ -0,0 +1,18 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseWebSearchCallSearchingEvent"] + + +class ResponseWebSearchCallSearchingEvent(BaseModel): + item_id: str + """Unique ID for the output item associated with the web search call.""" + + output_index: int + """The index of the output item that the web search call is associated with.""" + + type: Literal["response.web_search_call.searching"] + """The type of the event. Always `response.web_search_call.searching`.""" diff --git a/src/openai/types/responses/tool.py b/src/openai/types/responses/tool.py new file mode 100644 index 0000000000..de5d5524d4 --- /dev/null +++ b/src/openai/types/responses/tool.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Union +from typing_extensions import Annotated, TypeAlias + +from ..._utils import PropertyInfo +from .computer_tool import ComputerTool +from .function_tool import FunctionTool +from .web_search_tool import WebSearchTool +from .file_search_tool import FileSearchTool + +__all__ = ["Tool"] + +Tool: TypeAlias = Annotated[ + Union[FileSearchTool, FunctionTool, ComputerTool, WebSearchTool], PropertyInfo(discriminator="type") +] diff --git a/src/openai/types/responses/tool_choice_function.py b/src/openai/types/responses/tool_choice_function.py new file mode 100644 index 0000000000..8d2a4f2822 --- /dev/null +++ b/src/openai/types/responses/tool_choice_function.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceFunction"] + + +class ToolChoiceFunction(BaseModel): + name: str + """The name of the function to call.""" + + type: Literal["function"] + """For function calling, the type is always `function`.""" diff --git a/src/openai/types/responses/tool_choice_function_param.py b/src/openai/types/responses/tool_choice_function_param.py new file mode 100644 index 0000000000..910537fd97 --- /dev/null +++ b/src/openai/types/responses/tool_choice_function_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceFunctionParam"] + + +class ToolChoiceFunctionParam(TypedDict, total=False): + name: Required[str] + """The name of the function to call.""" + + type: Required[Literal["function"]] + """For function calling, the type is always `function`.""" diff --git a/src/openai/types/responses/tool_choice_options.py b/src/openai/types/responses/tool_choice_options.py new file mode 100644 index 0000000000..c200db54e1 --- /dev/null +++ b/src/openai/types/responses/tool_choice_options.py @@ -0,0 +1,7 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ToolChoiceOptions"] + +ToolChoiceOptions: TypeAlias = Literal["none", "auto", "required"] diff --git a/src/openai/types/responses/tool_choice_types.py b/src/openai/types/responses/tool_choice_types.py new file mode 100644 index 0000000000..4942808f14 --- /dev/null +++ b/src/openai/types/responses/tool_choice_types.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ToolChoiceTypes"] + + +class ToolChoiceTypes(BaseModel): + type: Literal["file_search", "web_search_preview", "computer_use_preview", "web_search_preview_2025_03_11"] + """The type of hosted tool the model should to use. + + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + + Allowed values are: + + - `file_search` + - `web_search_preview` + - `computer_use_preview` + """ diff --git a/src/openai/types/responses/tool_choice_types_param.py b/src/openai/types/responses/tool_choice_types_param.py new file mode 100644 index 0000000000..b14f2a9eb0 --- /dev/null +++ b/src/openai/types/responses/tool_choice_types_param.py @@ -0,0 +1,24 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ToolChoiceTypesParam"] + + +class ToolChoiceTypesParam(TypedDict, total=False): + type: Required[ + Literal["file_search", "web_search_preview", "computer_use_preview", "web_search_preview_2025_03_11"] + ] + """The type of hosted tool the model should to use. + + Learn more about + [built-in tools](https://platform.openai.com/docs/guides/tools). + + Allowed values are: + + - `file_search` + - `web_search_preview` + - `computer_use_preview` + """ diff --git a/src/openai/types/responses/tool_param.py b/src/openai/types/responses/tool_param.py new file mode 100644 index 0000000000..8bb089c5f1 --- /dev/null +++ b/src/openai/types/responses/tool_param.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
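(Illustrative note, not part of the patch: two tool-choice payloads matching the TypedDicts above; the function name is a made-up placeholder.)

from openai.types.responses.tool_choice_types_param import ToolChoiceTypesParam
from openai.types.responses.tool_choice_function_param import ToolChoiceFunctionParam

# Force the hosted file search tool.
hosted_choice: ToolChoiceTypesParam = {"type": "file_search"}

# Force a specific developer-defined function (name is hypothetical).
function_choice: ToolChoiceFunctionParam = {"type": "function", "name": "get_weather"}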
+ +from __future__ import annotations + +from typing import Union +from typing_extensions import TypeAlias + +from .computer_tool_param import ComputerToolParam +from .function_tool_param import FunctionToolParam +from .web_search_tool_param import WebSearchToolParam +from .file_search_tool_param import FileSearchToolParam + +__all__ = ["ToolParam"] + +ToolParam: TypeAlias = Union[FileSearchToolParam, FunctionToolParam, ComputerToolParam, WebSearchToolParam] diff --git a/src/openai/types/responses/web_search_tool.py b/src/openai/types/responses/web_search_tool.py new file mode 100644 index 0000000000..bee270bf85 --- /dev/null +++ b/src/openai/types/responses/web_search_tool.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["WebSearchTool", "UserLocation"] + + +class UserLocation(BaseModel): + type: Literal["approximate"] + """The type of location approximation. Always `approximate`.""" + + city: Optional[str] = None + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: Optional[str] = None + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: Optional[str] = None + """Free text input for the region of the user, e.g. `California`.""" + + timezone: Optional[str] = None + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + +class WebSearchTool(BaseModel): + type: Literal["web_search_preview", "web_search_preview_2025_03_11"] + """The type of the web search tool. One of: + + - `web_search_preview` + - `web_search_preview_2025_03_11` + """ + + search_context_size: Optional[Literal["low", "medium", "high"]] = None + """ + High level guidance for the amount of context window space to use for the + search. One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[UserLocation] = None diff --git a/src/openai/types/responses/web_search_tool_param.py b/src/openai/types/responses/web_search_tool_param.py new file mode 100644 index 0000000000..8ee36ffb47 --- /dev/null +++ b/src/openai/types/responses/web_search_tool_param.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["WebSearchToolParam", "UserLocation"] + + +class UserLocation(TypedDict, total=False): + type: Required[Literal["approximate"]] + """The type of location approximation. Always `approximate`.""" + + city: str + """Free text input for the city of the user, e.g. `San Francisco`.""" + + country: str + """ + The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of + the user, e.g. `US`. + """ + + region: str + """Free text input for the region of the user, e.g. `California`.""" + + timezone: str + """ + The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the + user, e.g. `America/Los_Angeles`. + """ + + +class WebSearchToolParam(TypedDict, total=False): + type: Required[Literal["web_search_preview", "web_search_preview_2025_03_11"]] + """The type of the web search tool. 
One of: + + - `web_search_preview` + - `web_search_preview_2025_03_11` + """ + + search_context_size: Literal["low", "medium", "high"] + """ + High level guidance for the amount of context window space to use for the + search. One of `low`, `medium`, or `high`. `medium` is the default. + """ + + user_location: Optional[UserLocation] diff --git a/src/openai/types/shared/__init__.py b/src/openai/types/shared/__init__.py index e085744e29..6ccc2313cc 100644 --- a/src/openai/types/shared/__init__.py +++ b/src/openai/types/shared/__init__.py @@ -1,5 +1,14 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .metadata import Metadata as Metadata +from .reasoning import Reasoning as Reasoning +from .chat_model import ChatModel as ChatModel from .error_object import ErrorObject as ErrorObject +from .compound_filter import CompoundFilter as CompoundFilter +from .reasoning_effort import ReasoningEffort as ReasoningEffort +from .comparison_filter import ComparisonFilter as ComparisonFilter from .function_definition import FunctionDefinition as FunctionDefinition from .function_parameters import FunctionParameters as FunctionParameters +from .response_format_text import ResponseFormatText as ResponseFormatText +from .response_format_json_object import ResponseFormatJSONObject as ResponseFormatJSONObject +from .response_format_json_schema import ResponseFormatJSONSchema as ResponseFormatJSONSchema diff --git a/src/openai/types/shared/chat_model.py b/src/openai/types/shared/chat_model.py new file mode 100644 index 0000000000..31d7104e6e --- /dev/null +++ b/src/openai/types/shared/chat_model.py @@ -0,0 +1,52 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatModel"] + +ChatModel: TypeAlias = Literal[ + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "computer-use-preview", + "computer-use-preview-2025-02-04", + "computer-use-preview-2025-03-11", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "chatgpt-4o-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", +] diff --git a/src/openai/types/shared/comparison_filter.py b/src/openai/types/shared/comparison_filter.py new file mode 100644 index 0000000000..2ec2651ff2 --- /dev/null +++ b/src/openai/types/shared/comparison_filter.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from typing import Union +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ComparisonFilter"] + + +class ComparisonFilter(BaseModel): + key: str + """The key to compare against the value.""" + + type: Literal["eq", "ne", "gt", "gte", "lt", "lte"] + """Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`. + + - `eq`: equals + - `ne`: not equal + - `gt`: greater than + - `gte`: greater than or equal + - `lt`: less than + - `lte`: less than or equal + """ + + value: Union[str, float, bool] + """ + The value to compare against the attribute key; supports string, number, or + boolean types. + """ diff --git a/src/openai/types/shared/compound_filter.py b/src/openai/types/shared/compound_filter.py new file mode 100644 index 0000000000..3aefa43647 --- /dev/null +++ b/src/openai/types/shared/compound_filter.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import List, Union +from typing_extensions import Literal, TypeAlias + +from ..._models import BaseModel +from .comparison_filter import ComparisonFilter + +__all__ = ["CompoundFilter", "Filter"] + +Filter: TypeAlias = Union[ComparisonFilter, object] + + +class CompoundFilter(BaseModel): + filters: List[Filter] + """Array of filters to combine. + + Items can be `ComparisonFilter` or `CompoundFilter`. + """ + + type: Literal["and", "or"] + """Type of operation: `and` or `or`.""" diff --git a/src/openai/types/shared/function_definition.py b/src/openai/types/shared/function_definition.py index a39116d6bd..06baa23170 100644 --- a/src/openai/types/shared/function_definition.py +++ b/src/openai/types/shared/function_definition.py @@ -25,11 +25,19 @@ class FunctionDefinition(BaseModel): parameters: Optional[FunctionParameters] = None """The parameters the functions accepts, described as a JSON Schema object. - See the - [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. Omitting `parameters` defines a function with an empty parameter list. """ + + strict: Optional[bool] = None + """Whether to enable strict schema adherence when generating the function call. + + If set to true, the model will follow the exact schema defined in the + `parameters` field. Only a subset of JSON Schema is supported when `strict` is + `true`. Learn more about Structured Outputs in the + [function calling guide](docs/guides/function-calling). + """ diff --git a/src/openai/types/shared/function_parameters.py b/src/openai/types/shared/function_parameters.py index c9524e4cb8..a3d83e3496 100644 --- a/src/openai/types/shared/function_parameters.py +++ b/src/openai/types/shared/function_parameters.py @@ -1,7 +1,8 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. from typing import Dict +from typing_extensions import TypeAlias __all__ = ["FunctionParameters"] -FunctionParameters = Dict[str, object] +FunctionParameters: TypeAlias = Dict[str, object] diff --git a/src/openai/types/shared/metadata.py b/src/openai/types/shared/metadata.py new file mode 100644 index 0000000000..0da88c679c --- /dev/null +++ b/src/openai/types/shared/metadata.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
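The `ComparisonFilter`/`CompoundFilter` pair above defines a small boolean filter language over file attributes; the param-side TypedDicts later in this diff use the same dict shapes. A hedged sketch of composing one (the attribute keys are hypothetical), intended for endpoints such as vector store search that accept a `filters` argument:

```python
from openai.types.shared.compound_filter import CompoundFilter

# (category == "blog") AND (year >= 2024); the inner dicts follow the
# ComparisonFilter shape and are accepted as-is because Filter includes `object`.
recent_blog_posts = CompoundFilter(
    type="and",
    filters=[
        {"type": "eq", "key": "category", "value": "blog"},
        {"type": "gte", "key": "year", "value": 2024},
    ],
)
print(recent_blog_posts.type, len(recent_blog_posts.filters))
```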
+ +from typing import Dict +from typing_extensions import TypeAlias + +__all__ = ["Metadata"] + +Metadata: TypeAlias = Dict[str, str] diff --git a/src/openai/types/shared/reasoning.py b/src/openai/types/shared/reasoning.py new file mode 100644 index 0000000000..78a396d738 --- /dev/null +++ b/src/openai/types/shared/reasoning.py @@ -0,0 +1,28 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from ..._models import BaseModel +from .reasoning_effort import ReasoningEffort + +__all__ = ["Reasoning"] + + +class Reasoning(BaseModel): + effort: Optional[ReasoningEffort] = None + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + + generate_summary: Optional[Literal["concise", "detailed"]] = None + """**computer_use_preview only** + + A summary of the reasoning performed by the model. This can be useful for + debugging and understanding the model's reasoning process. One of `concise` or + `detailed`. + """ diff --git a/src/openai/types/shared/reasoning_effort.py b/src/openai/types/shared/reasoning_effort.py new file mode 100644 index 0000000000..ace21b67e4 --- /dev/null +++ b/src/openai/types/shared/reasoning_effort.py @@ -0,0 +1,8 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal, TypeAlias + +__all__ = ["ReasoningEffort"] + +ReasoningEffort: TypeAlias = Optional[Literal["low", "medium", "high"]] diff --git a/src/openai/types/shared/response_format_json_object.py b/src/openai/types/shared/response_format_json_object.py new file mode 100644 index 0000000000..2aaa5dbdfe --- /dev/null +++ b/src/openai/types/shared/response_format_json_object.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFormatJSONObject"] + + +class ResponseFormatJSONObject(BaseModel): + type: Literal["json_object"] + """The type of response format being defined. Always `json_object`.""" diff --git a/src/openai/types/shared/response_format_json_schema.py b/src/openai/types/shared/response_format_json_schema.py new file mode 100644 index 0000000000..c7924446f4 --- /dev/null +++ b/src/openai/types/shared/response_format_json_schema.py @@ -0,0 +1,48 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, Optional +from typing_extensions import Literal + +from pydantic import Field as FieldInfo + +from ..._models import BaseModel + +__all__ = ["ResponseFormatJSONSchema", "JSONSchema"] + + +class JSONSchema(BaseModel): + name: str + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: Optional[str] = None + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + schema_: Optional[Dict[str, object]] = FieldInfo(alias="schema", default=None) + """ + The schema for the response format, described as a JSON Schema object. 
Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + strict: Optional[bool] = None + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ + + +class ResponseFormatJSONSchema(BaseModel): + json_schema: JSONSchema + """Structured Outputs configuration options, including a JSON Schema.""" + + type: Literal["json_schema"] + """The type of response format being defined. Always `json_schema`.""" diff --git a/src/openai/types/shared/response_format_text.py b/src/openai/types/shared/response_format_text.py new file mode 100644 index 0000000000..f0c8cfb700 --- /dev/null +++ b/src/openai/types/shared/response_format_text.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["ResponseFormatText"] + + +class ResponseFormatText(BaseModel): + type: Literal["text"] + """The type of response format being defined. Always `text`.""" diff --git a/src/openai/types/shared_params/__init__.py b/src/openai/types/shared_params/__init__.py index ef638cb279..4a4a8cdf1e 100644 --- a/src/openai/types/shared_params/__init__.py +++ b/src/openai/types/shared_params/__init__.py @@ -1,4 +1,13 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. +from .metadata import Metadata as Metadata +from .reasoning import Reasoning as Reasoning +from .chat_model import ChatModel as ChatModel +from .compound_filter import CompoundFilter as CompoundFilter +from .reasoning_effort import ReasoningEffort as ReasoningEffort +from .comparison_filter import ComparisonFilter as ComparisonFilter from .function_definition import FunctionDefinition as FunctionDefinition from .function_parameters import FunctionParameters as FunctionParameters +from .response_format_text import ResponseFormatText as ResponseFormatText +from .response_format_json_object import ResponseFormatJSONObject as ResponseFormatJSONObject +from .response_format_json_schema import ResponseFormatJSONSchema as ResponseFormatJSONSchema diff --git a/src/openai/types/shared_params/chat_model.py b/src/openai/types/shared_params/chat_model.py new file mode 100644 index 0000000000..55649876eb --- /dev/null +++ b/src/openai/types/shared_params/chat_model.py @@ -0,0 +1,54 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
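`ResponseFormatJSONSchema` is the Structured Outputs variant of the shared response formats. A hedged sketch of the equivalent request-side dict with Chat Completions (the schema itself is made up for illustration):

```python
from openai import OpenAI

client = OpenAI()

# Ask for strict, schema-constrained JSON; the response_format dict mirrors the
# ResponseFormatJSONSchema / JSONSchema fields defined above.
completion = client.chat.completions.create(
    model="gpt-4o-2024-08-06",
    messages=[{"role": "user", "content": "I live in Paris, France. Extract the city and country."}],
    response_format={
        "type": "json_schema",
        "json_schema": {
            "name": "location",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {"city": {"type": "string"}, "country": {"type": "string"}},
                "required": ["city", "country"],
                "additionalProperties": False,
            },
        },
    },
)
print(completion.choices[0].message.content)
```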
+ +from __future__ import annotations + +from typing_extensions import Literal, TypeAlias + +__all__ = ["ChatModel"] + +ChatModel: TypeAlias = Literal[ + "o3-mini", + "o3-mini-2025-01-31", + "o1", + "o1-2024-12-17", + "o1-preview", + "o1-preview-2024-09-12", + "o1-mini", + "o1-mini-2024-09-12", + "computer-use-preview", + "computer-use-preview-2025-02-04", + "computer-use-preview-2025-03-11", + "gpt-4.5-preview", + "gpt-4.5-preview-2025-02-27", + "gpt-4o", + "gpt-4o-2024-11-20", + "gpt-4o-2024-08-06", + "gpt-4o-2024-05-13", + "gpt-4o-audio-preview", + "gpt-4o-audio-preview-2024-10-01", + "gpt-4o-audio-preview-2024-12-17", + "gpt-4o-mini-audio-preview", + "gpt-4o-mini-audio-preview-2024-12-17", + "chatgpt-4o-latest", + "gpt-4o-mini", + "gpt-4o-mini-2024-07-18", + "gpt-4-turbo", + "gpt-4-turbo-2024-04-09", + "gpt-4-0125-preview", + "gpt-4-turbo-preview", + "gpt-4-1106-preview", + "gpt-4-vision-preview", + "gpt-4", + "gpt-4-0314", + "gpt-4-0613", + "gpt-4-32k", + "gpt-4-32k-0314", + "gpt-4-32k-0613", + "gpt-3.5-turbo", + "gpt-3.5-turbo-16k", + "gpt-3.5-turbo-0301", + "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-1106", + "gpt-3.5-turbo-0125", + "gpt-3.5-turbo-16k-0613", +] diff --git a/src/openai/types/shared_params/comparison_filter.py b/src/openai/types/shared_params/comparison_filter.py new file mode 100644 index 0000000000..38edd315ed --- /dev/null +++ b/src/openai/types/shared_params/comparison_filter.py @@ -0,0 +1,30 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ComparisonFilter"] + + +class ComparisonFilter(TypedDict, total=False): + key: Required[str] + """The key to compare against the value.""" + + type: Required[Literal["eq", "ne", "gt", "gte", "lt", "lte"]] + """Specifies the comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`. + + - `eq`: equals + - `ne`: not equal + - `gt`: greater than + - `gte`: greater than or equal + - `lt`: less than + - `lte`: less than or equal + """ + + value: Required[Union[str, float, bool]] + """ + The value to compare against the attribute key; supports string, number, or + boolean types. + """ diff --git a/src/openai/types/shared_params/compound_filter.py b/src/openai/types/shared_params/compound_filter.py new file mode 100644 index 0000000000..d12e9b1bda --- /dev/null +++ b/src/openai/types/shared_params/compound_filter.py @@ -0,0 +1,23 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Union, Iterable +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .comparison_filter import ComparisonFilter + +__all__ = ["CompoundFilter", "Filter"] + +Filter: TypeAlias = Union[ComparisonFilter, object] + + +class CompoundFilter(TypedDict, total=False): + filters: Required[Iterable[Filter]] + """Array of filters to combine. + + Items can be `ComparisonFilter` or `CompoundFilter`. 
+ """ + + type: Required[Literal["and", "or"]] + """Type of operation: `and` or `or`.""" diff --git a/src/openai/types/shared_params/function_definition.py b/src/openai/types/shared_params/function_definition.py index 58d0203b4f..d45ec13f1e 100644 --- a/src/openai/types/shared_params/function_definition.py +++ b/src/openai/types/shared_params/function_definition.py @@ -2,9 +2,10 @@ from __future__ import annotations +from typing import Optional from typing_extensions import Required, TypedDict -from ...types import shared_params +from .function_parameters import FunctionParameters __all__ = ["FunctionDefinition"] @@ -23,14 +24,22 @@ class FunctionDefinition(TypedDict, total=False): how to call the function. """ - parameters: shared_params.FunctionParameters + parameters: FunctionParameters """The parameters the functions accepts, described as a JSON Schema object. - See the - [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) - for examples, and the + See the [guide](https://platform.openai.com/docs/guides/function-calling) for + examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format. Omitting `parameters` defines a function with an empty parameter list. """ + + strict: Optional[bool] + """Whether to enable strict schema adherence when generating the function call. + + If set to true, the model will follow the exact schema defined in the + `parameters` field. Only a subset of JSON Schema is supported when `strict` is + `true`. Learn more about Structured Outputs in the + [function calling guide](docs/guides/function-calling). + """ diff --git a/src/openai/types/shared_params/function_parameters.py b/src/openai/types/shared_params/function_parameters.py index 5b40efb78f..45fc742d3b 100644 --- a/src/openai/types/shared_params/function_parameters.py +++ b/src/openai/types/shared_params/function_parameters.py @@ -3,7 +3,8 @@ from __future__ import annotations from typing import Dict +from typing_extensions import TypeAlias __all__ = ["FunctionParameters"] -FunctionParameters = Dict[str, object] +FunctionParameters: TypeAlias = Dict[str, object] diff --git a/src/openai/types/shared_params/metadata.py b/src/openai/types/shared_params/metadata.py new file mode 100644 index 0000000000..821650b48b --- /dev/null +++ b/src/openai/types/shared_params/metadata.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict +from typing_extensions import TypeAlias + +__all__ = ["Metadata"] + +Metadata: TypeAlias = Dict[str, str] diff --git a/src/openai/types/shared_params/reasoning.py b/src/openai/types/shared_params/reasoning.py new file mode 100644 index 0000000000..2953b895c4 --- /dev/null +++ b/src/openai/types/shared_params/reasoning.py @@ -0,0 +1,29 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypedDict + +from ..shared.reasoning_effort import ReasoningEffort + +__all__ = ["Reasoning"] + + +class Reasoning(TypedDict, total=False): + effort: Optional[ReasoningEffort] + """**o-series models only** + + Constrains effort on reasoning for + [reasoning models](https://platform.openai.com/docs/guides/reasoning). Currently + supported values are `low`, `medium`, and `high`. 
Reducing reasoning effort can + result in faster responses and fewer tokens used on reasoning in a response. + """ + + generate_summary: Optional[Literal["concise", "detailed"]] + """**computer_use_preview only** + + A summary of the reasoning performed by the model. This can be useful for + debugging and understanding the model's reasoning process. One of `concise` or + `detailed`. + """ diff --git a/src/openai/types/shared_params/reasoning_effort.py b/src/openai/types/shared_params/reasoning_effort.py new file mode 100644 index 0000000000..6052c5ae15 --- /dev/null +++ b/src/openai/types/shared_params/reasoning_effort.py @@ -0,0 +1,10 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Optional +from typing_extensions import Literal, TypeAlias + +__all__ = ["ReasoningEffort"] + +ReasoningEffort: TypeAlias = Optional[Literal["low", "medium", "high"]] diff --git a/src/openai/types/shared_params/response_format_json_object.py b/src/openai/types/shared_params/response_format_json_object.py new file mode 100644 index 0000000000..d4d1deaae5 --- /dev/null +++ b/src/openai/types/shared_params/response_format_json_object.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatJSONObject"] + + +class ResponseFormatJSONObject(TypedDict, total=False): + type: Required[Literal["json_object"]] + """The type of response format being defined. Always `json_object`.""" diff --git a/src/openai/types/shared_params/response_format_json_schema.py b/src/openai/types/shared_params/response_format_json_schema.py new file mode 100644 index 0000000000..5b0a13ee06 --- /dev/null +++ b/src/openai/types/shared_params/response_format_json_schema.py @@ -0,0 +1,46 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Optional +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatJSONSchema", "JSONSchema"] + + +class JSONSchema(TypedDict, total=False): + name: Required[str] + """The name of the response format. + + Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length + of 64. + """ + + description: str + """ + A description of what the response format is for, used by the model to determine + how to respond in the format. + """ + + schema: Dict[str, object] + """ + The schema for the response format, described as a JSON Schema object. Learn how + to build JSON schemas [here](https://json-schema.org/). + """ + + strict: Optional[bool] + """ + Whether to enable strict schema adherence when generating the output. If set to + true, the model will always follow the exact schema defined in the `schema` + field. Only a subset of JSON Schema is supported when `strict` is `true`. To + learn more, read the + [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs). + """ + + +class ResponseFormatJSONSchema(TypedDict, total=False): + json_schema: Required[JSONSchema] + """Structured Outputs configuration options, including a JSON Schema.""" + + type: Required[Literal["json_schema"]] + """The type of response format being defined. 
Always `json_schema`.""" diff --git a/src/openai/types/shared_params/response_format_text.py b/src/openai/types/shared_params/response_format_text.py new file mode 100644 index 0000000000..c3ef2b0816 --- /dev/null +++ b/src/openai/types/shared_params/response_format_text.py @@ -0,0 +1,12 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +__all__ = ["ResponseFormatText"] + + +class ResponseFormatText(TypedDict, total=False): + type: Required[Literal["text"]] + """The type of response format being defined. Always `text`.""" diff --git a/src/openai/types/static_file_chunking_strategy.py b/src/openai/types/static_file_chunking_strategy.py new file mode 100644 index 0000000000..2813bc6630 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + + +from .._models import BaseModel + +__all__ = ["StaticFileChunkingStrategy"] + + +class StaticFileChunkingStrategy(BaseModel): + chunk_overlap_tokens: int + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: int + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ diff --git a/src/openai/types/static_file_chunking_strategy_object.py b/src/openai/types/static_file_chunking_strategy_object.py new file mode 100644 index 0000000000..2a95dce5b3 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_object.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from .._models import BaseModel +from .static_file_chunking_strategy import StaticFileChunkingStrategy + +__all__ = ["StaticFileChunkingStrategyObject"] + + +class StaticFileChunkingStrategyObject(BaseModel): + static: StaticFileChunkingStrategy + + type: Literal["static"] + """Always `static`.""" diff --git a/src/openai/types/static_file_chunking_strategy_object_param.py b/src/openai/types/static_file_chunking_strategy_object_param.py new file mode 100644 index 0000000000..0cdf35c0df --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_object_param.py @@ -0,0 +1,16 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Literal, Required, TypedDict + +from .static_file_chunking_strategy_param import StaticFileChunkingStrategyParam + +__all__ = ["StaticFileChunkingStrategyObjectParam"] + + +class StaticFileChunkingStrategyObjectParam(TypedDict, total=False): + static: Required[StaticFileChunkingStrategyParam] + + type: Required[Literal["static"]] + """Always `static`.""" diff --git a/src/openai/types/static_file_chunking_strategy_param.py b/src/openai/types/static_file_chunking_strategy_param.py new file mode 100644 index 0000000000..f917ac5647 --- /dev/null +++ b/src/openai/types/static_file_chunking_strategy_param.py @@ -0,0 +1,22 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
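`StaticFileChunkingStrategy` and its object/param wrappers configure how files are split when attached to a vector store. A hedged sketch of using them at creation time, assuming the top-level `client.vector_stores` resource this PR promotes out of beta (the file ID is hypothetical):

```python
from openai import OpenAI

client = OpenAI()

# Chunk attached files into at most 800 tokens with a 400-token overlap, which
# respects the documented bounds (100-4096) and keeps the overlap at or below
# half of max_chunk_size_tokens.
vector_store = client.vector_stores.create(
    name="support-docs",
    file_ids=["file-abc123"],  # hypothetical file ID
    chunking_strategy={
        "type": "static",
        "static": {
            "max_chunk_size_tokens": 800,
            "chunk_overlap_tokens": 400,
        },
    },
)
print(vector_store.id, vector_store.name)
```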
+ +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +__all__ = ["StaticFileChunkingStrategyParam"] + + +class StaticFileChunkingStrategyParam(TypedDict, total=False): + chunk_overlap_tokens: Required[int] + """The number of tokens that overlap between chunks. The default value is `400`. + + Note that the overlap must not exceed half of `max_chunk_size_tokens`. + """ + + max_chunk_size_tokens: Required[int] + """The maximum number of tokens in each chunk. + + The default value is `800`. The minimum value is `100` and the maximum value is + `4096`. + """ diff --git a/src/openai/types/upload.py b/src/openai/types/upload.py new file mode 100644 index 0000000000..914b69a863 --- /dev/null +++ b/src/openai/types/upload.py @@ -0,0 +1,42 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional +from typing_extensions import Literal + +from .._models import BaseModel +from .file_object import FileObject + +__all__ = ["Upload"] + + +class Upload(BaseModel): + id: str + """The Upload unique identifier, which can be referenced in API endpoints.""" + + bytes: int + """The intended number of bytes to be uploaded.""" + + created_at: int + """The Unix timestamp (in seconds) for when the Upload was created.""" + + expires_at: int + """The Unix timestamp (in seconds) for when the Upload will expire.""" + + filename: str + """The name of the file to be uploaded.""" + + object: Literal["upload"] + """The object type, which is always "upload".""" + + purpose: str + """The intended purpose of the file. + + [Please refer here](https://platform.openai.com/docs/api-reference/files/object#files/object-purpose) + for acceptable values. + """ + + status: Literal["pending", "completed", "cancelled", "expired"] + """The status of the Upload.""" + + file: Optional[FileObject] = None + """The `File` object represents a document that has been uploaded to OpenAI.""" diff --git a/src/openai/types/upload_complete_params.py b/src/openai/types/upload_complete_params.py new file mode 100644 index 0000000000..cce568d5c6 --- /dev/null +++ b/src/openai/types/upload_complete_params.py @@ -0,0 +1,19 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import List +from typing_extensions import Required, TypedDict + +__all__ = ["UploadCompleteParams"] + + +class UploadCompleteParams(TypedDict, total=False): + part_ids: Required[List[str]] + """The ordered list of Part IDs.""" + + md5: str + """ + The optional md5 checksum for the file contents to verify if the bytes uploaded + matches what you expect. + """ diff --git a/src/openai/types/upload_create_params.py b/src/openai/types/upload_create_params.py new file mode 100644 index 0000000000..2ebabe6c66 --- /dev/null +++ b/src/openai/types/upload_create_params.py @@ -0,0 +1,31 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from .file_purpose import FilePurpose + +__all__ = ["UploadCreateParams"] + + +class UploadCreateParams(TypedDict, total=False): + bytes: Required[int] + """The number of bytes in the file you are uploading.""" + + filename: Required[str] + """The name of the file to upload.""" + + mime_type: Required[str] + """The MIME type of the file. + + This must fall within the supported MIME types for your file purpose. 
See the + supported MIME types for assistants and vision. + """ + + purpose: Required[FilePurpose] + """The intended purpose of the uploaded file. + + See the + [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose). + """ diff --git a/src/openai/types/uploads/__init__.py b/src/openai/types/uploads/__init__.py new file mode 100644 index 0000000000..41deb0ab4b --- /dev/null +++ b/src/openai/types/uploads/__init__.py @@ -0,0 +1,6 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from .upload_part import UploadPart as UploadPart +from .part_create_params import PartCreateParams as PartCreateParams diff --git a/src/openai/types/uploads/part_create_params.py b/src/openai/types/uploads/part_create_params.py new file mode 100644 index 0000000000..9851ca41e9 --- /dev/null +++ b/src/openai/types/uploads/part_create_params.py @@ -0,0 +1,14 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing_extensions import Required, TypedDict + +from ..._types import FileTypes + +__all__ = ["PartCreateParams"] + + +class PartCreateParams(TypedDict, total=False): + data: Required[FileTypes] + """The chunk of bytes for this Part.""" diff --git a/src/openai/types/uploads/upload_part.py b/src/openai/types/uploads/upload_part.py new file mode 100644 index 0000000000..e09621d8f9 --- /dev/null +++ b/src/openai/types/uploads/upload_part.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing_extensions import Literal + +from ..._models import BaseModel + +__all__ = ["UploadPart"] + + +class UploadPart(BaseModel): + id: str + """The upload Part unique identifier, which can be referenced in API endpoints.""" + + created_at: int + """The Unix timestamp (in seconds) for when the Part was created.""" + + object: Literal["upload.part"] + """The object type, which is always `upload.part`.""" + + upload_id: str + """The ID of the Upload object that this Part was added to.""" diff --git a/src/openai/types/beta/vector_store.py b/src/openai/types/vector_store.py similarity index 87% rename from src/openai/types/beta/vector_store.py rename to src/openai/types/vector_store.py index 488961b444..2473a442d2 100644 --- a/src/openai/types/beta/vector_store.py +++ b/src/openai/types/vector_store.py @@ -3,7 +3,8 @@ from typing import Optional from typing_extensions import Literal -from ..._models import BaseModel +from .._models import BaseModel +from .shared.metadata import Metadata __all__ = ["VectorStore", "FileCounts", "ExpiresAfter"] @@ -48,12 +49,14 @@ class VectorStore(BaseModel): last_active_at: Optional[int] = None """The Unix timestamp (in seconds) for when the vector store was last active.""" - metadata: Optional[object] = None + metadata: Optional[Metadata] = None """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. 
""" name: str diff --git a/src/openai/types/beta/vector_store_create_params.py b/src/openai/types/vector_store_create_params.py similarity index 67% rename from src/openai/types/beta/vector_store_create_params.py rename to src/openai/types/vector_store_create_params.py index f1a3abcbdf..365d0936b1 100644 --- a/src/openai/types/beta/vector_store_create_params.py +++ b/src/openai/types/vector_store_create_params.py @@ -5,10 +5,20 @@ from typing import List, Optional from typing_extensions import Literal, Required, TypedDict +from .shared_params.metadata import Metadata +from .file_chunking_strategy_param import FileChunkingStrategyParam + __all__ = ["VectorStoreCreateParams", "ExpiresAfter"] class VectorStoreCreateParams(TypedDict, total=False): + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ + expires_after: ExpiresAfter """The expiration policy for a vector store.""" @@ -19,12 +29,14 @@ class VectorStoreCreateParams(TypedDict, total=False): files. """ - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. + structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ name: str diff --git a/src/openai/types/beta/vector_store_deleted.py b/src/openai/types/vector_store_deleted.py similarity index 89% rename from src/openai/types/beta/vector_store_deleted.py rename to src/openai/types/vector_store_deleted.py index 21ccda1db5..dfac9ce8bd 100644 --- a/src/openai/types/beta/vector_store_deleted.py +++ b/src/openai/types/vector_store_deleted.py @@ -2,7 +2,7 @@ from typing_extensions import Literal -from ..._models import BaseModel +from .._models import BaseModel __all__ = ["VectorStoreDeleted"] diff --git a/src/openai/types/beta/vector_store_list_params.py b/src/openai/types/vector_store_list_params.py similarity index 93% rename from src/openai/types/beta/vector_store_list_params.py rename to src/openai/types/vector_store_list_params.py index f39f67266d..e26ff90a85 100644 --- a/src/openai/types/beta/vector_store_list_params.py +++ b/src/openai/types/vector_store_list_params.py @@ -21,7 +21,7 @@ class VectorStoreListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/vector_store_search_params.py b/src/openai/types/vector_store_search_params.py new file mode 100644 index 0000000000..17573d0f61 --- /dev/null +++ b/src/openai/types/vector_store_search_params.py @@ -0,0 +1,40 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +from typing import List, Union +from typing_extensions import Literal, Required, TypeAlias, TypedDict + +from .shared_params.compound_filter import CompoundFilter +from .shared_params.comparison_filter import ComparisonFilter + +__all__ = ["VectorStoreSearchParams", "Filters", "RankingOptions"] + + +class VectorStoreSearchParams(TypedDict, total=False): + query: Required[Union[str, List[str]]] + """A query string for a search""" + + filters: Filters + """A filter to apply based on file attributes.""" + + max_num_results: int + """The maximum number of results to return. + + This number should be between 1 and 50 inclusive. + """ + + ranking_options: RankingOptions + """Ranking options for search.""" + + rewrite_query: bool + """Whether to rewrite the natural language query for vector search.""" + + +Filters: TypeAlias = Union[ComparisonFilter, CompoundFilter] + + +class RankingOptions(TypedDict, total=False): + ranker: Literal["auto", "default-2024-11-15"] + + score_threshold: float diff --git a/src/openai/types/vector_store_search_response.py b/src/openai/types/vector_store_search_response.py new file mode 100644 index 0000000000..d78b71bfba --- /dev/null +++ b/src/openai/types/vector_store_search_response.py @@ -0,0 +1,39 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Dict, List, Union, Optional +from typing_extensions import Literal + +from .._models import BaseModel + +__all__ = ["VectorStoreSearchResponse", "Content"] + + +class Content(BaseModel): + text: str + """The text content returned from search.""" + + type: Literal["text"] + """The type of content.""" + + +class VectorStoreSearchResponse(BaseModel): + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + content: List[Content] + """Content chunks from the file.""" + + file_id: str + """The ID of the vector store file.""" + + filename: str + """The name of the vector store file.""" + + score: float + """The similarity score for the result.""" diff --git a/src/openai/types/beta/vector_store_update_params.py b/src/openai/types/vector_store_update_params.py similarity index 77% rename from src/openai/types/beta/vector_store_update_params.py rename to src/openai/types/vector_store_update_params.py index 0f9593e476..4f6ac63963 100644 --- a/src/openai/types/beta/vector_store_update_params.py +++ b/src/openai/types/vector_store_update_params.py @@ -5,6 +5,8 @@ from typing import Optional from typing_extensions import Literal, Required, TypedDict +from .shared_params.metadata import Metadata + __all__ = ["VectorStoreUpdateParams", "ExpiresAfter"] @@ -12,12 +14,14 @@ class VectorStoreUpdateParams(TypedDict, total=False): expires_after: Optional[ExpiresAfter] """The expiration policy for a vector store.""" - metadata: Optional[object] + metadata: Optional[Metadata] """Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a - structured format. Keys can be a maximum of 64 characters long and values can be - a maxium of 512 characters long. 
+ structured format, and querying for objects via API or the dashboard. + + Keys are strings with a maximum length of 64 characters. Values are strings with + a maximum length of 512 characters. """ name: Optional[str] diff --git a/src/openai/types/beta/vector_stores/__init__.py b/src/openai/types/vector_stores/__init__.py similarity index 82% rename from src/openai/types/beta/vector_stores/__init__.py rename to src/openai/types/vector_stores/__init__.py index ff05dd63d8..96ce301481 100644 --- a/src/openai/types/beta/vector_stores/__init__.py +++ b/src/openai/types/vector_stores/__init__.py @@ -5,6 +5,8 @@ from .file_list_params import FileListParams as FileListParams from .vector_store_file import VectorStoreFile as VectorStoreFile from .file_create_params import FileCreateParams as FileCreateParams +from .file_update_params import FileUpdateParams as FileUpdateParams +from .file_content_response import FileContentResponse as FileContentResponse from .vector_store_file_batch import VectorStoreFileBatch as VectorStoreFileBatch from .file_batch_create_params import FileBatchCreateParams as FileBatchCreateParams from .vector_store_file_deleted import VectorStoreFileDeleted as VectorStoreFileDeleted diff --git a/src/openai/types/vector_stores/file_batch_create_params.py b/src/openai/types/vector_stores/file_batch_create_params.py new file mode 100644 index 0000000000..1a470f757a --- /dev/null +++ b/src/openai/types/vector_stores/file_batch_create_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, List, Union, Optional +from typing_extensions import Required, TypedDict + +from ..file_chunking_strategy_param import FileChunkingStrategyParam + +__all__ = ["FileBatchCreateParams"] + + +class FileBatchCreateParams(TypedDict, total=False): + file_ids: Required[List[str]] + """ + A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that + the vector store should use. Useful for tools like `file_search` that can access + files. + """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ diff --git a/src/openai/types/beta/vector_stores/file_batch_list_files_params.py b/src/openai/types/vector_stores/file_batch_list_files_params.py similarity index 94% rename from src/openai/types/beta/vector_stores/file_batch_list_files_params.py rename to src/openai/types/vector_stores/file_batch_list_files_params.py index 24dee7d5a5..2a0a6c6aa7 100644 --- a/src/openai/types/beta/vector_stores/file_batch_list_files_params.py +++ b/src/openai/types/vector_stores/file_batch_list_files_params.py @@ -23,7 +23,7 @@ class FileBatchListFilesParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. 
For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/vector_stores/file_content_response.py b/src/openai/types/vector_stores/file_content_response.py new file mode 100644 index 0000000000..32db2f2ce9 --- /dev/null +++ b/src/openai/types/vector_stores/file_content_response.py @@ -0,0 +1,15 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from typing import Optional + +from ..._models import BaseModel + +__all__ = ["FileContentResponse"] + + +class FileContentResponse(BaseModel): + text: Optional[str] = None + """The text content""" + + type: Optional[str] = None + """The content type (currently only `"text"`)""" diff --git a/src/openai/types/vector_stores/file_create_params.py b/src/openai/types/vector_stores/file_create_params.py new file mode 100644 index 0000000000..5b8989251a --- /dev/null +++ b/src/openai/types/vector_stores/file_create_params.py @@ -0,0 +1,35 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Required, TypedDict + +from ..file_chunking_strategy_param import FileChunkingStrategyParam + +__all__ = ["FileCreateParams"] + + +class FileCreateParams(TypedDict, total=False): + file_id: Required[str] + """ + A [File](https://platform.openai.com/docs/api-reference/files) ID that the + vector store should use. Useful for tools like `file_search` that can access + files. + """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ + + chunking_strategy: FileChunkingStrategyParam + """The chunking strategy used to chunk the file(s). + + If not set, will use the `auto` strategy. Only applicable if `file_ids` is + non-empty. + """ diff --git a/src/openai/types/beta/vector_stores/file_list_params.py b/src/openai/types/vector_stores/file_list_params.py similarity index 94% rename from src/openai/types/beta/vector_stores/file_list_params.py rename to src/openai/types/vector_stores/file_list_params.py index 23dd7f0d94..867b5fb3bb 100644 --- a/src/openai/types/beta/vector_stores/file_list_params.py +++ b/src/openai/types/vector_stores/file_list_params.py @@ -21,7 +21,7 @@ class FileListParams(TypedDict, total=False): """A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if - you make a list request and receive 100 objects, ending with obj_foo, your + you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list. """ diff --git a/src/openai/types/vector_stores/file_update_params.py b/src/openai/types/vector_stores/file_update_params.py new file mode 100644 index 0000000000..ebf540d046 --- /dev/null +++ b/src/openai/types/vector_stores/file_update_params.py @@ -0,0 +1,21 @@ +# File generated from our OpenAPI spec by Stainless. 
See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import Dict, Union, Optional +from typing_extensions import Required, TypedDict + +__all__ = ["FileUpdateParams"] + + +class FileUpdateParams(TypedDict, total=False): + vector_store_id: Required[str] + + attributes: Required[Optional[Dict[str, Union[str, float, bool]]]] + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. + """ diff --git a/src/openai/types/beta/vector_stores/vector_store_file.py b/src/openai/types/vector_stores/vector_store_file.py similarity index 66% rename from src/openai/types/beta/vector_stores/vector_store_file.py rename to src/openai/types/vector_stores/vector_store_file.py index 3fab489602..b59a61dfb0 100644 --- a/src/openai/types/beta/vector_stores/vector_store_file.py +++ b/src/openai/types/vector_stores/vector_store_file.py @@ -1,15 +1,16 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. -from typing import Optional +from typing import Dict, Union, Optional from typing_extensions import Literal -from ...._models import BaseModel +from ..._models import BaseModel +from ..file_chunking_strategy import FileChunkingStrategy __all__ = ["VectorStoreFile", "LastError"] class LastError(BaseModel): - code: Literal["internal_error", "file_not_found", "parsing_error", "unhandled_mime_type"] + code: Literal["server_error", "unsupported_file", "invalid_file"] """One of `server_error` or `rate_limit_exceeded`.""" message: str @@ -52,3 +53,15 @@ class VectorStoreFile(BaseModel): that the [File](https://platform.openai.com/docs/api-reference/files) is attached to. """ + + attributes: Optional[Dict[str, Union[str, float, bool]]] = None + """Set of 16 key-value pairs that can be attached to an object. + + This can be useful for storing additional information about the object in a + structured format, and querying for objects via API or the dashboard. Keys are + strings with a maximum length of 64 characters. Values are strings with a + maximum length of 512 characters, booleans, or numbers. 
+ """ + + chunking_strategy: Optional[FileChunkingStrategy] = None + """The strategy used to chunk the file.""" diff --git a/src/openai/types/beta/vector_stores/vector_store_file_batch.py b/src/openai/types/vector_stores/vector_store_file_batch.py similarity index 97% rename from src/openai/types/beta/vector_stores/vector_store_file_batch.py rename to src/openai/types/vector_stores/vector_store_file_batch.py index df130a58de..57dbfbd809 100644 --- a/src/openai/types/beta/vector_stores/vector_store_file_batch.py +++ b/src/openai/types/vector_stores/vector_store_file_batch.py @@ -2,7 +2,7 @@ from typing_extensions import Literal -from ...._models import BaseModel +from ..._models import BaseModel __all__ = ["VectorStoreFileBatch", "FileCounts"] diff --git a/src/openai/types/beta/vector_stores/vector_store_file_deleted.py b/src/openai/types/vector_stores/vector_store_file_deleted.py similarity index 89% rename from src/openai/types/beta/vector_stores/vector_store_file_deleted.py rename to src/openai/types/vector_stores/vector_store_file_deleted.py index ae37f84364..5c856f26cd 100644 --- a/src/openai/types/beta/vector_stores/vector_store_file_deleted.py +++ b/src/openai/types/vector_stores/vector_store_file_deleted.py @@ -2,7 +2,7 @@ from typing_extensions import Literal -from ...._models import BaseModel +from ..._models import BaseModel __all__ = ["VectorStoreFileDeleted"] diff --git a/src/openai/types/websocket_connection_options.py b/src/openai/types/websocket_connection_options.py new file mode 100644 index 0000000000..40fd24ab03 --- /dev/null +++ b/src/openai/types/websocket_connection_options.py @@ -0,0 +1,36 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +from typing import TYPE_CHECKING +from typing_extensions import Sequence, TypedDict + +if TYPE_CHECKING: + from websockets import Subprotocol + from websockets.extensions import ClientExtensionFactory + + +class WebsocketConnectionOptions(TypedDict, total=False): + """Websocket connection options copied from `websockets`. + + For example: https://websockets.readthedocs.io/en/stable/reference/asyncio/client.html#websockets.asyncio.client.connect + """ + + extensions: Sequence[ClientExtensionFactory] | None + """List of supported extensions, in order in which they should be negotiated and run.""" + + subprotocols: Sequence[Subprotocol] | None + """List of supported subprotocols, in order of decreasing preference.""" + + compression: str | None + """The “permessage-deflate” extension is enabled by default. Set compression to None to disable it. See the [compression guide](https://websockets.readthedocs.io/en/stable/topics/compression.html) for details.""" + + # limits + max_size: int | None + """Maximum size of incoming messages in bytes. None disables the limit.""" + + max_queue: int | None | tuple[int | None, int | None] + """High-water mark of the buffer where frames are received. It defaults to 16 frames. The low-water mark defaults to max_queue // 4. You may pass a (high, low) tuple to set the high-water and low-water marks. If you want to disable flow control entirely, you may set it to None, although that’s a bad idea.""" + + write_limit: int | tuple[int, int | None] + """High-water mark of write buffer in bytes. It is passed to set_write_buffer_limits(). It defaults to 32 KiB. 
You may pass a (high, low) tuple to set the high-water and low-water marks.""" diff --git a/src/openai/version.py b/src/openai/version.py deleted file mode 100644 index 01a08ab5a9..0000000000 --- a/src/openai/version.py +++ /dev/null @@ -1,3 +0,0 @@ -from ._version import __version__ - -VERSION: str = __version__ diff --git a/tests/api_resources/audio/test_speech.py b/tests/api_resources/audio/test_speech.py index 781ebeceb9..5b5dc24156 100644 --- a/tests/api_resources/audio/test_speech.py +++ b/tests/api_resources/audio/test_speech.py @@ -26,7 +26,7 @@ class TestSpeech: def test_method_create(self, client: OpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) speech = client.audio.speech.create( - input="string", + input="input", model="string", voice="alloy", ) @@ -38,7 +38,7 @@ def test_method_create(self, client: OpenAI, respx_mock: MockRouter) -> None: def test_method_create_with_all_params(self, client: OpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) speech = client.audio.speech.create( - input="string", + input="input", model="string", voice="alloy", response_format="mp3", @@ -53,7 +53,7 @@ def test_raw_response_create(self, client: OpenAI, respx_mock: MockRouter) -> No respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = client.audio.speech.with_raw_response.create( - input="string", + input="input", model="string", voice="alloy", ) @@ -68,7 +68,7 @@ def test_raw_response_create(self, client: OpenAI, respx_mock: MockRouter) -> No def test_streaming_response_create(self, client: OpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) with client.audio.speech.with_streaming_response.create( - input="string", + input="input", model="string", voice="alloy", ) as response: @@ -89,7 +89,7 @@ class TestAsyncSpeech: async def test_method_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) speech = await async_client.audio.speech.create( - input="string", + input="input", model="string", voice="alloy", ) @@ -101,7 +101,7 @@ async def test_method_create(self, async_client: AsyncOpenAI, respx_mock: MockRo async def test_method_create_with_all_params(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) speech = await async_client.audio.speech.create( - input="string", + input="input", model="string", voice="alloy", response_format="mp3", @@ -116,7 +116,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI, respx_mock: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = await async_client.audio.speech.with_raw_response.create( - input="string", + input="input", model="string", voice="alloy", ) @@ -131,7 +131,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI, respx_mock: async def test_streaming_response_create(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: respx_mock.post("/audio/speech").mock(return_value=httpx.Response(200, json={"foo": "bar"})) async with async_client.audio.speech.with_streaming_response.create( - input="string", + input="input", model="string", voice="alloy", 
) as response: diff --git a/tests/api_resources/audio/test_transcriptions.py b/tests/api_resources/audio/test_transcriptions.py index ba8e9e4099..bcb75b9d68 100644 --- a/tests/api_resources/audio/test_transcriptions.py +++ b/tests/api_resources/audio/test_transcriptions.py @@ -9,7 +9,7 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai.types.audio import Transcription +from openai.types.audio import TranscriptionCreateResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") @@ -23,20 +23,20 @@ def test_method_create(self, client: OpenAI) -> None: file=b"raw file contents", model="whisper-1", ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: transcription = client.audio.transcriptions.create( file=b"raw file contents", model="whisper-1", - language="string", - prompt="string", + language="language", + prompt="prompt", response_format="json", temperature=0, - timestamp_granularities=["word", "segment"], + timestamp_granularities=["word"], ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: @@ -48,7 +48,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: @@ -60,7 +60,7 @@ def test_streaming_response_create(self, client: OpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) assert cast(Any, response.is_closed) is True @@ -74,20 +74,20 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: file=b"raw file contents", model="whisper-1", ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: transcription = await async_client.audio.transcriptions.create( file=b"raw file contents", model="whisper-1", - language="string", - prompt="string", + language="language", + prompt="prompt", response_format="json", temperature=0, - timestamp_granularities=["word", "segment"], + timestamp_granularities=["word"], ) - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @@ -99,7 +99,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = response.parse() - assert_matches_type(Transcription, transcription, 
path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: @@ -111,6 +111,6 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non assert response.http_request.headers.get("X-Stainless-Lang") == "python" transcription = await response.parse() - assert_matches_type(Transcription, transcription, path=["response"]) + assert_matches_type(TranscriptionCreateResponse, transcription, path=["response"]) assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/audio/test_translations.py b/tests/api_resources/audio/test_translations.py index f5c6c68f0b..e12ab7e6c0 100644 --- a/tests/api_resources/audio/test_translations.py +++ b/tests/api_resources/audio/test_translations.py @@ -9,7 +9,7 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai.types.audio import Translation +from openai.types.audio import TranslationCreateResponse base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") @@ -23,18 +23,18 @@ def test_method_create(self, client: OpenAI) -> None: file=b"raw file contents", model="whisper-1", ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: translation = client.audio.translations.create( file=b"raw file contents", model="whisper-1", - prompt="string", - response_format="string", + prompt="prompt", + response_format="json", temperature=0, ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: @@ -46,7 +46,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: @@ -58,7 +58,7 @@ def test_streaming_response_create(self, client: OpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) assert cast(Any, response.is_closed) is True @@ -72,18 +72,18 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: file=b"raw file contents", model="whisper-1", ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: translation = await async_client.audio.translations.create( file=b"raw file contents", model="whisper-1", - prompt="string", - response_format="string", + prompt="prompt", + response_format="json", temperature=0, ) - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize async def test_raw_response_create(self, 
async_client: AsyncOpenAI) -> None: @@ -95,7 +95,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: @@ -107,6 +107,6 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non assert response.http_request.headers.get("X-Stainless-Lang") == "python" translation = await response.parse() - assert_matches_type(Translation, translation, path=["response"]) + assert_matches_type(TranslationCreateResponse, translation, path=["response"]) assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/vector_stores/__init__.py b/tests/api_resources/beta/realtime/__init__.py similarity index 100% rename from tests/api_resources/beta/vector_stores/__init__.py rename to tests/api_resources/beta/realtime/__init__.py diff --git a/tests/api_resources/beta/realtime/test_sessions.py b/tests/api_resources/beta/realtime/test_sessions.py new file mode 100644 index 0000000000..5ea308ca0d --- /dev/null +++ b/tests/api_resources/beta/realtime/test_sessions.py @@ -0,0 +1,144 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.beta.realtime import SessionCreateResponse + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestSessions: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + session = client.beta.realtime.sessions.create() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + session = client.beta.realtime.sessions.create( + input_audio_format="pcm16", + input_audio_transcription={ + "language": "language", + "model": "model", + "prompt": "prompt", + }, + instructions="instructions", + max_response_output_tokens=0, + modalities=["text"], + model="gpt-4o-realtime-preview", + output_audio_format="pcm16", + temperature=0, + tool_choice="tool_choice", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + turn_detection={ + "create_response": True, + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "type", + }, + voice="alloy", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.beta.realtime.sessions.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.beta.realtime.sessions.with_streaming_response.create() as response: 
+ assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True + + +class TestAsyncSessions: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.realtime.sessions.create() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + session = await async_client.beta.realtime.sessions.create( + input_audio_format="pcm16", + input_audio_transcription={ + "language": "language", + "model": "model", + "prompt": "prompt", + }, + instructions="instructions", + max_response_output_tokens=0, + modalities=["text"], + model="gpt-4o-realtime-preview", + output_audio_format="pcm16", + temperature=0, + tool_choice="tool_choice", + tools=[ + { + "description": "description", + "name": "name", + "parameters": {}, + "type": "function", + } + ], + turn_detection={ + "create_response": True, + "interrupt_response": True, + "prefix_padding_ms": 0, + "silence_duration_ms": 0, + "threshold": 0, + "type": "type", + }, + voice="alloy", + ) + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.beta.realtime.sessions.with_raw_response.create() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + session = response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.beta.realtime.sessions.with_streaming_response.create() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + session = await response.parse() + assert_matches_type(SessionCreateResponse, session, path=["response"]) + + assert cast(Any, response.is_closed) is True diff --git a/tests/api_resources/beta/test_assistants.py b/tests/api_resources/beta/test_assistants.py index a92acb2ca5..82aaf87b1c 100644 --- a/tests/api_resources/beta/test_assistants.py +++ b/tests/api_resources/beta/test_assistants.py @@ -24,33 +24,35 @@ class TestAssistants: @parametrize def test_method_create(self, client: OpenAI) -> None: assistant = client.beta.assistants.create( - model="gpt-4-turbo", + model="gpt-4o", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.create( - model="gpt-4-turbo", - description="string", - instructions="string", - metadata={}, - name="string", - response_format="none", + model="gpt-4o", + description="description", + instructions="instructions", + metadata={"foo": "string"}, + name="name", + reasoning_effort="low", + response_format="auto", temperature=1, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - 
"metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @@ -58,7 +60,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.create( - model="gpt-4-turbo", + model="gpt-4o", ) assert response.is_closed is True @@ -69,7 +71,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: with client.beta.assistants.with_streaming_response.create( - model="gpt-4-turbo", + model="gpt-4o", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -82,14 +84,14 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: assistant = client.beta.assistants.retrieve( - "string", + "assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.retrieve( - "string", + "assistant_id", ) assert response.is_closed is True @@ -100,7 +102,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.beta.assistants.with_streaming_response.retrieve( - "string", + "assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -120,26 +122,27 @@ def test_path_params_retrieve(self, client: OpenAI) -> None: @parametrize def test_method_update(self, client: OpenAI) -> None: assistant = client.beta.assistants.update( - "string", + assistant_id="assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.update( - "string", - description="string", - instructions="string", - metadata={}, + assistant_id="assistant_id", + description="description", + instructions="instructions", + metadata={"foo": "string"}, model="string", - name="string", - response_format="none", + name="name", + reasoning_effort="low", + response_format="auto", temperature=1, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @@ -147,7 +150,7 @@ def test_method_update_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_update(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.update( - "string", + assistant_id="assistant_id", ) assert response.is_closed is True @@ -158,7 +161,7 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: with 
client.beta.assistants.with_streaming_response.update( - "string", + assistant_id="assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -172,7 +175,7 @@ def test_streaming_response_update(self, client: OpenAI) -> None: def test_path_params_update(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `assistant_id` but received ''"): client.beta.assistants.with_raw_response.update( - "", + assistant_id="", ) @parametrize @@ -183,8 +186,8 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: assistant = client.beta.assistants.list( - after="string", - before="string", + after="after", + before="before", limit=0, order="asc", ) @@ -213,14 +216,14 @@ def test_streaming_response_list(self, client: OpenAI) -> None: @parametrize def test_method_delete(self, client: OpenAI) -> None: assistant = client.beta.assistants.delete( - "string", + "assistant_id", ) assert_matches_type(AssistantDeleted, assistant, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.beta.assistants.with_raw_response.delete( - "string", + "assistant_id", ) assert response.is_closed is True @@ -231,7 +234,7 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: with client.beta.assistants.with_streaming_response.delete( - "string", + "assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -255,33 +258,35 @@ class TestAsyncAssistants: @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.create( - model="gpt-4-turbo", + model="gpt-4o", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.create( - model="gpt-4-turbo", - description="string", - instructions="string", - metadata={}, - name="string", - response_format="none", + model="gpt-4o", + description="description", + instructions="instructions", + metadata={"foo": "string"}, + name="name", + reasoning_effort="low", + response_format="auto", temperature=1, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @@ -289,7 +294,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.create( - model="gpt-4-turbo", + model="gpt-4o", ) assert response.is_closed is True @@ -300,7 +305,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def 
test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.create( - model="gpt-4-turbo", + model="gpt-4o", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -313,14 +318,14 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.retrieve( - "string", + "assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.retrieve( - "string", + "assistant_id", ) assert response.is_closed is True @@ -331,7 +336,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.retrieve( - "string", + "assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -351,26 +356,27 @@ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.update( - "string", + assistant_id="assistant_id", ) assert_matches_type(Assistant, assistant, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.update( - "string", - description="string", - instructions="string", - metadata={}, + assistant_id="assistant_id", + description="description", + instructions="instructions", + metadata={"foo": "string"}, model="string", - name="string", - response_format="none", + name="name", + reasoning_effort="low", + response_format="auto", temperature=1, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, ) assert_matches_type(Assistant, assistant, path=["response"]) @@ -378,7 +384,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.update( - "string", + assistant_id="assistant_id", ) assert response.is_closed is True @@ -389,7 +395,7 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.update( - "string", + assistant_id="assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -403,7 +409,7 @@ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> Non async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for 
`assistant_id` but received ''"): await async_client.beta.assistants.with_raw_response.update( - "", + assistant_id="", ) @parametrize @@ -414,8 +420,8 @@ async def test_method_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.list( - after="string", - before="string", + after="after", + before="before", limit=0, order="asc", ) @@ -444,14 +450,14 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: assistant = await async_client.beta.assistants.delete( - "string", + "assistant_id", ) assert_matches_type(AssistantDeleted, assistant, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.assistants.with_raw_response.delete( - "string", + "assistant_id", ) assert response.is_closed is True @@ -462,7 +468,7 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.assistants.with_streaming_response.delete( - "string", + "assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/beta/test_realtime.py b/tests/api_resources/beta/test_realtime.py new file mode 100644 index 0000000000..537017ffd3 --- /dev/null +++ b/tests/api_resources/beta/test_realtime.py @@ -0,0 +1,17 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os + +import pytest + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestRealtime: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + +class TestAsyncRealtime: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) diff --git a/tests/api_resources/beta/test_threads.py b/tests/api_resources/beta/test_threads.py index 02c6e2586e..ea89213e95 100644 --- a/tests/api_resources/beta/test_threads.py +++ b/tests/api_resources/beta/test_threads.py @@ -31,108 +31,27 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: thread = client.beta.threads.create( messages=[ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - 
{"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], - metadata={}, + metadata={"foo": "string"}, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, @@ -163,14 +82,14 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: thread = client.beta.threads.retrieve( - "string", + "thread_id", ) assert_matches_type(Thread, thread, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.retrieve( - "string", + "thread_id", ) assert response.is_closed is True @@ -181,7 +100,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.beta.threads.with_streaming_response.retrieve( - "string", + "thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -201,17 +120,17 @@ def test_path_params_retrieve(self, client: OpenAI) -> None: @parametrize def test_method_update(self, client: OpenAI) -> None: thread = client.beta.threads.update( - "string", + thread_id="thread_id", ) assert_matches_type(Thread, thread, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: thread = client.beta.threads.update( - "string", - metadata={}, + thread_id="thread_id", + metadata={"foo": "string"}, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, ) @@ -220,7 +139,7 @@ def test_method_update_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_update(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.update( - "string", + thread_id="thread_id", ) assert response.is_closed is True @@ -231,7 +150,7 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: with client.beta.threads.with_streaming_response.update( - "string", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -245,20 +164,20 @@ def test_streaming_response_update(self, client: OpenAI) -> None: def test_path_params_update(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): 
client.beta.threads.with_raw_response.update( - "", + thread_id="", ) @parametrize def test_method_delete(self, client: OpenAI) -> None: thread = client.beta.threads.delete( - "string", + "thread_id", ) assert_matches_type(ThreadDeleted, thread, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.delete( - "string", + "thread_id", ) assert response.is_closed is True @@ -269,7 +188,7 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: with client.beta.threads.with_streaming_response.delete( - "string", + "thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -289,138 +208,58 @@ def test_path_params_delete(self, client: OpenAI) -> None: @parametrize def test_method_create_and_run_overload_1(self, client: OpenAI) -> None: thread = client.beta.threads.create_and_run( - assistant_id="string", + assistant_id="assistant_id", ) assert_matches_type(Run, thread, path=["response"]) @parametrize def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI) -> None: thread = client.beta.threads.create_and_run( - assistant_id="string", - instructions="string", + assistant_id="assistant_id", + instructions="instructions", max_completion_tokens=256, max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", + metadata={"foo": "string"}, + model="gpt-4o", + parallel_tool_calls=True, + response_format="auto", stream=False, temperature=1, thread={ "messages": [ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], + "metadata": {"foo": "string"}, "tool_resources": { - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { 
"vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, }, - "metadata": {}, }, tool_choice="none", tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, truncation_strategy={ "type": "auto", @@ -432,7 +271,7 @@ def test_method_create_and_run_with_all_params_overload_1(self, client: OpenAI) @parametrize def test_raw_response_create_and_run_overload_1(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.create_and_run( - assistant_id="string", + assistant_id="assistant_id", ) assert response.is_closed is True @@ -443,7 +282,7 @@ def test_raw_response_create_and_run_overload_1(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create_and_run_overload_1(self, client: OpenAI) -> None: with client.beta.threads.with_streaming_response.create_and_run( - assistant_id="string", + assistant_id="assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -456,7 +295,7 @@ def test_streaming_response_create_and_run_overload_1(self, client: OpenAI) -> N @parametrize def test_method_create_and_run_overload_2(self, client: OpenAI) -> None: thread_stream = client.beta.threads.create_and_run( - assistant_id="string", + assistant_id="assistant_id", stream=True, ) thread_stream.response.close() @@ -464,131 +303,51 @@ def test_method_create_and_run_overload_2(self, client: OpenAI) -> None: @parametrize def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI) -> None: thread_stream = client.beta.threads.create_and_run( - assistant_id="string", + assistant_id="assistant_id", stream=True, - instructions="string", + instructions="instructions", max_completion_tokens=256, max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", + metadata={"foo": "string"}, + model="gpt-4o", + parallel_tool_calls=True, + response_format="auto", temperature=1, thread={ "messages": [ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - 
{"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], + "metadata": {"foo": "string"}, "tool_resources": { - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, }, - "metadata": {}, }, tool_choice="none", tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, truncation_strategy={ "type": "auto", @@ -600,7 +359,7 @@ def test_method_create_and_run_with_all_params_overload_2(self, client: OpenAI) @parametrize def test_raw_response_create_and_run_overload_2(self, client: OpenAI) -> None: response = client.beta.threads.with_raw_response.create_and_run( - assistant_id="string", + assistant_id="assistant_id", stream=True, ) @@ -611,7 +370,7 @@ def test_raw_response_create_and_run_overload_2(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create_and_run_overload_2(self, client: OpenAI) -> None: with client.beta.threads.with_streaming_response.create_and_run( - assistant_id="string", + assistant_id="assistant_id", stream=True, ) as response: assert not response.is_closed @@ -636,108 +395,27 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> thread = await async_client.beta.threads.create( messages=[ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": 
"code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], - metadata={}, + metadata={"foo": "string"}, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, @@ -768,14 +446,14 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: thread = await async_client.beta.threads.retrieve( - "string", + "thread_id", ) assert_matches_type(Thread, thread, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.with_raw_response.retrieve( - "string", + "thread_id", ) assert response.is_closed is True @@ -786,7 +464,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.with_streaming_response.retrieve( - "string", + "thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -806,17 +484,17 @@ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: thread = await async_client.beta.threads.update( - "string", + thread_id="thread_id", ) assert_matches_type(Thread, thread, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: thread = await async_client.beta.threads.update( - "string", - metadata={}, + thread_id="thread_id", + metadata={"foo": "string"}, tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, ) @@ -825,7 +503,7 @@ async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.with_raw_response.update( - "string", + thread_id="thread_id", ) assert response.is_closed is True @@ -836,7 +514,7 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.with_streaming_response.update( - "string", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -850,20 +528,20 @@ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> Non async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await 
async_client.beta.threads.with_raw_response.update( - "", + thread_id="", ) @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: thread = await async_client.beta.threads.delete( - "string", + "thread_id", ) assert_matches_type(ThreadDeleted, thread, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.with_raw_response.delete( - "string", + "thread_id", ) assert response.is_closed is True @@ -874,7 +552,7 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.with_streaming_response.delete( - "string", + "thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -894,138 +572,58 @@ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: thread = await async_client.beta.threads.create_and_run( - assistant_id="string", + assistant_id="assistant_id", ) assert_matches_type(Run, thread, path=["response"]) @parametrize async def test_method_create_and_run_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: thread = await async_client.beta.threads.create_and_run( - assistant_id="string", - instructions="string", + assistant_id="assistant_id", + instructions="instructions", max_completion_tokens=256, max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", + metadata={"foo": "string"}, + model="gpt-4o", + parallel_tool_calls=True, + response_format="auto", stream=False, temperature=1, thread={ "messages": [ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], + 
"metadata": {"foo": "string"}, "tool_resources": { - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, }, - "metadata": {}, }, tool_choice="none", tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, truncation_strategy={ "type": "auto", @@ -1037,7 +635,7 @@ async def test_method_create_and_run_with_all_params_overload_1(self, async_clie @parametrize async def test_raw_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.with_raw_response.create_and_run( - assistant_id="string", + assistant_id="assistant_id", ) assert response.is_closed is True @@ -1048,7 +646,7 @@ async def test_raw_response_create_and_run_overload_1(self, async_client: AsyncO @parametrize async def test_streaming_response_create_and_run_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.with_streaming_response.create_and_run( - assistant_id="string", + assistant_id="assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1061,7 +659,7 @@ async def test_streaming_response_create_and_run_overload_1(self, async_client: @parametrize async def test_method_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: thread_stream = await async_client.beta.threads.create_and_run( - assistant_id="string", + assistant_id="assistant_id", stream=True, ) await thread_stream.response.aclose() @@ -1069,131 +667,51 @@ async def test_method_create_and_run_overload_2(self, async_client: AsyncOpenAI) @parametrize async def test_method_create_and_run_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: thread_stream = await async_client.beta.threads.create_and_run( - assistant_id="string", + assistant_id="assistant_id", stream=True, - instructions="string", + instructions="instructions", max_completion_tokens=256, max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", + metadata={"foo": "string"}, + model="gpt-4o", + parallel_tool_calls=True, + response_format="auto", temperature=1, thread={ "messages": [ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": 
"code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], + "metadata": {"foo": "string"}, "tool_resources": { - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": { "vector_store_ids": ["string"], "vector_stores": [ { - "file_ids": ["string", "string", "string"], - "metadata": {}, + "chunking_strategy": {"type": "auto"}, + "file_ids": ["string"], + "metadata": {"foo": "string"}, } ], }, }, - "metadata": {}, }, tool_choice="none", tool_resources={ - "code_interpreter": {"file_ids": ["string", "string", "string"]}, + "code_interpreter": {"file_ids": ["string"]}, "file_search": {"vector_store_ids": ["string"]}, }, - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, truncation_strategy={ "type": "auto", @@ -1205,7 +723,7 @@ async def test_method_create_and_run_with_all_params_overload_2(self, async_clie @parametrize async def test_raw_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.with_raw_response.create_and_run( - assistant_id="string", + assistant_id="assistant_id", stream=True, ) @@ -1216,7 +734,7 @@ async def test_raw_response_create_and_run_overload_2(self, async_client: AsyncO @parametrize async def test_streaming_response_create_and_run_overload_2(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.with_streaming_response.create_and_run( - assistant_id="string", + assistant_id="assistant_id", stream=True, ) as response: assert not response.is_closed diff --git a/tests/api_resources/beta/threads/runs/test_steps.py b/tests/api_resources/beta/threads/runs/test_steps.py index e6108d8dad..ea3e682158 100644 --- a/tests/api_resources/beta/threads/runs/test_steps.py +++ b/tests/api_resources/beta/threads/runs/test_steps.py @@ -21,18 +21,28 @@ class TestSteps: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: step = client.beta.threads.runs.steps.retrieve( - "string", - thread_id="string", - run_id="string", + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) + assert_matches_type(RunStep, step, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params(self, client: OpenAI) -> None: + step = client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], ) assert_matches_type(RunStep, step, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.threads.runs.steps.with_raw_response.retrieve( - 
"string", - thread_id="string", - run_id="string", + step_id="step_id", + thread_id="thread_id", + run_id="run_id", ) assert response.is_closed is True @@ -43,9 +53,9 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.beta.threads.runs.steps.with_streaming_response.retrieve( - "string", - thread_id="string", - run_id="string", + step_id="step_id", + thread_id="thread_id", + run_id="run_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -59,40 +69,41 @@ def test_streaming_response_retrieve(self, client: OpenAI) -> None: def test_path_params_retrieve(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", + step_id="step_id", thread_id="", - run_id="string", + run_id="run_id", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="string", + step_id="step_id", + thread_id="thread_id", run_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): client.beta.threads.runs.steps.with_raw_response.retrieve( - "", - thread_id="string", - run_id="string", + step_id="", + thread_id="thread_id", + run_id="run_id", ) @parametrize def test_method_list(self, client: OpenAI) -> None: step = client.beta.threads.runs.steps.list( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert_matches_type(SyncCursorPage[RunStep], step, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: step = client.beta.threads.runs.steps.list( - "string", - thread_id="string", - after="string", - before="string", + run_id="run_id", + thread_id="thread_id", + after="after", + before="before", + include=["step_details.tool_calls[*].file_search.results[*].content"], limit=0, order="asc", ) @@ -101,8 +112,8 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.beta.threads.runs.steps.with_raw_response.list( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -113,8 +124,8 @@ def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: with client.beta.threads.runs.steps.with_streaming_response.list( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -128,14 +139,14 @@ def test_streaming_response_list(self, client: OpenAI) -> None: def test_path_params_list(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.steps.with_raw_response.list( - "string", + run_id="run_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): client.beta.threads.runs.steps.with_raw_response.list( - "", - thread_id="string", + run_id="", + thread_id="thread_id", ) @@ -145,18 +156,28 @@ class TestAsyncSteps: @parametrize async def 
test_method_retrieve(self, async_client: AsyncOpenAI) -> None: step = await async_client.beta.threads.runs.steps.retrieve( - "string", - thread_id="string", - run_id="string", + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + ) + assert_matches_type(RunStep, step, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None: + step = await async_client.beta.threads.runs.steps.retrieve( + step_id="step_id", + thread_id="thread_id", + run_id="run_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], ) assert_matches_type(RunStep, step, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="string", - run_id="string", + step_id="step_id", + thread_id="thread_id", + run_id="run_id", ) assert response.is_closed is True @@ -167,9 +188,9 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.steps.with_streaming_response.retrieve( - "string", - thread_id="string", - run_id="string", + step_id="step_id", + thread_id="thread_id", + run_id="run_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -183,40 +204,41 @@ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> N async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", + step_id="step_id", thread_id="", - run_id="string", + run_id="run_id", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): await async_client.beta.threads.runs.steps.with_raw_response.retrieve( - "string", - thread_id="string", + step_id="step_id", + thread_id="thread_id", run_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `step_id` but received ''"): await async_client.beta.threads.runs.steps.with_raw_response.retrieve( - "", - thread_id="string", - run_id="string", + step_id="", + thread_id="thread_id", + run_id="run_id", ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: step = await async_client.beta.threads.runs.steps.list( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert_matches_type(AsyncCursorPage[RunStep], step, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: step = await async_client.beta.threads.runs.steps.list( - "string", - thread_id="string", - after="string", - before="string", + run_id="run_id", + thread_id="thread_id", + after="after", + before="before", + include=["step_details.tool_calls[*].file_search.results[*].content"], limit=0, order="asc", ) @@ -225,8 +247,8 @@ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> N @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.steps.with_raw_response.list( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert response.is_closed is 
True @@ -237,8 +259,8 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.steps.with_streaming_response.list( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -252,12 +274,12 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.steps.with_raw_response.list( - "string", + run_id="run_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): await async_client.beta.threads.runs.steps.with_raw_response.list( - "", - thread_id="string", + run_id="", + thread_id="thread_id", ) diff --git a/tests/api_resources/beta/threads/test_messages.py b/tests/api_resources/beta/threads/test_messages.py index b5be32a421..c965f0ab90 100644 --- a/tests/api_resources/beta/threads/test_messages.py +++ b/tests/api_resources/beta/threads/test_messages.py @@ -24,7 +24,7 @@ class TestMessages: @parametrize def test_method_create(self, client: OpenAI) -> None: message = client.beta.threads.messages.create( - "string", + thread_id="thread_id", content="string", role="user", ) @@ -33,31 +33,23 @@ def test_method_create(self, client: OpenAI) -> None: @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: message = client.beta.threads.messages.create( - "string", + thread_id="thread_id", content="string", role="user", attachments=[ { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - metadata={}, + metadata={"foo": "string"}, ) assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.beta.threads.messages.with_raw_response.create( - "string", + thread_id="thread_id", content="string", role="user", ) @@ -70,7 +62,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: with client.beta.threads.messages.with_streaming_response.create( - "string", + thread_id="thread_id", content="string", role="user", ) as response: @@ -86,7 +78,7 @@ def test_streaming_response_create(self, client: OpenAI) -> None: def test_path_params_create(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.messages.with_raw_response.create( - "", + thread_id="", content="string", role="user", ) @@ -94,16 +86,16 @@ def test_path_params_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: message = client.beta.threads.messages.retrieve( - "string", - thread_id="string", + 
message_id="message_id", + thread_id="thread_id", ) assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.threads.messages.with_raw_response.retrieve( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -114,8 +106,8 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.beta.threads.messages.with_streaming_response.retrieve( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -129,38 +121,38 @@ def test_streaming_response_retrieve(self, client: OpenAI) -> None: def test_path_params_retrieve(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.messages.with_raw_response.retrieve( - "string", + message_id="message_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): client.beta.threads.messages.with_raw_response.retrieve( - "", - thread_id="string", + message_id="", + thread_id="thread_id", ) @parametrize def test_method_update(self, client: OpenAI) -> None: message = client.beta.threads.messages.update( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert_matches_type(Message, message, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: message = client.beta.threads.messages.update( - "string", - thread_id="string", - metadata={}, + message_id="message_id", + thread_id="thread_id", + metadata={"foo": "string"}, ) assert_matches_type(Message, message, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: response = client.beta.threads.messages.with_raw_response.update( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -171,8 +163,8 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: with client.beta.threads.messages.with_streaming_response.update( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -186,39 +178,39 @@ def test_streaming_response_update(self, client: OpenAI) -> None: def test_path_params_update(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.messages.with_raw_response.update( - "string", + message_id="message_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): client.beta.threads.messages.with_raw_response.update( - "", - thread_id="string", + message_id="", + thread_id="thread_id", ) @parametrize def test_method_list(self, client: OpenAI) -> None: message = client.beta.threads.messages.list( - "string", + thread_id="thread_id", ) assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) 
-> None: message = client.beta.threads.messages.list( - "string", - after="string", - before="string", + thread_id="thread_id", + after="after", + before="before", limit=0, order="asc", - run_id="string", + run_id="run_id", ) assert_matches_type(SyncCursorPage[Message], message, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.beta.threads.messages.with_raw_response.list( - "string", + thread_id="thread_id", ) assert response.is_closed is True @@ -229,7 +221,7 @@ def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: with client.beta.threads.messages.with_streaming_response.list( - "string", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -243,22 +235,22 @@ def test_streaming_response_list(self, client: OpenAI) -> None: def test_path_params_list(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.messages.with_raw_response.list( - "", + thread_id="", ) @parametrize def test_method_delete(self, client: OpenAI) -> None: message = client.beta.threads.messages.delete( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert_matches_type(MessageDeleted, message, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.beta.threads.messages.with_raw_response.delete( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -269,8 +261,8 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: with client.beta.threads.messages.with_streaming_response.delete( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -284,14 +276,14 @@ def test_streaming_response_delete(self, client: OpenAI) -> None: def test_path_params_delete(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.messages.with_raw_response.delete( - "string", + message_id="message_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): client.beta.threads.messages.with_raw_response.delete( - "", - thread_id="string", + message_id="", + thread_id="thread_id", ) @@ -301,7 +293,7 @@ class TestAsyncMessages: @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.create( - "string", + thread_id="thread_id", content="string", role="user", ) @@ -310,31 +302,23 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.create( - "string", + thread_id="thread_id", content="string", role="user", attachments=[ { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": 
"code_interpreter"}, {"type": "code_interpreter"}], - }, - { - "file_id": "string", - "tools": [{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - metadata={}, + metadata={"foo": "string"}, ) assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.messages.with_raw_response.create( - "string", + thread_id="thread_id", content="string", role="user", ) @@ -347,7 +331,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.messages.with_streaming_response.create( - "string", + thread_id="thread_id", content="string", role="user", ) as response: @@ -363,7 +347,7 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.create( - "", + thread_id="", content="string", role="user", ) @@ -371,16 +355,16 @@ async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.retrieve( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.messages.with_raw_response.retrieve( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -391,8 +375,8 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.messages.with_streaming_response.retrieve( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -406,38 +390,38 @@ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> N async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.retrieve( - "string", + message_id="message_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.retrieve( - "", - thread_id="string", + message_id="", + thread_id="thread_id", ) @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.update( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert_matches_type(Message, message, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) 
-> None: message = await async_client.beta.threads.messages.update( - "string", - thread_id="string", - metadata={}, + message_id="message_id", + thread_id="thread_id", + metadata={"foo": "string"}, ) assert_matches_type(Message, message, path=["response"]) @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.messages.with_raw_response.update( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -448,8 +432,8 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.messages.with_streaming_response.update( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -463,39 +447,39 @@ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> Non async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.update( - "string", + message_id="message_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.update( - "", - thread_id="string", + message_id="", + thread_id="thread_id", ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.list( - "string", + thread_id="thread_id", ) assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.list( - "string", - after="string", - before="string", + thread_id="thread_id", + after="after", + before="before", limit=0, order="asc", - run_id="string", + run_id="run_id", ) assert_matches_type(AsyncCursorPage[Message], message, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.messages.with_raw_response.list( - "string", + thread_id="thread_id", ) assert response.is_closed is True @@ -506,7 +490,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.messages.with_streaming_response.list( - "string", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -520,22 +504,22 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.list( - "", + thread_id="", ) @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: message = await async_client.beta.threads.messages.delete( - "string", - thread_id="string", + message_id="message_id", + 
thread_id="thread_id", ) assert_matches_type(MessageDeleted, message, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.messages.with_raw_response.delete( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -546,8 +530,8 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.messages.with_streaming_response.delete( - "string", - thread_id="string", + message_id="message_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -561,12 +545,12 @@ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> Non async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.delete( - "string", + message_id="message_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `message_id` but received ''"): await async_client.beta.threads.messages.with_raw_response.delete( - "", - thread_id="string", + message_id="", + thread_id="thread_id", ) diff --git a/tests/api_resources/beta/threads/test_runs.py b/tests/api_resources/beta/threads/test_runs.py index 089dd1253e..01a1ce9ea4 100644 --- a/tests/api_resources/beta/threads/test_runs.py +++ b/tests/api_resources/beta/threads/test_runs.py @@ -14,8 +14,6 @@ Run, ) -# pyright: reportDeprecated=false - base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") @@ -25,122 +23,43 @@ class TestRuns: @parametrize def test_method_create_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", ) assert_matches_type(Run, run, path=["response"]) @parametrize def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.create( - "string", - assistant_id="string", - additional_instructions="string", + thread_id="thread_id", + assistant_id="assistant_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", additional_messages=[ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": 
"code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], - instructions="string", + instructions="instructions", max_completion_tokens=256, max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", + metadata={"foo": "string"}, + model="gpt-4o", + parallel_tool_calls=True, + reasoning_effort="low", + response_format="auto", stream=False, temperature=1, tool_choice="none", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, truncation_strategy={ "type": "auto", @@ -152,8 +71,8 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: @parametrize def test_raw_response_create_overload_1(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", ) assert response.is_closed is True @@ -164,8 +83,8 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -179,15 +98,15 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: def test_path_params_create_overload_1(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.create( - "", - assistant_id="string", + thread_id="", + assistant_id="assistant_id", ) @parametrize def test_method_create_overload_2(self, client: OpenAI) -> None: run_stream = client.beta.threads.runs.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", stream=True, ) run_stream.response.close() @@ -195,114 +114,35 @@ def test_method_create_overload_2(self, client: OpenAI) -> None: @parametrize def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: run_stream = client.beta.threads.runs.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", stream=True, - additional_instructions="string", + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", additional_messages=[ { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - 
{"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], - instructions="string", + instructions="instructions", max_completion_tokens=256, max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", + metadata={"foo": "string"}, + model="gpt-4o", + parallel_tool_calls=True, + reasoning_effort="low", + response_format="auto", temperature=1, tool_choice="none", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, truncation_strategy={ "type": "auto", @@ -314,8 +154,8 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: @parametrize def test_raw_response_create_overload_2(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", stream=True, ) @@ -326,8 +166,8 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", stream=True, ) as response: assert not response.is_closed @@ -342,24 +182,24 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: def test_path_params_create_overload_2(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.create( - "", - assistant_id="string", + thread_id="", + assistant_id="assistant_id", stream=True, ) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: run = client.beta.threads.runs.retrieve( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert_matches_type(Run, run, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.retrieve( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert 
response.is_closed is True @@ -370,8 +210,8 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.retrieve( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -385,38 +225,38 @@ def test_streaming_response_retrieve(self, client: OpenAI) -> None: def test_path_params_retrieve(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.retrieve( - "string", + run_id="run_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): client.beta.threads.runs.with_raw_response.retrieve( - "", - thread_id="string", + run_id="", + thread_id="thread_id", ) @parametrize def test_method_update(self, client: OpenAI) -> None: run = client.beta.threads.runs.update( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert_matches_type(Run, run, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: run = client.beta.threads.runs.update( - "string", - thread_id="string", - metadata={}, + run_id="run_id", + thread_id="thread_id", + metadata={"foo": "string"}, ) assert_matches_type(Run, run, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.update( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -427,8 +267,8 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.update( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -442,29 +282,29 @@ def test_streaming_response_update(self, client: OpenAI) -> None: def test_path_params_update(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.update( - "string", + run_id="run_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): client.beta.threads.runs.with_raw_response.update( - "", - thread_id="string", + run_id="", + thread_id="thread_id", ) @parametrize def test_method_list(self, client: OpenAI) -> None: run = client.beta.threads.runs.list( - "string", + thread_id="thread_id", ) assert_matches_type(SyncCursorPage[Run], run, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: run = client.beta.threads.runs.list( - "string", - after="string", - before="string", + thread_id="thread_id", + after="after", + before="before", limit=0, order="asc", ) @@ -473,7 +313,7 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.list( - "string", + thread_id="thread_id", ) assert response.is_closed is True @@ -484,7 +324,7 @@ def 
test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.list( - "string", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -498,22 +338,22 @@ def test_streaming_response_list(self, client: OpenAI) -> None: def test_path_params_list(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.list( - "", + thread_id="", ) @parametrize def test_method_cancel(self, client: OpenAI) -> None: run = client.beta.threads.runs.cancel( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert_matches_type(Run, run, path=["response"]) @parametrize def test_raw_response_cancel(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.cancel( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -524,8 +364,8 @@ def test_raw_response_cancel(self, client: OpenAI) -> None: @parametrize def test_streaming_response_cancel(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.cancel( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -539,43 +379,35 @@ def test_streaming_response_cancel(self, client: OpenAI) -> None: def test_path_params_cancel(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.cancel( - "string", + run_id="run_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): client.beta.threads.runs.with_raw_response.cancel( - "", - thread_id="string", + run_id="", + thread_id="thread_id", ) @parametrize def test_method_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) assert_matches_type(Run, run, path=["response"]) @parametrize def test_method_submit_tool_outputs_with_all_params_overload_1(self, client: OpenAI) -> None: run = client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", tool_outputs=[ { - "tool_call_id": "string", - "output": "string", - }, - { - "tool_call_id": "string", - "output": "string", - }, - { - "tool_call_id": "string", - "output": "string", - }, + "output": "output", + "tool_call_id": "tool_call_id", + } ], stream=False, ) @@ -584,9 +416,9 @@ def test_method_submit_tool_outputs_with_all_params_overload_1(self, client: Ope @parametrize def test_raw_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) assert response.is_closed is True @@ -597,9 +429,9 @@ def test_raw_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> No @parametrize def 
test_streaming_response_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -613,35 +445,35 @@ def test_streaming_response_submit_tool_outputs_overload_1(self, client: OpenAI) def test_path_params_submit_tool_outputs_overload_1(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", + run_id="run_id", thread_id="", - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "", - thread_id="string", - tool_outputs=[{}, {}, {}], + run_id="", + thread_id="thread_id", + tool_outputs=[{}], ) @parametrize def test_method_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: run_stream = client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) run_stream.response.close() @parametrize def test_raw_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: response = client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -651,10 +483,10 @@ def test_raw_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> No @parametrize def test_streaming_response_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: with client.beta.threads.runs.with_streaming_response.submit_tool_outputs( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -668,18 +500,18 @@ def test_streaming_response_submit_tool_outputs_overload_2(self, client: OpenAI) def test_path_params_submit_tool_outputs_overload_2(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", + run_id="run_id", thread_id="", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "", - thread_id="string", + run_id="", + thread_id="thread_id", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) @@ -689,122 +521,43 @@ class TestAsyncRuns: @parametrize async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", ) assert_matches_type(Run, run, path=["response"]) @parametrize async def test_method_create_with_all_params_overload_1(self, 
async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.create( - "string", - assistant_id="string", - additional_instructions="string", + thread_id="thread_id", + assistant_id="assistant_id", + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", additional_messages=[ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], - instructions="string", + instructions="instructions", max_completion_tokens=256, max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", + metadata={"foo": "string"}, + model="gpt-4o", + parallel_tool_calls=True, + reasoning_effort="low", + response_format="auto", stream=False, temperature=1, tool_choice="none", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, truncation_strategy={ "type": "auto", @@ -816,8 +569,8 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn @parametrize async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", ) assert response.is_closed is True @@ -828,8 +581,8 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) - @parametrize async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -843,15 +596,15 @@ async def 
test_streaming_response_create_overload_1(self, async_client: AsyncOpe async def test_path_params_create_overload_1(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.create( - "", - assistant_id="string", + thread_id="", + assistant_id="assistant_id", ) @parametrize async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: run_stream = await async_client.beta.threads.runs.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", stream=True, ) await run_stream.response.aclose() @@ -859,114 +612,35 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None @parametrize async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: run_stream = await async_client.beta.threads.runs.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", stream=True, - additional_instructions="string", + include=["step_details.tool_calls[*].file_search.results[*].content"], + additional_instructions="additional_instructions", additional_messages=[ { - "role": "user", "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { - "role": "user", - "content": "string", - "attachments": [ - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - ], - "metadata": {}, - }, - { "role": "user", - "content": "string", "attachments": [ { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, - { - "file_id": "string", - "tools": [ - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - {"type": "code_interpreter"}, - ], - }, + "file_id": "file_id", + "tools": [{"type": "code_interpreter"}], + } ], - "metadata": {}, - }, + "metadata": {"foo": "string"}, + } ], - instructions="string", + instructions="instructions", max_completion_tokens=256, max_prompt_tokens=256, - metadata={}, - model="gpt-4-turbo", - response_format="none", + metadata={"foo": "string"}, + model="gpt-4o", + parallel_tool_calls=True, + reasoning_effort="low", + response_format="auto", temperature=1, tool_choice="none", - tools=[{"type": "code_interpreter"}, {"type": "code_interpreter"}, {"type": "code_interpreter"}], + tools=[{"type": "code_interpreter"}], top_p=1, truncation_strategy={ "type": "auto", @@ -978,8 +652,8 @@ async def test_method_create_with_all_params_overload_2(self, 
async_client: Asyn @parametrize async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", stream=True, ) @@ -990,8 +664,8 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) - @parametrize async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.create( - "string", - assistant_id="string", + thread_id="thread_id", + assistant_id="assistant_id", stream=True, ) as response: assert not response.is_closed @@ -1006,24 +680,24 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe async def test_path_params_create_overload_2(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.create( - "", - assistant_id="string", + thread_id="", + assistant_id="assistant_id", stream=True, ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.retrieve( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert_matches_type(Run, run, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.retrieve( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -1034,8 +708,8 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.retrieve( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1049,38 +723,38 @@ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> N async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.retrieve( - "string", + run_id="run_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.retrieve( - "", - thread_id="string", + run_id="", + thread_id="thread_id", ) @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.update( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert_matches_type(Run, run, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.update( - "string", - thread_id="string", - metadata={}, + run_id="run_id", + thread_id="thread_id", + metadata={"foo": "string"}, ) assert_matches_type(Run, run, path=["response"]) @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: response = await 
async_client.beta.threads.runs.with_raw_response.update( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -1091,8 +765,8 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.update( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1106,29 +780,29 @@ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> Non async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.update( - "string", + run_id="run_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.update( - "", - thread_id="string", + run_id="", + thread_id="thread_id", ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.list( - "string", + thread_id="thread_id", ) assert_matches_type(AsyncCursorPage[Run], run, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.list( - "string", - after="string", - before="string", + thread_id="thread_id", + after="after", + before="before", limit=0, order="asc", ) @@ -1137,7 +811,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> N @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.list( - "string", + thread_id="thread_id", ) assert response.is_closed is True @@ -1148,7 +822,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.list( - "string", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1162,22 +836,22 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.list( - "", + thread_id="", ) @parametrize async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.cancel( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert_matches_type(Run, run, path=["response"]) @parametrize async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.cancel( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) assert response.is_closed is True @@ -1188,8 +862,8 @@ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: @parametrize async def 
test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.cancel( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1203,43 +877,35 @@ async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> Non async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.cancel( - "string", + run_id="run_id", thread_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.cancel( - "", - thread_id="string", + run_id="", + thread_id="thread_id", ) @parametrize async def test_method_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) assert_matches_type(Run, run, path=["response"]) @parametrize async def test_method_submit_tool_outputs_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: run = await async_client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", tool_outputs=[ { - "tool_call_id": "string", - "output": "string", - }, - { - "tool_call_id": "string", - "output": "string", - }, - { - "tool_call_id": "string", - "output": "string", - }, + "output": "output", + "tool_call_id": "tool_call_id", + } ], stream=False, ) @@ -1248,9 +914,9 @@ async def test_method_submit_tool_outputs_with_all_params_overload_1(self, async @parametrize async def test_raw_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) assert response.is_closed is True @@ -1261,9 +927,9 @@ async def test_raw_response_submit_tool_outputs_overload_1(self, async_client: A @parametrize async def test_streaming_response_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( - "string", - thread_id="string", - tool_outputs=[{}, {}, {}], + run_id="run_id", + thread_id="thread_id", + tool_outputs=[{}], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1277,35 +943,35 @@ async def test_streaming_response_submit_tool_outputs_overload_1(self, async_cli async def test_path_params_submit_tool_outputs_overload_1(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", + run_id="run_id", thread_id="", - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "", - thread_id="string", - 
tool_outputs=[{}, {}, {}], + run_id="", + thread_id="thread_id", + tool_outputs=[{}], ) @parametrize async def test_method_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: run_stream = await async_client.beta.threads.runs.submit_tool_outputs( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) await run_stream.response.aclose() @parametrize async def test_raw_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: response = await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1315,10 +981,10 @@ async def test_raw_response_submit_tool_outputs_overload_2(self, async_client: A @parametrize async def test_streaming_response_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: async with async_client.beta.threads.runs.with_streaming_response.submit_tool_outputs( - "string", - thread_id="string", + run_id="run_id", + thread_id="thread_id", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -1332,16 +998,16 @@ async def test_streaming_response_submit_tool_outputs_overload_2(self, async_cli async def test_path_params_submit_tool_outputs_overload_2(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `thread_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "string", + run_id="run_id", thread_id="", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `run_id` but received ''"): await async_client.beta.threads.runs.with_raw_response.submit_tool_outputs( - "", - thread_id="string", + run_id="", + thread_id="thread_id", stream=True, - tool_outputs=[{}, {}, {}], + tool_outputs=[{}], ) diff --git a/tests/api_resources/beta/vector_stores/test_files.py b/tests/api_resources/beta/vector_stores/test_files.py deleted file mode 100644 index 58301e2d37..0000000000 --- a/tests/api_resources/beta/vector_stores/test_files.py +++ /dev/null @@ -1,402 +0,0 @@ -# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations - -import os -from typing import Any, cast - -import pytest - -from openai import OpenAI, AsyncOpenAI -from tests.utils import assert_matches_type -from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.vector_stores import ( - VectorStoreFile, - VectorStoreFileDeleted, -) - -base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") - - -class TestFiles: - parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - def test_method_create(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.create( - "vs_abc123", - file_id="string", - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.vector_stores.files.with_raw_response.create( - "vs_abc123", - file_id="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.vector_stores.files.with_streaming_response.create( - "vs_abc123", - file_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_create(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.create( - "", - file_id="string", - ) - - @parametrize - def test_method_retrieve(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.retrieve( - "file-abc123", - vector_store_id="vs_abc123", - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.vector_stores.files.with_raw_response.retrieve( - "file-abc123", - vector_store_id="vs_abc123", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.vector_stores.files.with_streaming_response.retrieve( - "file-abc123", - vector_store_id="vs_abc123", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_retrieve(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.retrieve( - "file-abc123", - vector_store_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.retrieve( - "", - vector_store_id="vs_abc123", - ) - - @parametrize - def 
test_method_list(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.list( - "string", - ) - assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) - - @parametrize - def test_method_list_with_all_params(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.list( - "string", - after="string", - before="string", - filter="in_progress", - limit=0, - order="asc", - ) - assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) - - @parametrize - def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.vector_stores.files.with_raw_response.list( - "string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) - - @parametrize - def test_streaming_response_list(self, client: OpenAI) -> None: - with client.beta.vector_stores.files.with_streaming_response.list( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_list(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.list( - "", - ) - - @parametrize - def test_method_delete(self, client: OpenAI) -> None: - file = client.beta.vector_stores.files.delete( - "string", - vector_store_id="string", - ) - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) - - @parametrize - def test_raw_response_delete(self, client: OpenAI) -> None: - response = client.beta.vector_stores.files.with_raw_response.delete( - "string", - vector_store_id="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) - - @parametrize - def test_streaming_response_delete(self, client: OpenAI) -> None: - with client.beta.vector_stores.files.with_streaming_response.delete( - "string", - vector_store_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = response.parse() - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - def test_path_params_delete(self, client: OpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.delete( - "string", - vector_store_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - client.beta.vector_stores.files.with_raw_response.delete( - "", - vector_store_id="string", - ) - - -class TestAsyncFiles: - parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) - - @parametrize - async def test_method_create(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.create( - "vs_abc123", - file_id="string", - ) - assert_matches_type(VectorStoreFile, file, 
path=["response"]) - - @parametrize - async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.files.with_raw_response.create( - "vs_abc123", - file_id="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.files.with_streaming_response.create( - "vs_abc123", - file_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.create( - "", - file_id="string", - ) - - @parametrize - async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.retrieve( - "file-abc123", - vector_store_id="vs_abc123", - ) - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.files.with_raw_response.retrieve( - "file-abc123", - vector_store_id="vs_abc123", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - @parametrize - async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.files.with_streaming_response.retrieve( - "file-abc123", - vector_store_id="vs_abc123", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(VectorStoreFile, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.retrieve( - "file-abc123", - vector_store_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.retrieve( - "", - vector_store_id="vs_abc123", - ) - - @parametrize - async def test_method_list(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.list( - "string", - ) - assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) - - @parametrize - async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.list( - "string", - after="string", - before="string", - filter="in_progress", - limit=0, - order="asc", - ) - 
assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) - - @parametrize - async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.files.with_raw_response.list( - "string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) - - @parametrize - async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.files.with_streaming_response.list( - "string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.list( - "", - ) - - @parametrize - async def test_method_delete(self, async_client: AsyncOpenAI) -> None: - file = await async_client.beta.vector_stores.files.delete( - "string", - vector_store_id="string", - ) - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) - - @parametrize - async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.files.with_raw_response.delete( - "string", - vector_store_id="string", - ) - - assert response.is_closed is True - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - file = response.parse() - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) - - @parametrize - async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.files.with_streaming_response.delete( - "string", - vector_store_id="string", - ) as response: - assert not response.is_closed - assert response.http_request.headers.get("X-Stainless-Lang") == "python" - - file = await response.parse() - assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) - - assert cast(Any, response.is_closed) is True - - @parametrize - async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: - with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.delete( - "string", - vector_store_id="", - ) - - with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): - await async_client.beta.vector_stores.files.with_raw_response.delete( - "", - vector_store_id="string", - ) diff --git a/src/openai/types/beta/chat/__init__.py b/tests/api_resources/chat/completions/__init__.py similarity index 70% rename from src/openai/types/beta/chat/__init__.py rename to tests/api_resources/chat/completions/__init__.py index f8ee8b14b1..fd8019a9a1 100644 --- a/src/openai/types/beta/chat/__init__.py +++ b/tests/api_resources/chat/completions/__init__.py @@ -1,3 +1 @@ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
- -from __future__ import annotations diff --git a/tests/api_resources/chat/completions/test_messages.py b/tests/api_resources/chat/completions/test_messages.py new file mode 100644 index 0000000000..5caac9ec6c --- /dev/null +++ b/tests/api_resources/chat/completions/test_messages.py @@ -0,0 +1,119 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.chat import ChatCompletionStoreMessage + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestMessages: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + message = client.chat.completions.messages.list( + completion_id="completion_id", + ) + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + message = client.chat.completions.messages.list( + completion_id="completion_id", + after="after", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.chat.completions.messages.with_raw_response.list( + completion_id="completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.chat.completions.messages.with_streaming_response.list( + completion_id="completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.messages.with_raw_response.list( + completion_id="", + ) + + +class TestAsyncMessages: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + message = await async_client.chat.completions.messages.list( + completion_id="completion_id", + ) + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + message = await async_client.chat.completions.messages.list( + completion_id="completion_id", + after="after", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await 
async_client.chat.completions.messages.with_raw_response.list( + completion_id="completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + message = response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.messages.with_streaming_response.list( + completion_id="completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + message = await response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletionStoreMessage], message, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.messages.with_raw_response.list( + completion_id="", + ) diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py index 1c195c4001..3c4a9e4a19 100644 --- a/tests/api_resources/chat/test_completions.py +++ b/tests/api_resources/chat/test_completions.py @@ -9,8 +9,10 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage from openai.types.chat import ( ChatCompletion, + ChatCompletionDeleted, ) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") @@ -25,10 +27,10 @@ def test_method_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -38,61 +40,73 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", - "name": "string", + "role": "developer", + "name": "name", } ], - model="gpt-4-turbo", + model="gpt-4o", + audio={ + "format": "wav", + "voice": "alloy", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + reasoning_effort="low", + response_format={"type": "text"}, + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, stream=False, stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], top_logprobs=0, top_p=1, 
user="user-1234", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -102,10 +116,10 @@ def test_raw_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) assert response.is_closed is True @@ -119,10 +133,10 @@ def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -138,10 +152,10 @@ def test_method_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) completion_stream.response.close() @@ -152,61 +166,73 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", - "name": "string", + "role": "developer", + "name": "name", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, + audio={ + "format": "wav", + "voice": "alloy", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + reasoning_effort="low", + response_format={"type": "text"}, + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], top_logprobs=0, top_p=1, user="user-1234", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) completion_stream.response.close() @@ -216,10 +242,10 @@ def test_raw_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) @@ -233,10 +259,10 @@ def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) as response: assert not response.is_closed @@ -247,6 +273,160 @@ def 
test_streaming_response_create_overload_2(self, client: OpenAI) -> None: assert cast(Any, response.is_closed) is True + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + completion = client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + completion = client.chat.completions.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.update( + completion_id="", + metadata={"foo": "string"}, + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + completion = client.chat.completions.list() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + completion = client.chat.completions.list( + after="after", + limit=0, + metadata={"foo": "string"}, + model="model", + order="asc", + ) + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.list() + + assert response.is_closed is 
True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(SyncCursorPage[ChatCompletion], completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + completion = client.chat.completions.delete( + "completion_id", + ) + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.chat.completions.with_raw_response.delete( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.chat.completions.with_streaming_response.delete( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + client.chat.completions.with_raw_response.delete( + "", + ) + class TestAsyncCompletions: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @@ -257,10 +437,10 @@ async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -270,61 +450,73 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn messages=[ { "content": "string", - "role": "system", - "name": "string", + "role": "developer", + "name": "name", } ], - model="gpt-4-turbo", + model="gpt-4o", + audio={ + "format": "wav", + "voice": "alloy", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + reasoning_effort="low", + response_format={"type": "text"}, + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, stream=False, stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + 
"description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], top_logprobs=0, top_p=1, user="user-1234", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) assert_matches_type(ChatCompletion, completion, path=["response"]) @@ -334,10 +526,10 @@ async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) - messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) assert response.is_closed is True @@ -351,10 +543,10 @@ async def test_streaming_response_create_overload_1(self, async_client: AsyncOpe messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -370,10 +562,10 @@ async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) await completion_stream.response.aclose() @@ -384,61 +576,73 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn messages=[ { "content": "string", - "role": "system", - "name": "string", + "role": "developer", + "name": "name", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, + audio={ + "format": "wav", + "voice": "alloy", + }, frequency_penalty=-2, function_call="none", functions=[ { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, } ], logit_bias={"foo": 0}, logprobs=True, + max_completion_tokens=0, max_tokens=0, + metadata={"foo": "string"}, + modalities=["text"], n=1, + parallel_tool_calls=True, + prediction={ + "content": "string", + "type": "content", + }, presence_penalty=-2, - response_format={"type": "json_object"}, - seed=-9223372036854776000, - stop="string", + reasoning_effort="low", + response_format={"type": "text"}, + seed=-9007199254740991, + service_tier="auto", + stop="\n", + store=True, stream_options={"include_usage": True}, temperature=1, tool_choice="none", tools=[ { - "type": "function", "function": { - "description": "string", - "name": "string", + "name": "name", + "description": "description", "parameters": {"foo": "bar"}, + "strict": True, }, - }, - { "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, - { - "type": "function", - "function": { - "description": "string", - "name": "string", - "parameters": {"foo": "bar"}, - }, - }, + } ], top_logprobs=0, top_p=1, user="user-1234", + web_search_options={ + "search_context_size": "low", + "user_location": { + "approximate": { + "city": "city", + "country": "country", + "region": "region", + "timezone": "timezone", + }, + "type": "approximate", + }, + }, ) await completion_stream.response.aclose() @@ -448,10 +652,10 @@ async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) - messages=[ { 
"content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) @@ -465,10 +669,10 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe messages=[ { "content": "string", - "role": "system", + "role": "developer", } ], - model="gpt-4-turbo", + model="gpt-4o", stream=True, ) as response: assert not response.is_closed @@ -478,3 +682,157 @@ async def test_streaming_response_create_overload_2(self, async_client: AsyncOpe await stream.close() assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.retrieve( + "completion_id", + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.retrieve( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.retrieve( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.retrieve( + "", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.update( + completion_id="completion_id", + metadata={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletion, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.update( + completion_id="", + 
metadata={"foo": "string"}, + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.list() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.list( + after="after", + limit=0, + metadata={"foo": "string"}, + model="model", + order="asc", + ) + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.list() + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.list() as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(AsyncCursorPage[ChatCompletion], completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + completion = await async_client.chat.completions.delete( + "completion_id", + ) + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.chat.completions.with_raw_response.delete( + "completion_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + completion = response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.chat.completions.with_streaming_response.delete( + "completion_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + completion = await response.parse() + assert_matches_type(ChatCompletionDeleted, completion, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `completion_id` but received ''"): + await async_client.chat.completions.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py index 915d5c6f63..e65f84c818 100644 --- a/tests/api_resources/fine_tuning/jobs/test_checkpoints.py +++ b/tests/api_resources/fine_tuning/jobs/test_checkpoints.py @@ -21,15 +21,15 @@ class TestCheckpoints: @parametrize def test_method_list(self, client: OpenAI) -> None: checkpoint = client.fine_tuning.jobs.checkpoints.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) @parametrize def 
test_method_list_with_all_params(self, client: OpenAI) -> None: checkpoint = client.fine_tuning.jobs.checkpoints.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - after="string", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", limit=0, ) assert_matches_type(SyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) @@ -37,7 +37,7 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.checkpoints.with_raw_response.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert response.is_closed is True @@ -48,7 +48,7 @@ def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: with client.fine_tuning.jobs.checkpoints.with_streaming_response.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -62,7 +62,7 @@ def test_streaming_response_list(self, client: OpenAI) -> None: def test_path_params_list(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): client.fine_tuning.jobs.checkpoints.with_raw_response.list( - "", + fine_tuning_job_id="", ) @@ -72,15 +72,15 @@ class TestAsyncCheckpoints: @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: checkpoint = await async_client.fine_tuning.jobs.checkpoints.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: checkpoint = await async_client.fine_tuning.jobs.checkpoints.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - after="string", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", limit=0, ) assert_matches_type(AsyncCursorPage[FineTuningJobCheckpoint], checkpoint, path=["response"]) @@ -88,7 +88,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> N @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: response = await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert response.is_closed is True @@ -99,7 +99,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async with async_client.fine_tuning.jobs.checkpoints.with_streaming_response.list( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -113,5 +113,5 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): await async_client.fine_tuning.jobs.checkpoints.with_raw_response.list( - "", + fine_tuning_job_id="", ) diff --git a/tests/api_resources/fine_tuning/test_jobs.py 
b/tests/api_resources/fine_tuning/test_jobs.py index 1ff6d63b31..342a70dfd8 100644 --- a/tests/api_resources/fine_tuning/test_jobs.py +++ b/tests/api_resources/fine_tuning/test_jobs.py @@ -24,7 +24,7 @@ class TestJobs: @parametrize def test_method_create(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) assert_matches_type(FineTuningJob, job, path=["response"]) @@ -32,7 +32,7 @@ def test_method_create(self, client: OpenAI) -> None: @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", hyperparameters={ "batch_size": "auto", @@ -44,30 +44,31 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: "type": "wandb", "wandb": { "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], + "entity": "entity", + "name": "name", + "tags": ["custom-tag"], }, + } + ], + metadata={"foo": "string"}, + method={ + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } }, - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], - }, + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } }, - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], - }, - }, - ], + "type": "supervised", + }, seed=42, suffix="x", validation_file="file-abc123", @@ -77,7 +78,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_create(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) @@ -89,7 +90,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: with client.fine_tuning.jobs.with_streaming_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) as response: assert not response.is_closed @@ -146,8 +147,9 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list( - after="string", + after="after", limit=0, + metadata={"foo": "string"}, ) assert_matches_type(SyncCursorPage[FineTuningJob], job, path=["response"]) @@ -212,15 +214,15 @@ def test_path_params_cancel(self, client: OpenAI) -> None: @parametrize def test_method_list_events(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(SyncCursorPage[FineTuningJobEvent], job, path=["response"]) @parametrize def test_method_list_events_with_all_params(self, client: OpenAI) -> None: job = client.fine_tuning.jobs.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - after="string", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", limit=0, ) assert_matches_type(SyncCursorPage[FineTuningJobEvent], job, path=["response"]) @@ -228,7 +230,7 @@ def 
test_method_list_events_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list_events(self, client: OpenAI) -> None: response = client.fine_tuning.jobs.with_raw_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert response.is_closed is True @@ -239,7 +241,7 @@ def test_raw_response_list_events(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list_events(self, client: OpenAI) -> None: with client.fine_tuning.jobs.with_streaming_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -253,7 +255,7 @@ def test_streaming_response_list_events(self, client: OpenAI) -> None: def test_path_params_list_events(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): client.fine_tuning.jobs.with_raw_response.list_events( - "", + fine_tuning_job_id="", ) @@ -263,7 +265,7 @@ class TestAsyncJobs: @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) assert_matches_type(FineTuningJob, job, path=["response"]) @@ -271,7 +273,7 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", hyperparameters={ "batch_size": "auto", @@ -283,30 +285,31 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> "type": "wandb", "wandb": { "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], + "entity": "entity", + "name": "name", + "tags": ["custom-tag"], }, + } + ], + metadata={"foo": "string"}, + method={ + "dpo": { + "hyperparameters": { + "batch_size": "auto", + "beta": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } }, - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], - }, + "supervised": { + "hyperparameters": { + "batch_size": "auto", + "learning_rate_multiplier": "auto", + "n_epochs": "auto", + } }, - { - "type": "wandb", - "wandb": { - "project": "my-wandb-project", - "name": "string", - "entity": "string", - "tags": ["custom-tag", "custom-tag", "custom-tag"], - }, - }, - ], + "type": "supervised", + }, seed=42, suffix="x", validation_file="file-abc123", @@ -316,7 +319,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.fine_tuning.jobs.with_raw_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", training_file="file-abc123", ) @@ -328,7 +331,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.fine_tuning.jobs.with_streaming_response.create( - model="gpt-3.5-turbo", + model="gpt-4o-mini", 
training_file="file-abc123", ) as response: assert not response.is_closed @@ -385,8 +388,9 @@ async def test_method_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.list( - after="string", + after="after", limit=0, + metadata={"foo": "string"}, ) assert_matches_type(AsyncCursorPage[FineTuningJob], job, path=["response"]) @@ -451,15 +455,15 @@ async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list_events(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"]) @parametrize async def test_method_list_events_with_all_params(self, async_client: AsyncOpenAI) -> None: job = await async_client.fine_tuning.jobs.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", - after="string", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", + after="after", limit=0, ) assert_matches_type(AsyncCursorPage[FineTuningJobEvent], job, path=["response"]) @@ -467,7 +471,7 @@ async def test_method_list_events_with_all_params(self, async_client: AsyncOpenA @parametrize async def test_raw_response_list_events(self, async_client: AsyncOpenAI) -> None: response = await async_client.fine_tuning.jobs.with_raw_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) assert response.is_closed is True @@ -478,7 +482,7 @@ async def test_raw_response_list_events(self, async_client: AsyncOpenAI) -> None @parametrize async def test_streaming_response_list_events(self, async_client: AsyncOpenAI) -> None: async with async_client.fine_tuning.jobs.with_streaming_response.list_events( - "ft-AF1WoRqd3aJAHsqc9NY7iL8F", + fine_tuning_job_id="ft-AF1WoRqd3aJAHsqc9NY7iL8F", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -492,5 +496,5 @@ async def test_streaming_response_list_events(self, async_client: AsyncOpenAI) - async def test_path_params_list_events(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `fine_tuning_job_id` but received ''"): await async_client.fine_tuning.jobs.with_raw_response.list_events( - "", + fine_tuning_job_id="", ) diff --git a/tests/api_resources/responses/__init__.py b/tests/api_resources/responses/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/responses/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/responses/test_input_items.py b/tests/api_resources/responses/test_input_items.py new file mode 100644 index 0000000000..28c5e8ca1f --- /dev/null +++ b/tests/api_resources/responses/test_input_items.py @@ -0,0 +1,121 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncCursorPage, AsyncCursorPage +from openai.types.responses.response_item_list import Data + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestInputItems: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + input_item = client.responses.input_items.list( + response_id="response_id", + ) + assert_matches_type(SyncCursorPage[Data], input_item, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + input_item = client.responses.input_items.list( + response_id="response_id", + after="after", + before="before", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[Data], input_item, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.responses.input_items.with_raw_response.list( + response_id="response_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + input_item = response.parse() + assert_matches_type(SyncCursorPage[Data], input_item, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.responses.input_items.with_streaming_response.list( + response_id="response_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + input_item = response.parse() + assert_matches_type(SyncCursorPage[Data], input_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.input_items.with_raw_response.list( + response_id="", + ) + + +class TestAsyncInputItems: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) -> None: + input_item = await async_client.responses.input_items.list( + response_id="response_id", + ) + assert_matches_type(AsyncCursorPage[Data], input_item, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + input_item = await async_client.responses.input_items.list( + response_id="response_id", + after="after", + before="before", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[Data], input_item, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.input_items.with_raw_response.list( + response_id="response_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + input_item = response.parse() + assert_matches_type(AsyncCursorPage[Data], input_item, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.input_items.with_streaming_response.list( + response_id="response_id", + ) as 
response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + input_item = await response.parse() + assert_matches_type(AsyncCursorPage[Data], input_item, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.input_items.with_raw_response.list( + response_id="", + ) diff --git a/tests/api_resources/test_batches.py b/tests/api_resources/test_batches.py index 6f9b598e61..02eade0963 100644 --- a/tests/api_resources/test_batches.py +++ b/tests/api_resources/test_batches.py @@ -22,8 +22,8 @@ class TestBatches: def test_method_create(self, client: OpenAI) -> None: batch = client.batches.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) assert_matches_type(Batch, batch, path=["response"]) @@ -31,8 +31,8 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, client: OpenAI) -> None: batch = client.batches.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", metadata={"foo": "string"}, ) assert_matches_type(Batch, batch, path=["response"]) @@ -41,8 +41,8 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None: def test_raw_response_create(self, client: OpenAI) -> None: response = client.batches.with_raw_response.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) assert response.is_closed is True @@ -54,8 +54,8 @@ def test_raw_response_create(self, client: OpenAI) -> None: def test_streaming_response_create(self, client: OpenAI) -> None: with client.batches.with_streaming_response.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -68,14 +68,14 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: batch = client.batches.retrieve( - "string", + "batch_id", ) assert_matches_type(Batch, batch, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.batches.with_raw_response.retrieve( - "string", + "batch_id", ) assert response.is_closed is True @@ -86,7 +86,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.batches.with_streaming_response.retrieve( - "string", + "batch_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -111,7 +111,7 @@ def test_method_list(self, client: OpenAI) -> None: @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: batch = client.batches.list( - after="string", + after="after", limit=0, ) assert_matches_type(SyncCursorPage[Batch], batch, path=["response"]) @@ -139,14 +139,14 @@ def test_streaming_response_list(self, client: OpenAI) -> None: 
@parametrize def test_method_cancel(self, client: OpenAI) -> None: batch = client.batches.cancel( - "string", + "batch_id", ) assert_matches_type(Batch, batch, path=["response"]) @parametrize def test_raw_response_cancel(self, client: OpenAI) -> None: response = client.batches.with_raw_response.cancel( - "string", + "batch_id", ) assert response.is_closed is True @@ -157,7 +157,7 @@ def test_raw_response_cancel(self, client: OpenAI) -> None: @parametrize def test_streaming_response_cancel(self, client: OpenAI) -> None: with client.batches.with_streaming_response.cancel( - "string", + "batch_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -182,8 +182,8 @@ class TestAsyncBatches: async def test_method_create(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) assert_matches_type(Batch, batch, path=["response"]) @@ -191,8 +191,8 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", metadata={"foo": "string"}, ) assert_matches_type(Batch, batch, path=["response"]) @@ -201,8 +201,8 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: response = await async_client.batches.with_raw_response.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) assert response.is_closed is True @@ -214,8 +214,8 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: async with async_client.batches.with_streaming_response.create( completion_window="24h", - endpoint="/v1/chat/completions", - input_file_id="string", + endpoint="/v1/responses", + input_file_id="input_file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -228,14 +228,14 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.retrieve( - "string", + "batch_id", ) assert_matches_type(Batch, batch, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.batches.with_raw_response.retrieve( - "string", + "batch_id", ) assert response.is_closed is True @@ -246,7 +246,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.batches.with_streaming_response.retrieve( - "string", + "batch_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -271,7 +271,7 @@ async def test_method_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def 
test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.list( - after="string", + after="after", limit=0, ) assert_matches_type(AsyncCursorPage[Batch], batch, path=["response"]) @@ -299,14 +299,14 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: batch = await async_client.batches.cancel( - "string", + "batch_id", ) assert_matches_type(Batch, batch, path=["response"]) @parametrize async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: response = await async_client.batches.with_raw_response.cancel( - "string", + "batch_id", ) assert response.is_closed is True @@ -317,7 +317,7 @@ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: async with async_client.batches.with_streaming_response.cancel( - "string", + "batch_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py index 69d914200f..9ec503c1e3 100644 --- a/tests/api_resources/test_completions.py +++ b/tests/api_resources/test_completions.py @@ -38,7 +38,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", stream=False, stream_options={"include_usage": True}, @@ -98,7 +98,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", stream_options={"include_usage": True}, suffix="test.", @@ -160,7 +160,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", stream=False, stream_options={"include_usage": True}, @@ -220,7 +220,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn max_tokens=16, n=1, presence_penalty=-2, - seed=-9223372036854776000, + seed=0, stop="\n", stream_options={"include_usage": True}, suffix="test.", diff --git a/tests/api_resources/test_files.py b/tests/api_resources/test_files.py index 882f0ddbe7..62d2b88ebf 100644 --- a/tests/api_resources/test_files.py +++ b/tests/api_resources/test_files.py @@ -13,7 +13,7 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.types import FileObject, FileDeleted -from openai.pagination import SyncPage, AsyncPage +from openai.pagination import SyncCursorPage, AsyncCursorPage # pyright: reportDeprecated=false @@ -60,14 +60,14 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: file = client.files.retrieve( - "string", + "file_id", ) assert_matches_type(FileObject, file, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.files.with_raw_response.retrieve( - "string", + "file_id", ) assert response.is_closed is True @@ -78,7 +78,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with client.files.with_streaming_response.retrieve( - "string", + 
"file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -98,14 +98,17 @@ def test_path_params_retrieve(self, client: OpenAI) -> None: @parametrize def test_method_list(self, client: OpenAI) -> None: file = client.files.list() - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: file = client.files.list( - purpose="string", + after="after", + limit=0, + order="asc", + purpose="purpose", ) - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) @parametrize def test_raw_response_list(self, client: OpenAI) -> None: @@ -114,7 +117,7 @@ def test_raw_response_list(self, client: OpenAI) -> None: assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: @@ -123,21 +126,21 @@ def test_streaming_response_list(self, client: OpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(SyncPage[FileObject], file, path=["response"]) + assert_matches_type(SyncCursorPage[FileObject], file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize def test_method_delete(self, client: OpenAI) -> None: file = client.files.delete( - "string", + "file_id", ) assert_matches_type(FileDeleted, file, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.files.with_raw_response.delete( - "string", + "file_id", ) assert response.is_closed is True @@ -148,7 +151,7 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: with client.files.with_streaming_response.delete( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -168,9 +171,9 @@ def test_path_params_delete(self, client: OpenAI) -> None: @parametrize @pytest.mark.respx(base_url=base_url) def test_method_content(self, client: OpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) file = client.files.content( - "string", + "file_id", ) assert isinstance(file, _legacy_response.HttpxBinaryResponseContent) assert file.json() == {"foo": "bar"} @@ -178,10 +181,10 @@ def test_method_content(self, client: OpenAI, respx_mock: MockRouter) -> None: @parametrize @pytest.mark.respx(base_url=base_url) def test_raw_response_content(self, client: OpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = client.files.with_raw_response.content( - "string", + "file_id", ) assert response.is_closed is True @@ -192,9 +195,9 @@ def 
test_raw_response_content(self, client: OpenAI, respx_mock: MockRouter) -> N @parametrize @pytest.mark.respx(base_url=base_url) def test_streaming_response_content(self, client: OpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) with client.files.with_streaming_response.content( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -216,7 +219,7 @@ def test_path_params_content(self, client: OpenAI) -> None: def test_method_retrieve_content(self, client: OpenAI) -> None: with pytest.warns(DeprecationWarning): file = client.files.retrieve_content( - "string", + "file_id", ) assert_matches_type(str, file, path=["response"]) @@ -225,7 +228,7 @@ def test_method_retrieve_content(self, client: OpenAI) -> None: def test_raw_response_retrieve_content(self, client: OpenAI) -> None: with pytest.warns(DeprecationWarning): response = client.files.with_raw_response.retrieve_content( - "string", + "file_id", ) assert response.is_closed is True @@ -237,7 +240,7 @@ def test_raw_response_retrieve_content(self, client: OpenAI) -> None: def test_streaming_response_retrieve_content(self, client: OpenAI) -> None: with pytest.warns(DeprecationWarning): with client.files.with_streaming_response.retrieve_content( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -296,14 +299,14 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.retrieve( - "string", + "file_id", ) assert_matches_type(FileObject, file, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.files.with_raw_response.retrieve( - "string", + "file_id", ) assert response.is_closed is True @@ -314,7 +317,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.files.with_streaming_response.retrieve( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -334,14 +337,17 @@ async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.list() - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.list( - purpose="string", + after="after", + limit=0, + order="asc", + purpose="purpose", ) - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @@ -350,7 +356,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: assert response.is_closed is True assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" file = response.parse() - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @@ -359,21 +365,21 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: assert response.http_request.headers.get("X-Stainless-Lang") == "python" file = await response.parse() - assert_matches_type(AsyncPage[FileObject], file, path=["response"]) + assert_matches_type(AsyncCursorPage[FileObject], file, path=["response"]) assert cast(Any, response.is_closed) is True @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: file = await async_client.files.delete( - "string", + "file_id", ) assert_matches_type(FileDeleted, file, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: response = await async_client.files.with_raw_response.delete( - "string", + "file_id", ) assert response.is_closed is True @@ -384,7 +390,7 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: async with async_client.files.with_streaming_response.delete( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -404,9 +410,9 @@ async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: @parametrize @pytest.mark.respx(base_url=base_url) async def test_method_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) file = await async_client.files.content( - "string", + "file_id", ) assert isinstance(file, _legacy_response.HttpxBinaryResponseContent) assert file.json() == {"foo": "bar"} @@ -414,10 +420,10 @@ async def test_method_content(self, async_client: AsyncOpenAI, respx_mock: MockR @parametrize @pytest.mark.respx(base_url=base_url) async def test_raw_response_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) response = await async_client.files.with_raw_response.content( - "string", + "file_id", ) assert response.is_closed is True @@ -428,9 +434,9 @@ async def test_raw_response_content(self, async_client: AsyncOpenAI, respx_mock: @parametrize @pytest.mark.respx(base_url=base_url) async def test_streaming_response_content(self, async_client: AsyncOpenAI, respx_mock: MockRouter) -> None: - respx_mock.get("/files/string/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) + respx_mock.get("/files/file_id/content").mock(return_value=httpx.Response(200, json={"foo": "bar"})) async with async_client.files.with_streaming_response.content( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -452,7 +458,7 @@ async def test_path_params_content(self, async_client: AsyncOpenAI) -> None: async def 
test_method_retrieve_content(self, async_client: AsyncOpenAI) -> None: with pytest.warns(DeprecationWarning): file = await async_client.files.retrieve_content( - "string", + "file_id", ) assert_matches_type(str, file, path=["response"]) @@ -461,7 +467,7 @@ async def test_method_retrieve_content(self, async_client: AsyncOpenAI) -> None: async def test_raw_response_retrieve_content(self, async_client: AsyncOpenAI) -> None: with pytest.warns(DeprecationWarning): response = await async_client.files.with_raw_response.retrieve_content( - "string", + "file_id", ) assert response.is_closed is True @@ -473,7 +479,7 @@ async def test_raw_response_retrieve_content(self, async_client: AsyncOpenAI) -> async def test_streaming_response_retrieve_content(self, async_client: AsyncOpenAI) -> None: with pytest.warns(DeprecationWarning): async with async_client.files.with_streaming_response.retrieve_content( - "string", + "file_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_images.py b/tests/api_resources/test_images.py index 2e31f3354a..9bc9719bc5 100644 --- a/tests/api_resources/test_images.py +++ b/tests/api_resources/test_images.py @@ -31,7 +31,7 @@ def test_method_create_variation_with_all_params(self, client: OpenAI) -> None: model="dall-e-2", n=1, response_format="url", - size="1024x1024", + size="256x256", user="user-1234", ) assert_matches_type(ImagesResponse, image, path=["response"]) @@ -77,7 +77,7 @@ def test_method_edit_with_all_params(self, client: OpenAI) -> None: model="dall-e-2", n=1, response_format="url", - size="1024x1024", + size="256x256", user="user-1234", ) assert_matches_type(ImagesResponse, image, path=["response"]) @@ -123,7 +123,7 @@ def test_method_generate_with_all_params(self, client: OpenAI) -> None: n=1, quality="standard", response_format="url", - size="1024x1024", + size="256x256", style="vivid", user="user-1234", ) @@ -171,7 +171,7 @@ async def test_method_create_variation_with_all_params(self, async_client: Async model="dall-e-2", n=1, response_format="url", - size="1024x1024", + size="256x256", user="user-1234", ) assert_matches_type(ImagesResponse, image, path=["response"]) @@ -217,7 +217,7 @@ async def test_method_edit_with_all_params(self, async_client: AsyncOpenAI) -> N model="dall-e-2", n=1, response_format="url", - size="1024x1024", + size="256x256", user="user-1234", ) assert_matches_type(ImagesResponse, image, path=["response"]) @@ -263,7 +263,7 @@ async def test_method_generate_with_all_params(self, async_client: AsyncOpenAI) n=1, quality="standard", response_format="url", - size="1024x1024", + size="256x256", style="vivid", user="user-1234", ) diff --git a/tests/api_resources/test_models.py b/tests/api_resources/test_models.py index 71f8e5834b..8791507c3e 100644 --- a/tests/api_resources/test_models.py +++ b/tests/api_resources/test_models.py @@ -21,14 +21,14 @@ class TestModels: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: model = client.models.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert_matches_type(Model, model, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: response = client.models.with_raw_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert response.is_closed is True @@ -39,7 +39,7 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: with 
client.models.with_streaming_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -84,14 +84,14 @@ def test_streaming_response_list(self, client: OpenAI) -> None: @parametrize def test_method_delete(self, client: OpenAI) -> None: model = client.models.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: response = client.models.with_raw_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert response.is_closed is True @@ -102,7 +102,7 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: with client.models.with_streaming_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -126,14 +126,14 @@ class TestAsyncModels: @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: model = await async_client.models.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert_matches_type(Model, model, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: response = await async_client.models.with_raw_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) assert response.is_closed is True @@ -144,7 +144,7 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: async with async_client.models.with_streaming_response.retrieve( - "gpt-3.5-turbo", + "gpt-4o-mini", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -189,14 +189,14 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: model = await async_client.models.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert_matches_type(ModelDeleted, model, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: response = await async_client.models.with_raw_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) assert response.is_closed is True @@ -207,7 +207,7 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: async with async_client.models.with_streaming_response.delete( - "ft:gpt-3.5-turbo:acemeco:suffix:abc123", + "ft:gpt-4o-mini:acemeco:suffix:abc123", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" diff --git a/tests/api_resources/test_moderations.py b/tests/api_resources/test_moderations.py index 94b9ecd31b..bbdeb63e49 100644 --- a/tests/api_resources/test_moderations.py +++ b/tests/api_resources/test_moderations.py @@ -28,7 +28,7 @@ def test_method_create(self, client: OpenAI) -> None: def test_method_create_with_all_params(self, 
client: OpenAI) -> None: moderation = client.moderations.create( input="I want to kill them.", - model="text-moderation-stable", + model="omni-moderation-2024-09-26", ) assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) @@ -71,7 +71,7 @@ async def test_method_create(self, async_client: AsyncOpenAI) -> None: async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: moderation = await async_client.moderations.create( input="I want to kill them.", - model="text-moderation-stable", + model="omni-moderation-2024-09-26", ) assert_matches_type(ModerationCreateResponse, moderation, path=["response"]) diff --git a/tests/api_resources/test_responses.py b/tests/api_resources/test_responses.py new file mode 100644 index 0000000000..e45a5becf3 --- /dev/null +++ b/tests/api_resources/test_responses.py @@ -0,0 +1,498 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types.responses import Response + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestResponses: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create_overload_1(self, client: OpenAI) -> None: + response = client.responses.create( + input="string", + model="gpt-4o", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None: + response = client.responses.create( + input="string", + model="gpt-4o", + include=["file_search_call.results"], + instructions="instructions", + max_output_tokens=0, + metadata={"foo": "string"}, + parallel_tool_calls=True, + previous_response_id="previous_response_id", + reasoning={ + "effort": "low", + "generate_summary": "concise", + }, + store=True, + stream=False, + temperature=1, + text={"format": {"type": "text"}}, + tool_choice="none", + tools=[ + { + "type": "file_search", + "vector_store_ids": ["string"], + "filters": { + "key": "key", + "type": "eq", + "value": "string", + }, + "max_num_results": 0, + "ranking_options": { + "ranker": "auto", + "score_threshold": 0, + }, + } + ], + top_p=1, + truncation="auto", + user="user-1234", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_raw_response_create_overload_1(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.create( + input="string", + model="gpt-4o", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_streaming_response_create_overload_1(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.create( + input="string", + model="gpt-4o", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_method_create_overload_2(self, client: OpenAI) -> None: + response_stream = 
client.responses.create( + input="string", + model="gpt-4o", + stream=True, + ) + response_stream.response.close() + + @parametrize + def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None: + response_stream = client.responses.create( + input="string", + model="gpt-4o", + stream=True, + include=["file_search_call.results"], + instructions="instructions", + max_output_tokens=0, + metadata={"foo": "string"}, + parallel_tool_calls=True, + previous_response_id="previous_response_id", + reasoning={ + "effort": "low", + "generate_summary": "concise", + }, + store=True, + temperature=1, + text={"format": {"type": "text"}}, + tool_choice="none", + tools=[ + { + "type": "file_search", + "vector_store_ids": ["string"], + "filters": { + "key": "key", + "type": "eq", + "value": "string", + }, + "max_num_results": 0, + "ranking_options": { + "ranker": "auto", + "score_threshold": 0, + }, + } + ], + top_p=1, + truncation="auto", + user="user-1234", + ) + response_stream.response.close() + + @parametrize + def test_raw_response_create_overload_2(self, client: OpenAI) -> None: + response = client.responses.with_raw_response.create( + input="string", + model="gpt-4o", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + stream.close() + + @parametrize + def test_streaming_response_create_overload_2(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.create( + input="string", + model="gpt-4o", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = response.parse() + stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_retrieve(self, client: OpenAI) -> None: + response = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_method_retrieve_with_all_params(self, client: OpenAI) -> None: + response = client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + include=["file_search_call.results"], + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.with_raw_response.retrieve( + response_id="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + response = 
client.responses.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + assert response is None + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + http_response = client.responses.with_raw_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert response is None + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.responses.with_streaming_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = http_response.parse() + assert response is None + + assert cast(Any, http_response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + client.responses.with_raw_response.delete( + "", + ) + + +class TestAsyncResponses: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.create( + input="string", + model="gpt-4o", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_method_create_with_all_params_overload_1(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.create( + input="string", + model="gpt-4o", + include=["file_search_call.results"], + instructions="instructions", + max_output_tokens=0, + metadata={"foo": "string"}, + parallel_tool_calls=True, + previous_response_id="previous_response_id", + reasoning={ + "effort": "low", + "generate_summary": "concise", + }, + store=True, + stream=False, + temperature=1, + text={"format": {"type": "text"}}, + tool_choice="none", + tools=[ + { + "type": "file_search", + "vector_store_ids": ["string"], + "filters": { + "key": "key", + "type": "eq", + "value": "string", + }, + "max_num_results": 0, + "ranking_options": { + "ranker": "auto", + "score_threshold": 0, + }, + } + ], + top_p=1, + truncation="auto", + user="user-1234", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_raw_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.create( + input="string", + model="gpt-4o", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_streaming_response_create_overload_1(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.create( + input="string", + model="gpt-4o", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_method_create_overload_2(self, async_client: AsyncOpenAI) -> None: + 
response_stream = await async_client.responses.create( + input="string", + model="gpt-4o", + stream=True, + ) + await response_stream.response.aclose() + + @parametrize + async def test_method_create_with_all_params_overload_2(self, async_client: AsyncOpenAI) -> None: + response_stream = await async_client.responses.create( + input="string", + model="gpt-4o", + stream=True, + include=["file_search_call.results"], + instructions="instructions", + max_output_tokens=0, + metadata={"foo": "string"}, + parallel_tool_calls=True, + previous_response_id="previous_response_id", + reasoning={ + "effort": "low", + "generate_summary": "concise", + }, + store=True, + temperature=1, + text={"format": {"type": "text"}}, + tool_choice="none", + tools=[ + { + "type": "file_search", + "vector_store_ids": ["string"], + "filters": { + "key": "key", + "type": "eq", + "value": "string", + }, + "max_num_results": 0, + "ranking_options": { + "ranker": "auto", + "score_threshold": 0, + }, + } + ], + top_p=1, + truncation="auto", + user="user-1234", + ) + await response_stream.response.aclose() + + @parametrize + async def test_raw_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.with_raw_response.create( + input="string", + model="gpt-4o", + stream=True, + ) + + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + stream = response.parse() + await stream.close() + + @parametrize + async def test_streaming_response_create_overload_2(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.create( + input="string", + model="gpt-4o", + stream=True, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + stream = await response.parse() + await stream.close() + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_method_retrieve_with_all_params(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + include=["file_search_call.results"], + ) + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.retrieve( + response_id="resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert_matches_type(Response, response, path=["response"]) + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: 
AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.with_raw_response.retrieve( + response_id="", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.responses.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + assert response is None + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + http_response = await async_client.responses.with_raw_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) + + assert http_response.is_closed is True + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + response = http_response.parse() + assert response is None + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.responses.with_streaming_response.delete( + "resp_677efb5139a88190b512bc3fef8e535d", + ) as http_response: + assert not http_response.is_closed + assert http_response.http_request.headers.get("X-Stainless-Lang") == "python" + + response = await http_response.parse() + assert response is None + + assert cast(Any, http_response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `response_id` but received ''"): + await async_client.responses.with_raw_response.delete( + "", + ) diff --git a/tests/api_resources/test_uploads.py b/tests/api_resources/test_uploads.py new file mode 100644 index 0000000000..a14c4f8da2 --- /dev/null +++ b/tests/api_resources/test_uploads.py @@ -0,0 +1,280 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+ +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.types import Upload + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestUploads: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + upload = client.uploads.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.uploads.with_raw_response.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.uploads.with_streaming_response.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_method_cancel(self, client: OpenAI) -> None: + upload = client.uploads.cancel( + "upload_abc123", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_raw_response_cancel(self, client: OpenAI) -> None: + response = client.uploads.with_raw_response.cancel( + "upload_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_streaming_response_cancel(self, client: OpenAI) -> None: + with client.uploads.with_streaming_response.cancel( + "upload_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_cancel(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + client.uploads.with_raw_response.cancel( + "", + ) + + @parametrize + def test_method_complete(self, client: OpenAI) -> None: + upload = client.uploads.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_method_complete_with_all_params(self, client: OpenAI) -> None: + upload = client.uploads.complete( + upload_id="upload_abc123", + part_ids=["string"], + md5="md5", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_raw_response_complete(self, client: OpenAI) -> None: + response = client.uploads.with_raw_response.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == 
"python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + def test_streaming_response_complete(self, client: OpenAI) -> None: + with client.uploads.with_streaming_response.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_complete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + client.uploads.with_raw_response.complete( + upload_id="", + part_ids=["string"], + ) + + +class TestAsyncUploads: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.uploads.with_raw_response.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.uploads.with_streaming_response.create( + bytes=0, + filename="filename", + mime_type="mime_type", + purpose="assistants", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.cancel( + "upload_abc123", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: + response = await async_client.uploads.with_raw_response.cancel( + "upload_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: + async with async_client.uploads.with_streaming_response.cancel( + "upload_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + await async_client.uploads.with_raw_response.cancel( + "", + ) + + @parametrize + async def 
test_method_complete(self, async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_method_complete_with_all_params(self, async_client: AsyncOpenAI) -> None: + upload = await async_client.uploads.complete( + upload_id="upload_abc123", + part_ids=["string"], + md5="md5", + ) + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_raw_response_complete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.uploads.with_raw_response.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + upload = response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + @parametrize + async def test_streaming_response_complete(self, async_client: AsyncOpenAI) -> None: + async with async_client.uploads.with_streaming_response.complete( + upload_id="upload_abc123", + part_ids=["string"], + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + upload = await response.parse() + assert_matches_type(Upload, upload, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_complete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"): + await async_client.uploads.with_raw_response.complete( + upload_id="", + part_ids=["string"], + ) diff --git a/tests/api_resources/beta/test_vector_stores.py b/tests/api_resources/test_vector_stores.py similarity index 58% rename from tests/api_resources/beta/test_vector_stores.py rename to tests/api_resources/test_vector_stores.py index e671c96a45..54bb75bc1d 100644 --- a/tests/api_resources/beta/test_vector_stores.py +++ b/tests/api_resources/test_vector_stores.py @@ -9,11 +9,12 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type -from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta import ( +from openai.types import ( VectorStore, VectorStoreDeleted, + VectorStoreSearchResponse, ) +from openai.pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") @@ -23,25 +24,26 @@ class TestVectorStores: @parametrize def test_method_create(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.create() + vector_store = client.vector_stores.create() assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_method_create_with_all_params(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.create( + vector_store = client.vector_stores.create( + chunking_strategy={"type": "auto"}, expires_after={ "anchor": "last_active_at", "days": 1, }, - file_ids=["string", "string", "string"], - metadata={}, - name="string", + file_ids=["string"], + metadata={"foo": "string"}, + name="name", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.create() + response = client.vector_stores.with_raw_response.create() assert response.is_closed is True assert 
response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -50,7 +52,7 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.create() as response: + with client.vector_stores.with_streaming_response.create() as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -61,15 +63,15 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.retrieve( - "string", + vector_store = client.vector_stores.retrieve( + "vector_store_id", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.retrieve( - "string", + response = client.vector_stores.with_raw_response.retrieve( + "vector_store_id", ) assert response.is_closed is True @@ -79,8 +81,8 @@ def test_raw_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.retrieve( - "string", + with client.vector_stores.with_streaming_response.retrieve( + "vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -93,34 +95,34 @@ def test_streaming_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.with_raw_response.retrieve( + client.vector_stores.with_raw_response.retrieve( "", ) @parametrize def test_method_update(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.update( - "string", + vector_store = client.vector_stores.update( + vector_store_id="vector_store_id", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_method_update_with_all_params(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.update( - "string", + vector_store = client.vector_stores.update( + vector_store_id="vector_store_id", expires_after={ "anchor": "last_active_at", "days": 1, }, - metadata={}, - name="string", + metadata={"foo": "string"}, + name="name", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize def test_raw_response_update(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.update( - "string", + response = client.vector_stores.with_raw_response.update( + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -130,8 +132,8 @@ def test_raw_response_update(self, client: OpenAI) -> None: @parametrize def test_streaming_response_update(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.update( - "string", + with client.vector_stores.with_streaming_response.update( + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -144,20 +146,20 @@ def test_streaming_response_update(self, client: OpenAI) -> None: @parametrize def test_path_params_update(self, client: OpenAI) -> None: with 
pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.with_raw_response.update( - "", + client.vector_stores.with_raw_response.update( + vector_store_id="", ) @parametrize def test_method_list(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.list() + vector_store = client.vector_stores.list() assert_matches_type(SyncCursorPage[VectorStore], vector_store, path=["response"]) @parametrize def test_method_list_with_all_params(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.list( - after="string", - before="string", + vector_store = client.vector_stores.list( + after="after", + before="before", limit=0, order="asc", ) @@ -165,7 +167,7 @@ def test_method_list_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.list() + response = client.vector_stores.with_raw_response.list() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -174,7 +176,7 @@ def test_raw_response_list(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.list() as response: + with client.vector_stores.with_streaming_response.list() as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -185,15 +187,15 @@ def test_streaming_response_list(self, client: OpenAI) -> None: @parametrize def test_method_delete(self, client: OpenAI) -> None: - vector_store = client.beta.vector_stores.delete( - "string", + vector_store = client.vector_stores.delete( + "vector_store_id", ) assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) @parametrize def test_raw_response_delete(self, client: OpenAI) -> None: - response = client.beta.vector_stores.with_raw_response.delete( - "string", + response = client.vector_stores.with_raw_response.delete( + "vector_store_id", ) assert response.is_closed is True @@ -203,8 +205,8 @@ def test_raw_response_delete(self, client: OpenAI) -> None: @parametrize def test_streaming_response_delete(self, client: OpenAI) -> None: - with client.beta.vector_stores.with_streaming_response.delete( - "string", + with client.vector_stores.with_streaming_response.delete( + "vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -217,35 +219,97 @@ def test_streaming_response_delete(self, client: OpenAI) -> None: @parametrize def test_path_params_delete(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.with_raw_response.delete( + client.vector_stores.with_raw_response.delete( "", ) + @parametrize + def test_method_search(self, client: OpenAI) -> None: + vector_store = client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + ) + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_method_search_with_all_params(self, client: OpenAI) -> None: + vector_store = client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + filters={ + "key": "key", + "type": "eq", + "value": "string", + }, + max_num_results=1, + ranking_options={ + "ranker": 
"auto", + "score_threshold": 0, + }, + rewrite_query=True, + ) + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_raw_response_search(self, client: OpenAI) -> None: + response = client.vector_stores.with_raw_response.search( + vector_store_id="vs_abc123", + query="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + def test_streaming_response_search(self, client: OpenAI) -> None: + with client.vector_stores.with_streaming_response.search( + vector_store_id="vs_abc123", + query="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = response.parse() + assert_matches_type(SyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_search(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.with_raw_response.search( + vector_store_id="", + query="string", + ) + class TestAsyncVectorStores: parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.create() + vector_store = await async_client.vector_stores.create() assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.create( + vector_store = await async_client.vector_stores.create( + chunking_strategy={"type": "auto"}, expires_after={ "anchor": "last_active_at", "days": 1, }, - file_ids=["string", "string", "string"], - metadata={}, - name="string", + file_ids=["string"], + metadata={"foo": "string"}, + name="name", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.create() + response = await async_client.vector_stores.with_raw_response.create() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -254,7 +318,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.create() as response: + async with async_client.vector_stores.with_streaming_response.create() as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -265,15 +329,15 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.retrieve( - "string", + vector_store = await async_client.vector_stores.retrieve( + "vector_store_id", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize 
async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.retrieve( - "string", + response = await async_client.vector_stores.with_raw_response.retrieve( + "vector_store_id", ) assert response.is_closed is True @@ -283,8 +347,8 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.retrieve( - "string", + async with async_client.vector_stores.with_streaming_response.retrieve( + "vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -297,34 +361,34 @@ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> N @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.with_raw_response.retrieve( + await async_client.vector_stores.with_raw_response.retrieve( "", ) @parametrize async def test_method_update(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.update( - "string", + vector_store = await async_client.vector_stores.update( + vector_store_id="vector_store_id", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize async def test_method_update_with_all_params(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.update( - "string", + vector_store = await async_client.vector_stores.update( + vector_store_id="vector_store_id", expires_after={ "anchor": "last_active_at", "days": 1, }, - metadata={}, - name="string", + metadata={"foo": "string"}, + name="name", ) assert_matches_type(VectorStore, vector_store, path=["response"]) @parametrize async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.update( - "string", + response = await async_client.vector_stores.with_raw_response.update( + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -334,8 +398,8 @@ async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.update( - "string", + async with async_client.vector_stores.with_streaming_response.update( + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -348,20 +412,20 @@ async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.with_raw_response.update( - "", + await async_client.vector_stores.with_raw_response.update( + vector_store_id="", ) @parametrize async def test_method_list(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.list() + vector_store = await async_client.vector_stores.list() 
assert_matches_type(AsyncCursorPage[VectorStore], vector_store, path=["response"]) @parametrize async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.list( - after="string", - before="string", + vector_store = await async_client.vector_stores.list( + after="after", + before="before", limit=0, order="asc", ) @@ -369,7 +433,7 @@ async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> N @parametrize async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.list() + response = await async_client.vector_stores.with_raw_response.list() assert response.is_closed is True assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -378,7 +442,7 @@ async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.list() as response: + async with async_client.vector_stores.with_streaming_response.list() as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -389,15 +453,15 @@ async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_method_delete(self, async_client: AsyncOpenAI) -> None: - vector_store = await async_client.beta.vector_stores.delete( - "string", + vector_store = await async_client.vector_stores.delete( + "vector_store_id", ) assert_matches_type(VectorStoreDeleted, vector_store, path=["response"]) @parametrize async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.with_raw_response.delete( - "string", + response = await async_client.vector_stores.with_raw_response.delete( + "vector_store_id", ) assert response.is_closed is True @@ -407,8 +471,8 @@ async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.with_streaming_response.delete( - "string", + async with async_client.vector_stores.with_streaming_response.delete( + "vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -421,6 +485,67 @@ async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.with_raw_response.delete( + await async_client.vector_stores.with_raw_response.delete( "", ) + + @parametrize + async def test_method_search(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + ) + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_method_search_with_all_params(self, async_client: AsyncOpenAI) -> None: + vector_store = await async_client.vector_stores.search( + vector_store_id="vs_abc123", + query="string", + filters={ + "key": "key", + "type": "eq", + "value": "string", + }, + max_num_results=1, + 
ranking_options={ + "ranker": "auto", + "score_threshold": 0, + }, + rewrite_query=True, + ) + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_raw_response_search(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.with_raw_response.search( + vector_store_id="vs_abc123", + query="string", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + vector_store = response.parse() + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + @parametrize + async def test_streaming_response_search(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.with_streaming_response.search( + vector_store_id="vs_abc123", + query="string", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + vector_store = await response.parse() + assert_matches_type(AsyncPage[VectorStoreSearchResponse], vector_store, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_search(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.with_raw_response.search( + vector_store_id="", + query="string", + ) diff --git a/tests/api_resources/uploads/__init__.py b/tests/api_resources/uploads/__init__.py new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/uploads/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/uploads/test_parts.py b/tests/api_resources/uploads/test_parts.py new file mode 100644 index 0000000000..2bba241a6d --- /dev/null +++ b/tests/api_resources/uploads/test_parts.py @@ -0,0 +1,106 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. 
+
+from __future__ import annotations
+
+import os
+from typing import Any, cast
+
+import pytest
+
+from openai import OpenAI, AsyncOpenAI
+from tests.utils import assert_matches_type
+from openai.types.uploads import UploadPart
+
+base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/")
+
+
+class TestParts:
+    parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    def test_method_create(self, client: OpenAI) -> None:
+        part = client.uploads.parts.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        )
+        assert_matches_type(UploadPart, part, path=["response"])
+
+    @parametrize
+    def test_raw_response_create(self, client: OpenAI) -> None:
+        response = client.uploads.parts.with_raw_response.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        part = response.parse()
+        assert_matches_type(UploadPart, part, path=["response"])
+
+    @parametrize
+    def test_streaming_response_create(self, client: OpenAI) -> None:
+        with client.uploads.parts.with_streaming_response.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            part = response.parse()
+            assert_matches_type(UploadPart, part, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    def test_path_params_create(self, client: OpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"):
+            client.uploads.parts.with_raw_response.create(
+                upload_id="",
+                data=b"raw file contents",
+            )
+
+
+class TestAsyncParts:
+    parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"])
+
+    @parametrize
+    async def test_method_create(self, async_client: AsyncOpenAI) -> None:
+        part = await async_client.uploads.parts.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        )
+        assert_matches_type(UploadPart, part, path=["response"])
+
+    @parametrize
+    async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
+        response = await async_client.uploads.parts.with_raw_response.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        )
+
+        assert response.is_closed is True
+        assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+        part = response.parse()
+        assert_matches_type(UploadPart, part, path=["response"])
+
+    @parametrize
+    async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
+        async with async_client.uploads.parts.with_streaming_response.create(
+            upload_id="upload_abc123",
+            data=b"raw file contents",
+        ) as response:
+            assert not response.is_closed
+            assert response.http_request.headers.get("X-Stainless-Lang") == "python"
+
+            part = await response.parse()
+            assert_matches_type(UploadPart, part, path=["response"])
+
+        assert cast(Any, response.is_closed) is True
+
+    @parametrize
+    async def test_path_params_create(self, async_client: AsyncOpenAI) -> None:
+        with pytest.raises(ValueError, match=r"Expected a non-empty value for `upload_id` but received ''"):
+            await async_client.uploads.parts.with_raw_response.create(
+                upload_id="",
+                data=b"raw file contents",
+            )
diff --git a/tests/api_resources/vector_stores/__init__.py b/tests/api_resources/vector_stores/__init__.py
new file mode 100644 index 0000000000..fd8019a9a1 --- /dev/null +++ b/tests/api_resources/vector_stores/__init__.py @@ -0,0 +1 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. diff --git a/tests/api_resources/beta/vector_stores/test_file_batches.py b/tests/api_resources/vector_stores/test_file_batches.py similarity index 66% rename from tests/api_resources/beta/vector_stores/test_file_batches.py rename to tests/api_resources/vector_stores/test_file_batches.py index 9854d1a138..0587cfc56a 100644 --- a/tests/api_resources/beta/vector_stores/test_file_batches.py +++ b/tests/api_resources/vector_stores/test_file_batches.py @@ -10,7 +10,7 @@ from openai import OpenAI, AsyncOpenAI from tests.utils import assert_matches_type from openai.pagination import SyncCursorPage, AsyncCursorPage -from openai.types.beta.vector_stores import ( +from openai.types.vector_stores import ( VectorStoreFile, VectorStoreFileBatch, ) @@ -23,16 +23,26 @@ class TestFileBatches: @parametrize def test_method_create(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.create( - "vs_abc123", + file_batch = client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + file_ids=["string"], + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + file_batch = client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", file_ids=["string"], + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize def test_raw_response_create(self, client: OpenAI) -> None: - response = client.beta.vector_stores.file_batches.with_raw_response.create( - "vs_abc123", + response = client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="vs_abc123", file_ids=["string"], ) @@ -43,8 +53,8 @@ def test_raw_response_create(self, client: OpenAI) -> None: @parametrize def test_streaming_response_create(self, client: OpenAI) -> None: - with client.beta.vector_stores.file_batches.with_streaming_response.create( - "vs_abc123", + with client.vector_stores.file_batches.with_streaming_response.create( + vector_store_id="vs_abc123", file_ids=["string"], ) as response: assert not response.is_closed @@ -58,23 +68,23 @@ def test_streaming_response_create(self, client: OpenAI) -> None: @parametrize def test_path_params_create(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.create( - "", + client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="", file_ids=["string"], ) @parametrize def test_method_retrieve(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.retrieve( - "vsfb_abc123", + file_batch = client.vector_stores.file_batches.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize def test_raw_response_retrieve(self, client: OpenAI) -> None: - response = client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "vsfb_abc123", + response = client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) @@ -85,8 +95,8 @@ def test_raw_response_retrieve(self, client: OpenAI) 
-> None: @parametrize def test_streaming_response_retrieve(self, client: OpenAI) -> None: - with client.beta.vector_stores.file_batches.with_streaming_response.retrieve( - "vsfb_abc123", + with client.vector_stores.file_batches.with_streaming_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) as response: assert not response.is_closed @@ -100,30 +110,30 @@ def test_streaming_response_retrieve(self, client: OpenAI) -> None: @parametrize def test_path_params_retrieve(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "vsfb_abc123", + client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "", + client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="", vector_store_id="vs_abc123", ) @parametrize def test_method_cancel(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.cancel( - "string", - vector_store_id="string", + file_batch = client.vector_stores.file_batches.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize def test_raw_response_cancel(self, client: OpenAI) -> None: - response = client.beta.vector_stores.file_batches.with_raw_response.cancel( - "string", - vector_store_id="string", + response = client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -133,9 +143,9 @@ def test_raw_response_cancel(self, client: OpenAI) -> None: @parametrize def test_streaming_response_cancel(self, client: OpenAI) -> None: - with client.beta.vector_stores.file_batches.with_streaming_response.cancel( - "string", - vector_store_id="string", + with client.vector_stores.file_batches.with_streaming_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -148,32 +158,32 @@ def test_streaming_response_cancel(self, client: OpenAI) -> None: @parametrize def test_path_params_cancel(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.cancel( - "string", + client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.cancel( - "", - vector_store_id="string", + client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="", + vector_store_id="vector_store_id", ) @parametrize def test_method_list_files(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.list_files( - "string", - vector_store_id="string", + file_batch = client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert_matches_type(SyncCursorPage[VectorStoreFile], file_batch, path=["response"]) @parametrize def 
test_method_list_files_with_all_params(self, client: OpenAI) -> None: - file_batch = client.beta.vector_stores.file_batches.list_files( - "string", - vector_store_id="string", - after="string", - before="string", + file_batch = client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + after="after", + before="before", filter="in_progress", limit=0, order="asc", @@ -182,9 +192,9 @@ def test_method_list_files_with_all_params(self, client: OpenAI) -> None: @parametrize def test_raw_response_list_files(self, client: OpenAI) -> None: - response = client.beta.vector_stores.file_batches.with_raw_response.list_files( - "string", - vector_store_id="string", + response = client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -194,9 +204,9 @@ def test_raw_response_list_files(self, client: OpenAI) -> None: @parametrize def test_streaming_response_list_files(self, client: OpenAI) -> None: - with client.beta.vector_stores.file_batches.with_streaming_response.list_files( - "string", - vector_store_id="string", + with client.vector_stores.file_batches.with_streaming_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -209,15 +219,15 @@ def test_streaming_response_list_files(self, client: OpenAI) -> None: @parametrize def test_path_params_list_files(self, client: OpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.list_files( - "string", + client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - client.beta.vector_stores.file_batches.with_raw_response.list_files( - "", - vector_store_id="string", + client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="", + vector_store_id="vector_store_id", ) @@ -226,16 +236,26 @@ class TestAsyncFileBatches: @parametrize async def test_method_create(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.create( - "vs_abc123", + file_batch = await async_client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", + file_ids=["string"], + ) + assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + file_batch = await async_client.vector_stores.file_batches.create( + vector_store_id="vs_abc123", file_ids=["string"], + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.file_batches.with_raw_response.create( - "vs_abc123", + response = await async_client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="vs_abc123", file_ids=["string"], ) @@ -246,8 +266,8 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: - async with 
async_client.beta.vector_stores.file_batches.with_streaming_response.create( - "vs_abc123", + async with async_client.vector_stores.file_batches.with_streaming_response.create( + vector_store_id="vs_abc123", file_ids=["string"], ) as response: assert not response.is_closed @@ -261,23 +281,23 @@ async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.create( - "", + await async_client.vector_stores.file_batches.with_raw_response.create( + vector_store_id="", file_ids=["string"], ) @parametrize async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.retrieve( - "vsfb_abc123", + file_batch = await async_client.vector_stores.file_batches.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "vsfb_abc123", + response = await async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) @@ -288,8 +308,8 @@ async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.file_batches.with_streaming_response.retrieve( - "vsfb_abc123", + async with async_client.vector_stores.file_batches.with_streaming_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="vs_abc123", ) as response: assert not response.is_closed @@ -303,30 +323,30 @@ async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> N @parametrize async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "vsfb_abc123", + await async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="vsfb_abc123", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.retrieve( - "", + await async_client.vector_stores.file_batches.with_raw_response.retrieve( + batch_id="", vector_store_id="vs_abc123", ) @parametrize async def test_method_cancel(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.cancel( - "string", - vector_store_id="string", + file_batch = await async_client.vector_stores.file_batches.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert_matches_type(VectorStoreFileBatch, file_batch, path=["response"]) @parametrize async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( - "string", - vector_store_id="string", + response = await async_client.vector_stores.file_batches.with_raw_response.cancel( + 
batch_id="batch_id", + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -336,9 +356,9 @@ async def test_raw_response_cancel(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.file_batches.with_streaming_response.cancel( - "string", - vector_store_id="string", + async with async_client.vector_stores.file_batches.with_streaming_response.cancel( + batch_id="batch_id", + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -351,32 +371,32 @@ async def test_streaming_response_cancel(self, async_client: AsyncOpenAI) -> Non @parametrize async def test_path_params_cancel(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( - "string", + await async_client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="batch_id", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.cancel( - "", - vector_store_id="string", + await async_client.vector_stores.file_batches.with_raw_response.cancel( + batch_id="", + vector_store_id="vector_store_id", ) @parametrize async def test_method_list_files(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.list_files( - "string", - vector_store_id="string", + file_batch = await async_client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert_matches_type(AsyncCursorPage[VectorStoreFile], file_batch, path=["response"]) @parametrize async def test_method_list_files_with_all_params(self, async_client: AsyncOpenAI) -> None: - file_batch = await async_client.beta.vector_stores.file_batches.list_files( - "string", - vector_store_id="string", - after="string", - before="string", + file_batch = await async_client.vector_stores.file_batches.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", + after="after", + before="before", filter="in_progress", limit=0, order="asc", @@ -385,9 +405,9 @@ async def test_method_list_files_with_all_params(self, async_client: AsyncOpenAI @parametrize async def test_raw_response_list_files(self, async_client: AsyncOpenAI) -> None: - response = await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( - "string", - vector_store_id="string", + response = await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) assert response.is_closed is True @@ -397,9 +417,9 @@ async def test_raw_response_list_files(self, async_client: AsyncOpenAI) -> None: @parametrize async def test_streaming_response_list_files(self, async_client: AsyncOpenAI) -> None: - async with async_client.beta.vector_stores.file_batches.with_streaming_response.list_files( - "string", - vector_store_id="string", + async with async_client.vector_stores.file_batches.with_streaming_response.list_files( + batch_id="batch_id", + vector_store_id="vector_store_id", ) as response: assert not response.is_closed assert response.http_request.headers.get("X-Stainless-Lang") == "python" @@ -412,13 
+432,13 @@ async def test_streaming_response_list_files(self, async_client: AsyncOpenAI) -> @parametrize async def test_path_params_list_files(self, async_client: AsyncOpenAI) -> None: with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( - "string", + await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="batch_id", vector_store_id="", ) with pytest.raises(ValueError, match=r"Expected a non-empty value for `batch_id` but received ''"): - await async_client.beta.vector_stores.file_batches.with_raw_response.list_files( - "", - vector_store_id="string", + await async_client.vector_stores.file_batches.with_raw_response.list_files( + batch_id="", + vector_store_id="vector_store_id", ) diff --git a/tests/api_resources/vector_stores/test_files.py b/tests/api_resources/vector_stores/test_files.py new file mode 100644 index 0000000000..c13442261e --- /dev/null +++ b/tests/api_resources/vector_stores/test_files.py @@ -0,0 +1,625 @@ +# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details. + +from __future__ import annotations + +import os +from typing import Any, cast + +import pytest + +from openai import OpenAI, AsyncOpenAI +from tests.utils import assert_matches_type +from openai.pagination import SyncPage, AsyncPage, SyncCursorPage, AsyncCursorPage +from openai.types.vector_stores import ( + VectorStoreFile, + FileContentResponse, + VectorStoreFileDeleted, +) + +base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") + + +class TestFiles: + parametrize = pytest.mark.parametrize("client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + def test_method_create(self, client: OpenAI) -> None: + file = client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_method_create_with_all_params(self, client: OpenAI) -> None: + file = client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_create(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_create(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_create(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.create( + vector_store_id="", + file_id="file_id", + ) + + @parametrize + def 
test_method_retrieve(self, client: OpenAI) -> None: + file = client.vector_stores.files.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_retrieve(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_retrieve(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_retrieve(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.retrieve( + file_id="", + vector_store_id="vs_abc123", + ) + + @parametrize + def test_method_update(self, client: OpenAI) -> None: + file = client.vector_stores.files.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_raw_response_update(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + def test_streaming_response_update(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_update(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="", + attributes={"foo": "string"}, + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.update( + file_id="", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + @parametrize + def test_method_list(self, client: OpenAI) -> None: + file = client.vector_stores.files.list( + vector_store_id="vector_store_id", + ) + 
assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_method_list_with_all_params(self, client: OpenAI) -> None: + file = client.vector_stores.files.list( + vector_store_id="vector_store_id", + after="after", + before="before", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_raw_response_list(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.list( + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + def test_streaming_response_list(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.list( + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(SyncCursorPage[VectorStoreFile], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_list(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.list( + vector_store_id="", + ) + + @parametrize + def test_method_delete(self, client: OpenAI) -> None: + file = client.vector_stores.files.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + def test_raw_response_delete(self, client: OpenAI) -> None: + response = client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + def test_streaming_response_delete(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_delete(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.delete( + file_id="", + vector_store_id="vector_store_id", + ) + + @parametrize + def test_method_content(self, client: OpenAI) -> None: + file = client.vector_stores.files.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + def test_raw_response_content(self, client: OpenAI) -> None: + response = 
client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + def test_streaming_response_content(self, client: OpenAI) -> None: + with client.vector_stores.files.with_streaming_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = response.parse() + assert_matches_type(SyncPage[FileContentResponse], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + def test_path_params_content(self, client: OpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + client.vector_stores.files.with_raw_response.content( + file_id="", + vector_store_id="vs_abc123", + ) + + +class TestAsyncFiles: + parametrize = pytest.mark.parametrize("async_client", [False, True], indirect=True, ids=["loose", "strict"]) + + @parametrize + async def test_method_create(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.create( + vector_store_id="vs_abc123", + file_id="file_id", + attributes={"foo": "string"}, + chunking_strategy={"type": "auto"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.create( + vector_store_id="vs_abc123", + file_id="file_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_create(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.create( + vector_store_id="", + file_id="file_id", + ) + + @parametrize + async def test_method_retrieve(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + 
assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_retrieve(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_retrieve(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.retrieve( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_retrieve(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.retrieve( + file_id="", + vector_store_id="vs_abc123", + ) + + @parametrize + async def test_method_update(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_raw_response_update(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + @parametrize + async def test_streaming_response_update(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.update( + file_id="file-abc123", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFile, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_update(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.update( + file_id="file-abc123", + vector_store_id="", + attributes={"foo": "string"}, + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.update( + file_id="", + vector_store_id="vs_abc123", + attributes={"foo": "string"}, + ) + + @parametrize + async def test_method_list(self, async_client: AsyncOpenAI) 
-> None: + file = await async_client.vector_stores.files.list( + vector_store_id="vector_store_id", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_method_list_with_all_params(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.list( + vector_store_id="vector_store_id", + after="after", + before="before", + filter="in_progress", + limit=0, + order="asc", + ) + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_raw_response_list(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.list( + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + @parametrize + async def test_streaming_response_list(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.list( + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(AsyncCursorPage[VectorStoreFile], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_list(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.list( + vector_store_id="", + ) + + @parametrize + async def test_method_delete(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + async def test_raw_response_delete(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + @parametrize + async def test_streaming_response_delete(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.delete( + file_id="file_id", + vector_store_id="vector_store_id", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(VectorStoreFileDeleted, file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_delete(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.delete( + file_id="file_id", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.delete( + file_id="", + 
vector_store_id="vector_store_id", + ) + + @parametrize + async def test_method_content(self, async_client: AsyncOpenAI) -> None: + file = await async_client.vector_stores.files.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + async def test_raw_response_content(self, async_client: AsyncOpenAI) -> None: + response = await async_client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) + + assert response.is_closed is True + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + file = response.parse() + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + @parametrize + async def test_streaming_response_content(self, async_client: AsyncOpenAI) -> None: + async with async_client.vector_stores.files.with_streaming_response.content( + file_id="file-abc123", + vector_store_id="vs_abc123", + ) as response: + assert not response.is_closed + assert response.http_request.headers.get("X-Stainless-Lang") == "python" + + file = await response.parse() + assert_matches_type(AsyncPage[FileContentResponse], file, path=["response"]) + + assert cast(Any, response.is_closed) is True + + @parametrize + async def test_path_params_content(self, async_client: AsyncOpenAI) -> None: + with pytest.raises(ValueError, match=r"Expected a non-empty value for `vector_store_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.content( + file_id="file-abc123", + vector_store_id="", + ) + + with pytest.raises(ValueError, match=r"Expected a non-empty value for `file_id` but received ''"): + await async_client.vector_stores.files.with_raw_response.content( + file_id="", + vector_store_id="vs_abc123", + ) diff --git a/tests/conftest.py b/tests/conftest.py index 15af57e770..fa82d39d86 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,11 @@ from __future__ import annotations import os -import asyncio import logging from typing import TYPE_CHECKING, Iterator, AsyncIterator import pytest +from pytest_asyncio import is_async_test from openai import OpenAI, AsyncOpenAI @@ -17,11 +17,13 @@ logging.getLogger("openai").setLevel(logging.DEBUG) -@pytest.fixture(scope="session") -def event_loop() -> Iterator[asyncio.AbstractEventLoop]: - loop = asyncio.new_event_loop() - yield loop - loop.close() +# automatically add `pytest.mark.asyncio()` to all of our async tests +# so we don't have to add that boilerplate everywhere +def pytest_collection_modifyitems(items: list[pytest.Function]) -> None: + pytest_asyncio_tests = (item for item in items if is_async_test(item)) + session_scope_marker = pytest.mark.asyncio(loop_scope="session") + for async_test in pytest_asyncio_tests: + async_test.add_marker(session_scope_marker, append=False) base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010/") diff --git a/tests/lib/test_azure.py b/tests/lib/test_azure.py deleted file mode 100644 index 9360b2925a..0000000000 --- a/tests/lib/test_azure.py +++ /dev/null @@ -1,66 +0,0 @@ -from typing import Union -from typing_extensions import Literal - -import pytest - -from openai._models import FinalRequestOptions -from openai.lib.azure import AzureOpenAI, AsyncAzureOpenAI - -Client = Union[AzureOpenAI, AsyncAzureOpenAI] - - -sync_client = AzureOpenAI( - api_version="2023-07-01", - api_key="example API key", - 
azure_endpoint="https://example-resource.azure.openai.com/", -) - -async_client = AsyncAzureOpenAI( - api_version="2023-07-01", - api_key="example API key", - azure_endpoint="https://example-resource.azure.openai.com/", -) - - -@pytest.mark.parametrize("client", [sync_client, async_client]) -def test_implicit_deployment_path(client: Client) -> None: - req = client._build_request( - FinalRequestOptions.construct( - method="post", - url="/chat/completions", - json_data={"model": "my-deployment-model"}, - ) - ) - assert ( - req.url - == "https://example-resource.azure.openai.com/openai/deployments/my-deployment-model/chat/completions?api-version=2023-07-01" - ) - - -@pytest.mark.parametrize( - "client,method", - [ - (sync_client, "copy"), - (sync_client, "with_options"), - (async_client, "copy"), - (async_client, "with_options"), - ], -) -def test_client_copying(client: Client, method: Literal["copy", "with_options"]) -> None: - if method == "copy": - copied = client.copy() - else: - copied = client.with_options() - - assert copied._custom_query == {"api-version": "2023-07-01"} - - -@pytest.mark.parametrize( - "client", - [sync_client, async_client], -) -def test_client_copying_override_options(client: Client) -> None: - copied = client.copy( - api_version="2022-05-01", - ) - assert copied._custom_query == {"api-version": "2022-05-01"} diff --git a/tests/lib/test_old_api.py b/tests/lib/test_old_api.py deleted file mode 100644 index 261b8acb94..0000000000 --- a/tests/lib/test_old_api.py +++ /dev/null @@ -1,17 +0,0 @@ -import pytest - -import openai -from openai.lib._old_api import APIRemovedInV1 - - -def test_basic_attribute_access_works() -> None: - for attr in dir(openai): - dir(getattr(openai, attr)) - - -def test_helpful_error_is_raised() -> None: - with pytest.raises(APIRemovedInV1): - openai.Completion.create() # type: ignore - - with pytest.raises(APIRemovedInV1): - openai.ChatCompletion.create() # type: ignore diff --git a/tests/test_client.py b/tests/test_client.py index c1e545e66f..62654afe1e 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -4,12 +4,17 @@ import gc import os +import sys import json +import time import asyncio import inspect +import subprocess import tracemalloc from typing import Any, Union, cast +from textwrap import dedent from unittest import mock +from typing_extensions import Literal import httpx import pytest @@ -17,11 +22,14 @@ from pydantic import ValidationError from openai import OpenAI, AsyncOpenAI, APIResponseValidationError +from openai._types import Omit +from openai._utils import maybe_transform from openai._models import BaseModel, FinalRequestOptions from openai._constants import RAW_RESPONSE_HEADER from openai._streaming import Stream, AsyncStream from openai._exceptions import OpenAIError, APIStatusError, APITimeoutError, APIResponseValidationError from openai._base_client import DEFAULT_TIMEOUT, HTTPX_DEFAULT_TIMEOUT, BaseClient, make_request_options +from openai.types.chat.completion_create_params import CompletionCreateParamsNonStreaming from .utils import update_env @@ -328,7 +336,8 @@ def test_validate_headers(self) -> None: assert request.headers.get("Authorization") == f"Bearer {api_key}" with pytest.raises(OpenAIError): - client2 = OpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) + with update_env(**{"OPENAI_API_KEY": Omit()}): + client2 = OpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) _ = client2 def test_default_query_option(self) -> None: @@ 
@@ -343,11 +352,11 @@ def test_default_query_option(self) -> None:
             FinalRequestOptions(
                 method="get",
                 url="/foo",
-                params={"foo": "baz", "query_param": "overriden"},
+                params={"foo": "baz", "query_param": "overridden"},
             )
         )
         url = httpx.URL(request.url)
-        assert dict(url.params) == {"foo": "baz", "query_param": "overriden"}
+        assert dict(url.params) == {"foo": "baz", "query_param": "overridden"}
 
     def test_request_extra_json(self) -> None:
         request = self.client._build_request(
@@ -695,6 +704,7 @@ class Model(BaseModel):
         [3, "", 0.5],
         [2, "", 0.5 * 2.0],
         [1, "", 0.5 * 4.0],
+        [-1100, "", 8],  # test large number potentially overflowing
     ],
 )
 @mock.patch("time.time", mock.MagicMock(return_value=1696004797))
@@ -716,14 +726,17 @@ def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter) -> No
                 "/chat/completions",
                 body=cast(
                     object,
-                    dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
+                    maybe_transform(
+                        dict(
+                            messages=[
+                                {
+                                    "role": "user",
+                                    "content": "Say this is a test",
+                                }
+                            ],
+                            model="gpt-4o",
+                        ),
+                        CompletionCreateParamsNonStreaming,
                     ),
                 ),
                 cast_to=httpx.Response,
@@ -742,14 +755,17 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non
                 "/chat/completions",
                 body=cast(
                     object,
-                    dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
+                    maybe_transform(
+                        dict(
+                            messages=[
+                                {
+                                    "role": "user",
+                                    "content": "Say this is a test",
+                                }
+                            ],
+                            model="gpt-4o",
+                        ),
+                        CompletionCreateParamsNonStreaming,
                     ),
                 ),
                 cast_to=httpx.Response,
@@ -758,6 +774,140 @@ def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter) -> Non
             )
 
         assert _get_open_connections(self.client) == 0
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.parametrize("failure_mode", ["status", "exception"])
+    def test_retries_taken(
+        self,
+        client: OpenAI,
+        failures_before_success: int,
+        failure_mode: Literal["status", "exception"],
+        respx_mock: MockRouter,
+    ) -> None:
+        client = client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                if failure_mode == "exception":
+                    raise RuntimeError("oops")
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
+
+        response = client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "developer",
+                }
+            ],
+            model="gpt-4o",
+        )
+
+        assert response.retries_taken == failures_before_success
+        assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    def test_omit_retry_count_header(
+        self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)
+            return httpx.Response(200)
+
respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": Omit()}, + ) + + assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0 + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_overwrite_retry_count_header( + self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + response = client.chat.completions.with_raw_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + extra_headers={"x-stainless-retry-count": "42"}, + ) + + assert response.http_request.headers.get("x-stainless-retry-count") == "42" + + @pytest.mark.parametrize("failures_before_success", [0, 2, 4]) + @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout) + @pytest.mark.respx(base_url=base_url) + def test_retries_taken_new_response_class( + self, client: OpenAI, failures_before_success: int, respx_mock: MockRouter + ) -> None: + client = client.with_options(max_retries=4) + + nb_retries = 0 + + def retry_handler(_request: httpx.Request) -> httpx.Response: + nonlocal nb_retries + if nb_retries < failures_before_success: + nb_retries += 1 + return httpx.Response(500) + return httpx.Response(200) + + respx_mock.post("/chat/completions").mock(side_effect=retry_handler) + + with client.chat.completions.with_streaming_response.create( + messages=[ + { + "content": "string", + "role": "developer", + } + ], + model="gpt-4o", + ) as response: + assert response.retries_taken == failures_before_success + assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success + class TestAsyncOpenAI: client = AsyncOpenAI(base_url=base_url, api_key=api_key, _strict_response_validation=True) @@ -1044,7 +1194,8 @@ def test_validate_headers(self) -> None: assert request.headers.get("Authorization") == f"Bearer {api_key}" with pytest.raises(OpenAIError): - client2 = AsyncOpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) + with update_env(**{"OPENAI_API_KEY": Omit()}): + client2 = AsyncOpenAI(base_url=base_url, api_key=None, _strict_response_validation=True) _ = client2 def test_default_query_option(self) -> None: @@ -1059,11 +1210,11 @@ def test_default_query_option(self) -> None: FinalRequestOptions( method="get", url="/foo", - params={"foo": "baz", "query_param": "overriden"}, + params={"foo": "baz", "query_param": "overridden"}, ) ) url = httpx.URL(request.url) - assert dict(url.params) == {"foo": "baz", "query_param": "overriden"} + assert dict(url.params) == {"foo": "baz", "query_param": "overridden"} def test_request_extra_json(self) -> None: request = self.client._build_request( @@ -1425,6 +1576,7 @@ class Model(BaseModel): [3, "", 0.5], [2, "", 0.5 * 2.0], [1, "", 0.5 * 4.0], + [-1100, "", 8], # test large number potentially 
     ],
 )
 @mock.patch("time.time", mock.MagicMock(return_value=1696004797))
@@ -1447,14 +1599,17 @@ async def test_retrying_timeout_errors_doesnt_leak(self, respx_mock: MockRouter)
                 "/chat/completions",
                 body=cast(
                     object,
-                    dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
+                    maybe_transform(
+                        dict(
+                            messages=[
+                                {
+                                    "role": "user",
+                                    "content": "Say this is a test",
+                                }
+                            ],
+                            model="gpt-4o",
+                        ),
+                        CompletionCreateParamsNonStreaming,
                     ),
                 ),
                 cast_to=httpx.Response,
@@ -1473,14 +1628,17 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter)
                 "/chat/completions",
                 body=cast(
                     object,
-                    dict(
-                        messages=[
-                            {
-                                "role": "user",
-                                "content": "Say this is a test",
-                            }
-                        ],
-                        model="gpt-3.5-turbo",
+                    maybe_transform(
+                        dict(
+                            messages=[
+                                {
+                                    "role": "user",
+                                    "content": "Say this is a test",
+                                }
+                            ],
+                            model="gpt-4o",
+                        ),
+                        CompletionCreateParamsNonStreaming,
                     ),
                 ),
                 cast_to=httpx.Response,
@@ -1488,3 +1646,186 @@ async def test_retrying_status_errors_doesnt_leak(self, respx_mock: MockRouter)
             )
 
         assert _get_open_connections(self.client) == 0
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.asyncio
+    @pytest.mark.parametrize("failure_mode", ["status", "exception"])
+    async def test_retries_taken(
+        self,
+        async_client: AsyncOpenAI,
+        failures_before_success: int,
+        failure_mode: Literal["status", "exception"],
+        respx_mock: MockRouter,
+    ) -> None:
+        client = async_client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                if failure_mode == "exception":
+                    raise RuntimeError("oops")
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
+
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "developer",
+                }
+            ],
+            model="gpt-4o",
+        )
+
+        assert response.retries_taken == failures_before_success
+        assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.asyncio
+    async def test_omit_retry_count_header(
+        self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = async_client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
+
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "developer",
+                }
+            ],
+            model="gpt-4o",
+            extra_headers={"x-stainless-retry-count": Omit()},
+        )
+
+        assert len(response.http_request.headers.get_list("x-stainless-retry-count")) == 0
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.asyncio
+    async def test_overwrite_retry_count_header(
+        self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = async_client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
+
+        response = await client.chat.completions.with_raw_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "developer",
+                }
+            ],
+            model="gpt-4o",
+            extra_headers={"x-stainless-retry-count": "42"},
+        )
+
+        assert response.http_request.headers.get("x-stainless-retry-count") == "42"
+
+    @pytest.mark.parametrize("failures_before_success", [0, 2, 4])
+    @mock.patch("openai._base_client.BaseClient._calculate_retry_timeout", _low_retry_timeout)
+    @pytest.mark.respx(base_url=base_url)
+    @pytest.mark.asyncio
+    async def test_retries_taken_new_response_class(
+        self, async_client: AsyncOpenAI, failures_before_success: int, respx_mock: MockRouter
+    ) -> None:
+        client = async_client.with_options(max_retries=4)
+
+        nb_retries = 0
+
+        def retry_handler(_request: httpx.Request) -> httpx.Response:
+            nonlocal nb_retries
+            if nb_retries < failures_before_success:
+                nb_retries += 1
+                return httpx.Response(500)
+            return httpx.Response(200)
+
+        respx_mock.post("/chat/completions").mock(side_effect=retry_handler)
+
+        async with client.chat.completions.with_streaming_response.create(
+            messages=[
+                {
+                    "content": "string",
+                    "role": "developer",
+                }
+            ],
+            model="gpt-4o",
+        ) as response:
+            assert response.retries_taken == failures_before_success
+            assert int(response.http_request.headers.get("x-stainless-retry-count")) == failures_before_success
+
+    def test_get_platform(self) -> None:
+        # A previous implementation of asyncify could leave threads unterminated when
+        # used with nest_asyncio.
+        #
+        # Since nest_asyncio.apply() is global and cannot be un-applied, this
+        # test is run in a separate process to avoid affecting other tests.
+        test_code = dedent("""
+        import asyncio
+        import nest_asyncio
+        import threading
+
+        from openai._utils import asyncify
+        from openai._base_client import get_platform
+
+        async def test_main() -> None:
+            result = await asyncify(get_platform)()
+            print(result)
+            for thread in threading.enumerate():
+                print(thread.name)
+
+        nest_asyncio.apply()
+        asyncio.run(test_main())
+        """)
+        with subprocess.Popen(
+            [sys.executable, "-c", test_code],
+            text=True,
+        ) as process:
+            timeout = 10  # seconds
+
+            start_time = time.monotonic()
+            while True:
+                return_code = process.poll()
+                if return_code is not None:
+                    if return_code != 0:
+                        raise AssertionError("calling get_platform using asyncify resulted in a non-zero exit code")
+
+                    # success
+                    break
+
+                if time.monotonic() - start_time > timeout:
+                    process.kill()
+                    raise AssertionError("calling get_platform using asyncify resulted in a hung process")
+
+                time.sleep(0.1)
diff --git a/tests/test_deepcopy.py b/tests/test_deepcopy.py
index 8cf65ce94e..86a2adb1a2 100644
--- a/tests/test_deepcopy.py
+++ b/tests/test_deepcopy.py
@@ -41,8 +41,7 @@ def test_nested_list() -> None:
     assert_different_identities(obj1[1], obj2[1])
 
 
-class MyObject:
-    ...
+class MyObject: ...
 
 
 def test_ignores_other_types() -> None:
diff --git a/tests/test_legacy_response.py b/tests/test_legacy_response.py
index 45025f81d0..4f24ce187d 100644
--- a/tests/test_legacy_response.py
+++ b/tests/test_legacy_response.py
@@ -1,5 +1,5 @@
 import json
-from typing import cast
+from typing import Any, Union, cast
 from typing_extensions import Annotated
 
 import httpx
@@ -12,8 +12,7 @@
 from openai._legacy_response import LegacyAPIResponse
 
 
-class PydanticModel(pydantic.BaseModel):
-    ...
+class PydanticModel(pydantic.BaseModel): ...
 
 
 def test_response_parse_mismatched_basemodel(client: OpenAI) -> None:
@@ -33,6 +32,31 @@ def test_response_parse_mismatched_basemodel(client: OpenAI) -> None:
         response.parse(to=PydanticModel)
 
 
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+def test_response_parse_bool(client: OpenAI, content: str, expected: bool) -> None:
+    response = LegacyAPIResponse(
+        raw=httpx.Response(200, content=content),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = response.parse(to=bool)
+    assert result is expected
+
+
 def test_response_parse_custom_stream(client: OpenAI) -> None:
     response = LegacyAPIResponse(
         raw=httpx.Response(200, content=b"foo"),
@@ -82,3 +106,23 @@ def test_response_parse_annotated_type(client: OpenAI) -> None:
     )
     assert obj.foo == "hello!"
     assert obj.bar == 2
+
+
+class OtherModel(pydantic.BaseModel):
+    a: str
+
+
+@pytest.mark.parametrize("client", [False], indirect=True)  # loose validation
+def test_response_parse_expect_model_union_non_json_content(client: OpenAI) -> None:
+    response = LegacyAPIResponse(
+        raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = response.parse(to=cast(Any, Union[CustomModel, OtherModel]))
+    assert isinstance(obj, str)
+    assert obj == "foo"
diff --git a/tests/test_models.py b/tests/test_models.py
index b703444248..b9be1f3ea3 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,7 +1,7 @@
 import json
 from typing import Any, Dict, List, Union, Optional, cast
 from datetime import datetime, timezone
-from typing_extensions import Literal, Annotated
+from typing_extensions import Literal, Annotated, TypeAliasType
 
 import pytest
 import pydantic
@@ -245,7 +245,7 @@ class Model(BaseModel):
     assert m.foo is True
 
     m = Model.construct(foo="CARD_HOLDER")
-    assert m.foo is "CARD_HOLDER"
+    assert m.foo == "CARD_HOLDER"
 
     m = Model.construct(foo={"bar": False})
     assert isinstance(m.foo, Submodel1)
@@ -520,19 +520,15 @@ class Model(BaseModel):
     assert m3.to_dict(exclude_none=True) == {}
     assert m3.to_dict(exclude_defaults=True) == {}
 
-    if PYDANTIC_V2:
-
-        class Model2(BaseModel):
-            created_at: datetime
+    class Model2(BaseModel):
+        created_at: datetime
 
-        time_str = "2024-03-21T11:39:01.275859"
-        m4 = Model2.construct(created_at=time_str)
-        assert m4.to_dict(mode="python") == {"created_at": datetime.fromisoformat(time_str)}
-        assert m4.to_dict(mode="json") == {"created_at": time_str}
-    else:
-        with pytest.raises(ValueError, match="mode is only supported in Pydantic v2"):
-            m.to_dict(mode="json")
+    time_str = "2024-03-21T11:39:01.275859"
+    m4 = Model2.construct(created_at=time_str)
+    assert m4.to_dict(mode="python") == {"created_at": datetime.fromisoformat(time_str)}
+    assert m4.to_dict(mode="json") == {"created_at": time_str}
 
+    if not PYDANTIC_V2:
         with pytest.raises(ValueError, match="warnings is only supported in Pydantic v2"):
             m.to_dict(warnings=False)
 
@@ -558,9 +554,6 @@ class Model(BaseModel):
     assert m3.model_dump(exclude_none=True) == {}
 
     if not PYDANTIC_V2:
-        with pytest.raises(ValueError, match="mode is only supported in Pydantic v2"):
-            m.model_dump(mode="json")
-
         with pytest.raises(ValueError, match="round_trip is only supported in Pydantic v2"):
             m.model_dump(round_trip=True)
 
@@ -568,6 +561,14 @@ class Model(BaseModel):
             m.model_dump(warnings=False)
 
 
+def test_compat_method_no_error_for_warnings() -> None:
+    class Model(BaseModel):
+        foo: Optional[str]
+
+    m = Model(foo="hello")
+    assert isinstance(model_dump(m, warnings=False), dict)
+
+
 def test_to_json() -> None:
     class Model(BaseModel):
         foo: Optional[str] = Field(alias="FOO", default=None)
@@ -827,3 +828,61 @@ class B(BaseModel):
     # if the discriminator details object stays the same between invocations then
     # we hit the cache
     assert UnionType.__discriminator__ is discriminator
+
+
+@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1")
+def test_type_alias_type() -> None:
+    Alias = TypeAliasType("Alias", str)
+
+    class Model(BaseModel):
+        alias: Alias
+        union: Union[int, Alias]
+
+    m = construct_type(value={"alias": "foo", "union": "bar"}, type_=Model)
+    assert isinstance(m, Model)
+    assert isinstance(m.alias, str)
+    assert m.alias == "foo"
+    assert isinstance(m.union, str)
+    assert m.union == "bar"
+
+
+@pytest.mark.skipif(not PYDANTIC_V2, reason="TypeAliasType is not supported in Pydantic v1")
+def test_field_named_cls() -> None:
+    class Model(BaseModel):
+        cls: str
+
+    m = construct_type(value={"cls": "foo"}, type_=Model)
+    assert isinstance(m, Model)
+    assert isinstance(m.cls, str)
+
+
+def test_discriminated_union_case() -> None:
+    class A(BaseModel):
+        type: Literal["a"]
+
+        data: bool
+
+    class B(BaseModel):
+        type: Literal["b"]
+
+        data: List[Union[A, object]]
+
+    class ModelA(BaseModel):
+        type: Literal["modelA"]
+
+        data: int
+
+    class ModelB(BaseModel):
+        type: Literal["modelB"]
+
+        required: str
+
+        data: Union[A, B]
+
+    # when constructing ModelA | ModelB, value data doesn't match ModelB exactly - missing `required`
+    m = construct_type(
+        value={"type": "modelB", "data": {"type": "a", "data": True}},
+        type_=cast(Any, Annotated[Union[ModelA, ModelB], PropertyInfo(discriminator="type")]),
+    )
+
+    assert isinstance(m, ModelB)
diff --git a/tests/test_module_client.py b/tests/test_module_client.py
index 05b5f81111..5dc474e02d 100644
--- a/tests/test_module_client.py
+++ b/tests/test_module_client.py
@@ -2,8 +2,6 @@
 
 from __future__ import annotations
 
-import os as _os
-
 import httpx
 import pytest
 from httpx import URL
@@ -23,11 +21,6 @@ def reset_state() -> None:
     openai.default_headers = None
     openai.default_query = None
     openai.http_client = None
-    openai.api_type = _os.environ.get("OPENAI_API_TYPE")  # type: ignore
-    openai.api_version = None
-    openai.azure_endpoint = None
-    openai.azure_ad_token = None
-    openai.azure_ad_token_provider = None
 
 
 @pytest.fixture(autouse=True)
@@ -94,90 +87,3 @@ def test_http_client_option() -> None:
 
     openai.http_client = new_client
     assert openai.completions._client._client is new_client
-
-
-import contextlib
-from typing import Iterator
-
-from openai.lib.azure import AzureOpenAI
-
-
-@contextlib.contextmanager
-def fresh_env() -> Iterator[None]:
-    old = _os.environ.copy()
-
-    try:
-        _os.environ.clear()
-        yield
-    finally:
-        _os.environ.update(old)
-
-
-def test_only_api_key_results_in_openai_api() -> None:
-    with fresh_env():
-        openai.api_type = None
-        openai.api_key = "example API key"
-
-        assert type(openai.completions._client).__name__ == "_ModuleClient"
-
-
-def test_azure_api_key_env_without_api_version() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["AZURE_OPENAI_API_KEY"] = "example API key"
-
-        with pytest.raises(
-            ValueError,
-            match=r"Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable",
-        ):
-            openai.completions._client  # noqa: B018
-
-
-def test_azure_api_key_and_version_env() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["AZURE_OPENAI_API_KEY"] = "example API key"
-        _os.environ["OPENAI_API_VERSION"] = "example-version"
-
-        with pytest.raises(
-            ValueError,
-            match=r"Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable",
-        ):
-            openai.completions._client  # noqa: B018
-
-
-def test_azure_api_key_version_and_endpoint_env() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["AZURE_OPENAI_API_KEY"] = "example API key"
-        _os.environ["OPENAI_API_VERSION"] = "example-version"
-        _os.environ["AZURE_OPENAI_ENDPOINT"] = "https://www.example/"
-
-        openai.completions._client  # noqa: B018
-
-        assert openai.api_type == "azure"
-
-
-def test_azure_azure_ad_token_version_and_endpoint_env() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["AZURE_OPENAI_AD_TOKEN"] = "example AD token"
-        _os.environ["OPENAI_API_VERSION"] = "example-version"
-        _os.environ["AZURE_OPENAI_ENDPOINT"] = "https://www.example/"
-
-        client = openai.completions._client
-        assert isinstance(client, AzureOpenAI)
-        assert client._azure_ad_token == "example AD token"
-
-
-def test_azure_azure_ad_token_provider_version_and_endpoint_env() -> None:
-    with fresh_env():
-        openai.api_type = None
-        _os.environ["OPENAI_API_VERSION"] = "example-version"
-        _os.environ["AZURE_OPENAI_ENDPOINT"] = "https://www.example/"
-        openai.azure_ad_token_provider = lambda: "token"
-
-        client = openai.completions._client
-        assert isinstance(client, AzureOpenAI)
-        assert client._azure_ad_token_provider is not None
-        assert client._azure_ad_token_provider() == "token"
diff --git a/tests/test_response.py b/tests/test_response.py
index af153b67c4..d022306440 100644
--- a/tests/test_response.py
+++ b/tests/test_response.py
@@ -1,5 +1,5 @@
 import json
-from typing import List, cast
+from typing import Any, List, Union, cast
 from typing_extensions import Annotated
 
 import httpx
@@ -19,16 +19,13 @@
 from openai._base_client import FinalRequestOptions
 
 
-class ConcreteBaseAPIResponse(APIResponse[bytes]):
-    ...
+class ConcreteBaseAPIResponse(APIResponse[bytes]): ...
 
 
-class ConcreteAPIResponse(APIResponse[List[str]]):
-    ...
+class ConcreteAPIResponse(APIResponse[List[str]]): ...
 
 
-class ConcreteAsyncAPIResponse(APIResponse[httpx.Response]):
-    ...
+class ConcreteAsyncAPIResponse(APIResponse[httpx.Response]): ...
 
 
 def test_extract_response_type_direct_classes() -> None:
@@ -56,8 +53,7 @@ def test_extract_response_type_binary_response() -> None:
     assert extract_response_type(AsyncBinaryAPIResponse) == bytes
 
 
-class PydanticModel(pydantic.BaseModel):
-    ...
+class PydanticModel(pydantic.BaseModel): ...
 
 
 def test_response_parse_mismatched_basemodel(client: OpenAI) -> None:
@@ -192,3 +188,90 @@ async def test_async_response_parse_annotated_type(async_client: AsyncOpenAI) ->
     )
     assert obj.foo == "hello!"
     assert obj.bar == 2
+
+
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+def test_response_parse_bool(client: OpenAI, content: str, expected: bool) -> None:
+    response = APIResponse(
+        raw=httpx.Response(200, content=content),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = response.parse(to=bool)
+    assert result is expected
+
+
+@pytest.mark.parametrize(
+    "content, expected",
+    [
+        ("false", False),
+        ("true", True),
+        ("False", False),
+        ("True", True),
+        ("TrUe", True),
+        ("FalSe", False),
+    ],
+)
+async def test_async_response_parse_bool(client: AsyncOpenAI, content: str, expected: bool) -> None:
+    response = AsyncAPIResponse(
+        raw=httpx.Response(200, content=content),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    result = await response.parse(to=bool)
+    assert result is expected
+
+
+class OtherModel(BaseModel):
+    a: str
+
+
+@pytest.mark.parametrize("client", [False], indirect=True)  # loose validation
+def test_response_parse_expect_model_union_non_json_content(client: OpenAI) -> None:
+    response = APIResponse(
+        raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}),
+        client=client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = response.parse(to=cast(Any, Union[CustomModel, OtherModel]))
+    assert isinstance(obj, str)
+    assert obj == "foo"
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("async_client", [False], indirect=True)  # loose validation
+async def test_async_response_parse_expect_model_union_non_json_content(async_client: AsyncOpenAI) -> None:
+    response = AsyncAPIResponse(
+        raw=httpx.Response(200, content=b"foo", headers={"Content-Type": "application/text"}),
+        client=async_client,
+        stream=False,
+        stream_cls=None,
+        cast_to=str,
+        options=FinalRequestOptions.construct(method="get", url="/foo"),
+    )
+
+    obj = await response.parse(to=cast(Any, Union[CustomModel, OtherModel]))
+    assert isinstance(obj, str)
+    assert obj == "foo"
diff --git a/tests/test_transform.py b/tests/test_transform.py
index 1eb6cde9d6..385fbe2b2c 100644
--- a/tests/test_transform.py
+++ b/tests/test_transform.py
@@ -2,7 +2,7 @@
 
 import io
 import pathlib
-from typing import Any, List, Union, TypeVar, Iterable, Optional, cast
+from typing import Any, Dict, List, Union, TypeVar, Iterable, Optional, cast
 from datetime import date, datetime
 from typing_extensions import Required, Annotated, TypedDict
 
@@ -177,17 +177,32 @@ class DateDict(TypedDict, total=False):
     foo: Annotated[date, PropertyInfo(format="iso8601")]
 
 
+class DatetimeModel(BaseModel):
+    foo: datetime
+
+
+class DateModel(BaseModel):
+    foo: Optional[date]
+
+
 @parametrize
 @pytest.mark.asyncio
 async def test_iso8601_format(use_async: bool) -> None:
     dt = datetime.fromisoformat("2023-02-23T14:16:36.337692+00:00")
+    tz = "Z" if PYDANTIC_V2 else "+00:00"
     assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692+00:00"}  # type: ignore[comparison-overlap]
+    assert await transform(DatetimeModel(foo=dt), Any, use_async) == {"foo": "2023-02-23T14:16:36.337692" + tz}  # type: ignore[comparison-overlap]
 
     dt = dt.replace(tzinfo=None)
     assert await transform({"foo": dt}, DatetimeDict, use_async) == {"foo": "2023-02-23T14:16:36.337692"}  # type: ignore[comparison-overlap]
+    assert await transform(DatetimeModel(foo=dt), Any, use_async) == {"foo": "2023-02-23T14:16:36.337692"}  # type: ignore[comparison-overlap]
 
     assert await transform({"foo": None}, DateDict, use_async) == {"foo": None}  # type: ignore[comparison-overlap]
+    assert await transform(DateModel(foo=None), Any, use_async) == {"foo": None}  # type: ignore
     assert await transform({"foo": date.fromisoformat("2023-02-23")}, DateDict, use_async) == {"foo": "2023-02-23"}  # type: ignore[comparison-overlap]
+    assert await transform(DateModel(foo=date.fromisoformat("2023-02-23")), DateDict, use_async) == {
+        "foo": "2023-02-23"
+    }  # type: ignore[comparison-overlap]
 
 
 @parametrize
@@ -373,6 +388,15 @@ def my_iter() -> Iterable[Baz8]:
     }
 
 
+@parametrize
+@pytest.mark.asyncio
+async def test_dictionary_items(use_async: bool) -> None:
+    class DictItems(TypedDict):
+        foo_baz: Annotated[str, PropertyInfo(alias="fooBaz")]
+
+    assert await transform({"foo": {"foo_baz": "bar"}}, Dict[str, DictItems], use_async) == {"foo": {"fooBaz": "bar"}}
+
+
 class TypedDictIterableUnionStr(TypedDict):
     foo: Annotated[Union[str, Iterable[Baz8]], PropertyInfo(alias="FOO")]
diff --git a/tests/test_utils/test_typing.py b/tests/test_utils/test_typing.py
index 690960802a..535935b9e1 100644
--- a/tests/test_utils/test_typing.py
+++ b/tests/test_utils/test_typing.py
@@ -9,24 +9,19 @@
 _T3 = TypeVar("_T3")
 
 
-class BaseGeneric(Generic[_T]):
-    ...
+class BaseGeneric(Generic[_T]): ...
 
 
-class SubclassGeneric(BaseGeneric[_T]):
-    ...
+class SubclassGeneric(BaseGeneric[_T]): ...
 
 
-class BaseGenericMultipleTypeArgs(Generic[_T, _T2, _T3]):
-    ...
+class BaseGenericMultipleTypeArgs(Generic[_T, _T2, _T3]): ...
 
 
-class SubclassGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T, _T2, _T3]):
-    ...
+class SubclassGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T, _T2, _T3]): ...
 
 
-class SubclassDifferentOrderGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T2, _T, _T3]):
-    ...
+class SubclassDifferentOrderGenericMultipleTypeArgs(BaseGenericMultipleTypeArgs[_T2, _T, _T3]): ...
 
 
 def test_extract_type_var() -> None:
diff --git a/tests/utils.py b/tests/utils.py
index 060b99339f..bb2f861218 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -8,7 +8,7 @@
 from datetime import date, datetime
 from typing_extensions import Literal, get_args, get_origin, assert_type
 
-from openai._types import NoneType
+from openai._types import Omit, NoneType
 from openai._utils import (
     is_dict,
     is_list,
@@ -16,6 +16,7 @@
     is_union_type,
     extract_type_arg,
     is_annotated_type,
+    is_type_alias_type,
 )
 from openai._compat import PYDANTIC_V2, field_outer_type, get_model_fields
 from openai._models import BaseModel
@@ -51,6 +52,9 @@ def assert_matches_type(
     path: list[str],
     allow_none: bool = False,
 ) -> None:
+    if is_type_alias_type(type_):
+        type_ = type_.__value__
+
     # unwrap `Annotated[T, ...]` -> `T`
     if is_annotated_type(type_):
         type_ = extract_type_arg(type_, 0)
@@ -139,11 +143,15 @@ def _assert_list_type(type_: type[object], value: object) -> None:
 
 
 @contextlib.contextmanager
-def update_env(**new_env: str) -> Iterator[None]:
+def update_env(**new_env: str | Omit) -> Iterator[None]:
     old = os.environ.copy()
 
     try:
-        os.environ.update(new_env)
+        for name, value in new_env.items():
+            if isinstance(value, Omit):
+                os.environ.pop(name, None)
+            else:
+                os.environ[name] = value
 
         yield None
     finally: